summaryrefslogtreecommitdiff
path: root/boost/compute
diff options
context:
space:
mode:
Diffstat (limited to 'boost/compute')
-rw-r--r--boost/compute/algorithm.hpp94
-rw-r--r--boost/compute/algorithm/accumulate.hpp184
-rw-r--r--boost/compute/algorithm/adjacent_difference.hpp98
-rw-r--r--boost/compute/algorithm/adjacent_find.hpp162
-rw-r--r--boost/compute/algorithm/all_of.hpp36
-rw-r--r--boost/compute/algorithm/any_of.hpp40
-rw-r--r--boost/compute/algorithm/binary_search.hpp37
-rw-r--r--boost/compute/algorithm/copy.hpp362
-rw-r--r--boost/compute/algorithm/copy_if.hpp58
-rw-r--r--boost/compute/algorithm/copy_n.hpp51
-rw-r--r--boost/compute/algorithm/count.hpp55
-rw-r--r--boost/compute/algorithm/count_if.hpp62
-rw-r--r--boost/compute/algorithm/detail/balanced_path.hpp162
-rw-r--r--boost/compute/algorithm/detail/binary_find.hpp133
-rw-r--r--boost/compute/algorithm/detail/compact.hpp77
-rw-r--r--boost/compute/algorithm/detail/copy_on_device.hpp190
-rw-r--r--boost/compute/algorithm/detail/copy_to_device.hpp127
-rw-r--r--boost/compute/algorithm/detail/copy_to_host.hpp137
-rw-r--r--boost/compute/algorithm/detail/count_if_with_ballot.hpp78
-rw-r--r--boost/compute/algorithm/detail/count_if_with_reduce.hpp87
-rw-r--r--boost/compute/algorithm/detail/count_if_with_threads.hpp129
-rw-r--r--boost/compute/algorithm/detail/find_extrema.hpp64
-rw-r--r--boost/compute/algorithm/detail/find_extrema_with_atomics.hpp108
-rw-r--r--boost/compute/algorithm/detail/find_extrema_with_reduce.hpp443
-rw-r--r--boost/compute/algorithm/detail/find_if_with_atomics.hpp212
-rw-r--r--boost/compute/algorithm/detail/inplace_reduce.hpp136
-rw-r--r--boost/compute/algorithm/detail/insertion_sort.hpp165
-rw-r--r--boost/compute/algorithm/detail/merge_path.hpp116
-rw-r--r--boost/compute/algorithm/detail/merge_sort_on_cpu.hpp366
-rw-r--r--boost/compute/algorithm/detail/merge_with_merge_path.hpp203
-rw-r--r--boost/compute/algorithm/detail/radix_sort.hpp415
-rw-r--r--boost/compute/algorithm/detail/random_fill.hpp57
-rw-r--r--boost/compute/algorithm/detail/reduce_by_key.hpp119
-rw-r--r--boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp541
-rw-r--r--boost/compute/algorithm/detail/reduce_on_gpu.hpp286
-rw-r--r--boost/compute/algorithm/detail/scan.hpp45
-rw-r--r--boost/compute/algorithm/detail/scan_on_cpu.hpp103
-rw-r--r--boost/compute/algorithm/detail/scan_on_gpu.hpp331
-rw-r--r--boost/compute/algorithm/detail/search_all.hpp86
-rw-r--r--boost/compute/algorithm/detail/serial_accumulate.hpp56
-rw-r--r--boost/compute/algorithm/detail/serial_count_if.hpp68
-rw-r--r--boost/compute/algorithm/detail/serial_find_extrema.hpp87
-rw-r--r--boost/compute/algorithm/detail/serial_merge.hpp97
-rw-r--r--boost/compute/algorithm/detail/serial_reduce.hpp62
-rw-r--r--boost/compute/algorithm/detail/serial_reduce_by_key.hpp108
-rw-r--r--boost/compute/algorithm/equal.hpp53
-rw-r--r--boost/compute/algorithm/equal_range.hpp42
-rw-r--r--boost/compute/algorithm/exclusive_scan.hpp96
-rw-r--r--boost/compute/algorithm/fill.hpp306
-rw-r--r--boost/compute/algorithm/fill_n.hpp36
-rw-r--r--boost/compute/algorithm/find.hpp57
-rw-r--r--boost/compute/algorithm/find_end.hpp119
-rw-r--r--boost/compute/algorithm/find_if.hpp35
-rw-r--r--boost/compute/algorithm/find_if_not.hpp43
-rw-r--r--boost/compute/algorithm/for_each.hpp65
-rw-r--r--boost/compute/algorithm/for_each_n.hpp35
-rw-r--r--boost/compute/algorithm/gather.hpp84
-rw-r--r--boost/compute/algorithm/generate.hpp49
-rw-r--r--boost/compute/algorithm/generate_n.hpp35
-rw-r--r--boost/compute/algorithm/includes.hpp155
-rw-r--r--boost/compute/algorithm/inclusive_scan.hpp81
-rw-r--r--boost/compute/algorithm/inner_product.hpp93
-rw-r--r--boost/compute/algorithm/inplace_merge.hpp60
-rw-r--r--boost/compute/algorithm/iota.hpp48
-rw-r--r--boost/compute/algorithm/is_partitioned.hpp43
-rw-r--r--boost/compute/algorithm/is_permutation.hpp67
-rw-r--r--boost/compute/algorithm/is_sorted.hpp64
-rw-r--r--boost/compute/algorithm/lexicographical_compare.hpp117
-rw-r--r--boost/compute/algorithm/lower_bound.hpp44
-rw-r--r--boost/compute/algorithm/max_element.hpp74
-rw-r--r--boost/compute/algorithm/merge.hpp105
-rw-r--r--boost/compute/algorithm/min_element.hpp74
-rw-r--r--boost/compute/algorithm/minmax_element.hpp70
-rw-r--r--boost/compute/algorithm/mismatch.hpp89
-rw-r--r--boost/compute/algorithm/next_permutation.hpp170
-rw-r--r--boost/compute/algorithm/none_of.hpp36
-rw-r--r--boost/compute/algorithm/nth_element.hpp87
-rw-r--r--boost/compute/algorithm/partial_sum.hpp37
-rw-r--r--boost/compute/algorithm/partition.hpp39
-rw-r--r--boost/compute/algorithm/partition_copy.hpp63
-rw-r--r--boost/compute/algorithm/partition_point.hpp46
-rw-r--r--boost/compute/algorithm/prev_permutation.hpp170
-rw-r--r--boost/compute/algorithm/random_shuffle.hpp75
-rw-r--r--boost/compute/algorithm/reduce.hpp301
-rw-r--r--boost/compute/algorithm/reduce_by_key.hpp118
-rw-r--r--boost/compute/algorithm/remove.hpp54
-rw-r--r--boost/compute/algorithm/remove_if.hpp47
-rw-r--r--boost/compute/algorithm/replace.hpp90
-rw-r--r--boost/compute/algorithm/replace_copy.hpp62
-rw-r--r--boost/compute/algorithm/reverse.hpp74
-rw-r--r--boost/compute/algorithm/reverse_copy.hpp79
-rw-r--r--boost/compute/algorithm/rotate.hpp54
-rw-r--r--boost/compute/algorithm/rotate_copy.hpp41
-rw-r--r--boost/compute/algorithm/scatter.hpp99
-rw-r--r--boost/compute/algorithm/scatter_if.hpp119
-rw-r--r--boost/compute/algorithm/search.hpp73
-rw-r--r--boost/compute/algorithm/search_n.hpp140
-rw-r--r--boost/compute/algorithm/set_difference.hpp182
-rw-r--r--boost/compute/algorithm/set_intersection.hpp170
-rw-r--r--boost/compute/algorithm/set_symmetric_difference.hpp194
-rw-r--r--boost/compute/algorithm/set_union.hpp195
-rw-r--r--boost/compute/algorithm/sort.hpp194
-rw-r--r--boost/compute/algorithm/sort_by_key.hpp156
-rw-r--r--boost/compute/algorithm/stable_partition.hpp72
-rw-r--r--boost/compute/algorithm/stable_sort.hpp99
-rw-r--r--boost/compute/algorithm/stable_sort_by_key.hpp61
-rw-r--r--boost/compute/algorithm/swap_ranges.hpp44
-rw-r--r--boost/compute/algorithm/transform.hpp76
-rw-r--r--boost/compute/algorithm/transform_if.hpp117
-rw-r--r--boost/compute/algorithm/transform_reduce.hpp89
-rw-r--r--boost/compute/algorithm/unique.hpp66
-rw-r--r--boost/compute/algorithm/unique_copy.hpp164
-rw-r--r--boost/compute/algorithm/upper_bound.hpp43
-rw-r--r--boost/compute/allocator.hpp21
-rw-r--r--boost/compute/allocator/buffer_allocator.hpp118
-rw-r--r--boost/compute/allocator/pinned_allocator.hpp53
-rw-r--r--boost/compute/async.hpp21
-rw-r--r--boost/compute/async/future.hpp166
-rw-r--r--boost/compute/async/wait.hpp56
-rw-r--r--boost/compute/async/wait_guard.hpp63
-rw-r--r--boost/compute/buffer.hpp227
-rw-r--r--boost/compute/cl.hpp20
-rw-r--r--boost/compute/cl_ext.hpp20
-rw-r--r--boost/compute/closure.hpp347
-rw-r--r--boost/compute/command_queue.hpp1881
-rw-r--r--boost/compute/config.hpp70
-rw-r--r--boost/compute/container.hpp27
-rw-r--r--boost/compute/container/array.hpp281
-rw-r--r--boost/compute/container/basic_string.hpp331
-rw-r--r--boost/compute/container/detail/scalar.hpp61
-rw-r--r--boost/compute/container/dynamic_bitset.hpp237
-rw-r--r--boost/compute/container/flat_map.hpp406
-rw-r--r--boost/compute/container/flat_set.hpp339
-rw-r--r--boost/compute/container/mapped_view.hpp250
-rw-r--r--boost/compute/container/stack.hpp81
-rw-r--r--boost/compute/container/string.hpp25
-rw-r--r--boost/compute/container/valarray.hpp499
-rw-r--r--boost/compute/container/vector.hpp761
-rw-r--r--boost/compute/context.hpp245
-rw-r--r--boost/compute/core.hpp32
-rw-r--r--boost/compute/detail/assert_cl_success.hpp24
-rw-r--r--boost/compute/detail/buffer_value.hpp178
-rw-r--r--boost/compute/detail/device_ptr.hpp215
-rw-r--r--boost/compute/detail/diagnostic.hpp112
-rw-r--r--boost/compute/detail/duration.hpp50
-rw-r--r--boost/compute/detail/get_object_info.hpp216
-rw-r--r--boost/compute/detail/getenv.hpp36
-rw-r--r--boost/compute/detail/global_static.hpp37
-rw-r--r--boost/compute/detail/is_buffer_iterator.hpp30
-rw-r--r--boost/compute/detail/is_contiguous_iterator.hpp118
-rw-r--r--boost/compute/detail/iterator_plus_distance.hpp53
-rw-r--r--boost/compute/detail/iterator_range_size.hpp44
-rw-r--r--boost/compute/detail/iterator_traits.hpp35
-rw-r--r--boost/compute/detail/literal.hpp45
-rw-r--r--boost/compute/detail/lru_cache.hpp139
-rw-r--r--boost/compute/detail/meta_kernel.hpp1054
-rw-r--r--boost/compute/detail/mpl_vector_to_tuple.hpp65
-rw-r--r--boost/compute/detail/nvidia_compute_capability.hpp60
-rw-r--r--boost/compute/detail/parameter_cache.hpp215
-rw-r--r--boost/compute/detail/path.hpp73
-rw-r--r--boost/compute/detail/print_range.hpp82
-rw-r--r--boost/compute/detail/read_write_single_value.hpp77
-rw-r--r--boost/compute/detail/sha1.hpp53
-rw-r--r--boost/compute/detail/variadic_macros.hpp35
-rw-r--r--boost/compute/detail/vendor.hpp38
-rw-r--r--boost/compute/detail/work_size.hpp37
-rw-r--r--boost/compute/device.hpp584
-rw-r--r--boost/compute/event.hpp338
-rw-r--r--boost/compute/exception.hpp23
-rw-r--r--boost/compute/exception/context_error.hpp88
-rw-r--r--boost/compute/exception/no_device_found.hpp48
-rw-r--r--boost/compute/exception/opencl_error.hpp158
-rw-r--r--boost/compute/exception/unsupported_extension_error.hpp71
-rw-r--r--boost/compute/experimental/clamp_range.hpp49
-rw-r--r--boost/compute/experimental/malloc.hpp51
-rw-r--r--boost/compute/experimental/sort_by_transform.hpp66
-rw-r--r--boost/compute/experimental/tabulate.hpp44
-rw-r--r--boost/compute/function.hpp454
-rw-r--r--boost/compute/functional.hpp34
-rw-r--r--boost/compute/functional/as.hpp51
-rw-r--r--boost/compute/functional/atomic.hpp141
-rw-r--r--boost/compute/functional/bind.hpp261
-rw-r--r--boost/compute/functional/common.hpp29
-rw-r--r--boost/compute/functional/convert.hpp51
-rw-r--r--boost/compute/functional/detail/macros.hpp35
-rw-r--r--boost/compute/functional/detail/nvidia_ballot.hpp48
-rw-r--r--boost/compute/functional/detail/nvidia_popcount.hpp42
-rw-r--r--boost/compute/functional/detail/unpack.hpp143
-rw-r--r--boost/compute/functional/field.hpp86
-rw-r--r--boost/compute/functional/geometry.hpp32
-rw-r--r--boost/compute/functional/get.hpp76
-rw-r--r--boost/compute/functional/hash.hpp91
-rw-r--r--boost/compute/functional/identity.hpp64
-rw-r--r--boost/compute/functional/integer.hpp30
-rw-r--r--boost/compute/functional/logical.hpp208
-rw-r--r--boost/compute/functional/math.hpp80
-rw-r--r--boost/compute/functional/operator.hpp100
-rw-r--r--boost/compute/functional/popcount.hpp55
-rw-r--r--boost/compute/functional/relational.hpp39
-rw-r--r--boost/compute/image.hpp25
-rw-r--r--boost/compute/image/image1d.hpp204
-rw-r--r--boost/compute/image/image2d.hpp262
-rw-r--r--boost/compute/image/image3d.hpp265
-rw-r--r--boost/compute/image/image_format.hpp135
-rw-r--r--boost/compute/image/image_object.hpp170
-rw-r--r--boost/compute/image/image_sampler.hpp221
-rw-r--r--boost/compute/image2d.hpp12
-rw-r--r--boost/compute/image3d.hpp12
-rw-r--r--boost/compute/image_format.hpp12
-rw-r--r--boost/compute/image_sampler.hpp12
-rw-r--r--boost/compute/interop/eigen.hpp16
-rw-r--r--boost/compute/interop/eigen/core.hpp72
-rw-r--r--boost/compute/interop/opencv.hpp17
-rw-r--r--boost/compute/interop/opencv/core.hpp141
-rw-r--r--boost/compute/interop/opencv/highgui.hpp33
-rw-r--r--boost/compute/interop/opencv/ocl.hpp51
-rw-r--r--boost/compute/interop/opengl.hpp24
-rw-r--r--boost/compute/interop/opengl/acquire.hpp99
-rw-r--r--boost/compute/interop/opengl/cl_gl.hpp20
-rw-r--r--boost/compute/interop/opengl/cl_gl_ext.hpp20
-rw-r--r--boost/compute/interop/opengl/context.hpp135
-rw-r--r--boost/compute/interop/opengl/gl.hpp20
-rw-r--r--boost/compute/interop/opengl/opengl_buffer.hpp106
-rw-r--r--boost/compute/interop/opengl/opengl_renderbuffer.hpp129
-rw-r--r--boost/compute/interop/opengl/opengl_texture.hpp133
-rw-r--r--boost/compute/interop/qt.hpp17
-rw-r--r--boost/compute/interop/qt/qimage.hpp69
-rw-r--r--boost/compute/interop/qt/qpoint.hpp20
-rw-r--r--boost/compute/interop/qt/qpointf.hpp20
-rw-r--r--boost/compute/interop/qt/qtcore.hpp18
-rw-r--r--boost/compute/interop/qt/qtgui.hpp16
-rw-r--r--boost/compute/interop/qt/qvector.hpp48
-rw-r--r--boost/compute/interop/vtk.hpp19
-rw-r--r--boost/compute/interop/vtk/bounds.hpp59
-rw-r--r--boost/compute/interop/vtk/data_array.hpp65
-rw-r--r--boost/compute/interop/vtk/matrix4x4.hpp46
-rw-r--r--boost/compute/interop/vtk/points.hpp55
-rw-r--r--boost/compute/iterator.hpp28
-rw-r--r--boost/compute/iterator/buffer_iterator.hpp280
-rw-r--r--boost/compute/iterator/constant_buffer_iterator.hpp209
-rw-r--r--boost/compute/iterator/constant_iterator.hpp171
-rw-r--r--boost/compute/iterator/counting_iterator.hpp185
-rw-r--r--boost/compute/iterator/detail/get_base_iterator_buffer.hpp52
-rw-r--r--boost/compute/iterator/detail/swizzle_iterator.hpp188
-rw-r--r--boost/compute/iterator/discard_iterator.hpp170
-rw-r--r--boost/compute/iterator/function_input_iterator.hpp186
-rw-r--r--boost/compute/iterator/permutation_iterator.hpp192
-rw-r--r--boost/compute/iterator/strided_iterator.hpp296
-rw-r--r--boost/compute/iterator/transform_iterator.hpp227
-rw-r--r--boost/compute/iterator/zip_iterator.hpp316
-rw-r--r--boost/compute/kernel.hpp394
-rw-r--r--boost/compute/lambda.hpp22
-rw-r--r--boost/compute/lambda/context.hpp329
-rw-r--r--boost/compute/lambda/functional.hpp242
-rw-r--r--boost/compute/lambda/get.hpp148
-rw-r--r--boost/compute/lambda/make_pair.hpp70
-rw-r--r--boost/compute/lambda/make_tuple.hpp127
-rw-r--r--boost/compute/lambda/placeholder.hpp28
-rw-r--r--boost/compute/lambda/placeholders.hpp93
-rw-r--r--boost/compute/lambda/result_of.hpp113
-rw-r--r--boost/compute/memory.hpp21
-rw-r--r--boost/compute/memory/local_buffer.hpp91
-rw-r--r--boost/compute/memory/svm_ptr.hpp81
-rw-r--r--boost/compute/memory_object.hpp264
-rw-r--r--boost/compute/pipe.hpp154
-rw-r--r--boost/compute/platform.hpp235
-rw-r--r--boost/compute/program.hpp650
-rw-r--r--boost/compute/random.hpp28
-rw-r--r--boost/compute/random/bernoulli_distribution.hpp92
-rw-r--r--boost/compute/random/default_random_engine.hpp24
-rw-r--r--boost/compute/random/discrete_distribution.hpp117
-rw-r--r--boost/compute/random/linear_congruential_engine.hpp238
-rw-r--r--boost/compute/random/mersenne_twister_engine.hpp254
-rw-r--r--boost/compute/random/normal_distribution.hpp124
-rw-r--r--boost/compute/random/threefry_engine.hpp311
-rw-r--r--boost/compute/random/uniform_int_distribution.hpp111
-rw-r--r--boost/compute/random/uniform_real_distribution.hpp105
-rw-r--r--boost/compute/source.hpp12
-rw-r--r--boost/compute/svm.hpp62
-rw-r--r--boost/compute/system.hpp278
-rw-r--r--boost/compute/type_traits.hpp25
-rw-r--r--boost/compute/type_traits/common_type.hpp55
-rw-r--r--boost/compute/type_traits/detail/capture_traits.hpp33
-rw-r--r--boost/compute/type_traits/is_device_iterator.hpp39
-rw-r--r--boost/compute/type_traits/is_fundamental.hpp58
-rw-r--r--boost/compute/type_traits/is_vector_type.hpp38
-rw-r--r--boost/compute/type_traits/make_vector_type.hpp71
-rw-r--r--boost/compute/type_traits/result_of.hpp39
-rw-r--r--boost/compute/type_traits/scalar_type.hpp72
-rw-r--r--boost/compute/type_traits/type_definition.hpp39
-rw-r--r--boost/compute/type_traits/type_name.hpp124
-rw-r--r--boost/compute/type_traits/vector_size.hpp65
-rw-r--r--boost/compute/types.hpp24
-rw-r--r--boost/compute/types/builtin.hpp12
-rw-r--r--boost/compute/types/complex.hpp196
-rw-r--r--boost/compute/types/fundamental.hpp172
-rw-r--r--boost/compute/types/pair.hpp117
-rw-r--r--boost/compute/types/struct.hpp173
-rw-r--r--boost/compute/types/tuple.hpp220
-rw-r--r--boost/compute/user_event.hpp88
-rw-r--r--boost/compute/utility.hpp21
-rw-r--r--boost/compute/utility/dim.hpp76
-rw-r--r--boost/compute/utility/extents.hpp164
-rw-r--r--boost/compute/utility/invoke.hpp71
-rw-r--r--boost/compute/utility/program_cache.hpp172
-rw-r--r--boost/compute/utility/source.hpp39
-rw-r--r--boost/compute/utility/wait_list.hpp203
-rw-r--r--boost/compute/version.hpp18
-rw-r--r--boost/compute/wait_list.hpp12
309 files changed, 38849 insertions, 0 deletions
diff --git a/boost/compute/algorithm.hpp b/boost/compute/algorithm.hpp
new file mode 100644
index 0000000000..686640e9e9
--- /dev/null
+++ b/boost/compute/algorithm.hpp
@@ -0,0 +1,94 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_HPP
+#define BOOST_COMPUTE_ALGORITHM_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute algorithm headers.
+
+#include <boost/compute/algorithm/accumulate.hpp>
+#include <boost/compute/algorithm/adjacent_difference.hpp>
+#include <boost/compute/algorithm/adjacent_find.hpp>
+#include <boost/compute/algorithm/all_of.hpp>
+#include <boost/compute/algorithm/any_of.hpp>
+#include <boost/compute/algorithm/binary_search.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/algorithm/count.hpp>
+#include <boost/compute/algorithm/count_if.hpp>
+#include <boost/compute/algorithm/equal.hpp>
+#include <boost/compute/algorithm/equal_range.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/algorithm/find_end.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+#include <boost/compute/algorithm/find_if_not.hpp>
+#include <boost/compute/algorithm/for_each.hpp>
+#include <boost/compute/algorithm/for_each_n.hpp>
+#include <boost/compute/algorithm/gather.hpp>
+#include <boost/compute/algorithm/generate.hpp>
+#include <boost/compute/algorithm/generate_n.hpp>
+#include <boost/compute/algorithm/inclusive_scan.hpp>
+#include <boost/compute/algorithm/includes.hpp>
+#include <boost/compute/algorithm/inner_product.hpp>
+#include <boost/compute/algorithm/iota.hpp>
+#include <boost/compute/algorithm/is_partitioned.hpp>
+#include <boost/compute/algorithm/is_permutation.hpp>
+#include <boost/compute/algorithm/is_sorted.hpp>
+#include <boost/compute/algorithm/lower_bound.hpp>
+#include <boost/compute/algorithm/lexicographical_compare.hpp>
+#include <boost/compute/algorithm/max_element.hpp>
+#include <boost/compute/algorithm/merge.hpp>
+#include <boost/compute/algorithm/min_element.hpp>
+#include <boost/compute/algorithm/minmax_element.hpp>
+#include <boost/compute/algorithm/mismatch.hpp>
+#include <boost/compute/algorithm/next_permutation.hpp>
+#include <boost/compute/algorithm/none_of.hpp>
+#include <boost/compute/algorithm/partial_sum.hpp>
+#include <boost/compute/algorithm/partition.hpp>
+#include <boost/compute/algorithm/partition_copy.hpp>
+#include <boost/compute/algorithm/partition_point.hpp>
+#include <boost/compute/algorithm/prev_permutation.hpp>
+#include <boost/compute/algorithm/random_shuffle.hpp>
+#include <boost/compute/algorithm/reduce.hpp>
+#include <boost/compute/algorithm/reduce_by_key.hpp>
+#include <boost/compute/algorithm/remove.hpp>
+#include <boost/compute/algorithm/remove_if.hpp>
+#include <boost/compute/algorithm/replace.hpp>
+#include <boost/compute/algorithm/replace_copy.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+#include <boost/compute/algorithm/reverse_copy.hpp>
+#include <boost/compute/algorithm/rotate.hpp>
+#include <boost/compute/algorithm/rotate_copy.hpp>
+#include <boost/compute/algorithm/scatter.hpp>
+#include <boost/compute/algorithm/search.hpp>
+#include <boost/compute/algorithm/search_n.hpp>
+#include <boost/compute/algorithm/set_difference.hpp>
+#include <boost/compute/algorithm/set_intersection.hpp>
+#include <boost/compute/algorithm/set_symmetric_difference.hpp>
+#include <boost/compute/algorithm/set_union.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/algorithm/sort_by_key.hpp>
+#include <boost/compute/algorithm/stable_partition.hpp>
+#include <boost/compute/algorithm/stable_sort.hpp>
+#include <boost/compute/algorithm/stable_sort_by_key.hpp>
+#include <boost/compute/algorithm/swap_ranges.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/algorithm/transform_reduce.hpp>
+#include <boost/compute/algorithm/unique.hpp>
+#include <boost/compute/algorithm/unique_copy.hpp>
+#include <boost/compute/algorithm/upper_bound.hpp>
+
+#endif // BOOST_COMPUTE_ALGORITHM_HPP
diff --git a/boost/compute/algorithm/accumulate.hpp b/boost/compute/algorithm/accumulate.hpp
new file mode 100644
index 0000000000..328420a07c
--- /dev/null
+++ b/boost/compute/algorithm/accumulate.hpp
@@ -0,0 +1,184 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
+#define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
+
+#include <boost/preprocessor/seq/for_each.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/reduce.hpp>
+#include <boost/compute/algorithm/detail/serial_accumulate.hpp>
+#include <boost/compute/container/array.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_
+/// Accumulates the range [\p first, \p last) on the device with
+/// detail::serial_accumulate() and copies the single resulting value back to
+/// the host. An empty range yields \p init unchanged.
+template<class InputIterator, class T, class BinaryFunction>
+inline T generic_accumulate(InputIterator first,
+                            InputIterator last,
+                            T init,
+                            BinaryFunction function,
+                            command_queue &queue)
+{
+    const context &ctx = queue.get_context();
+
+    // nothing to accumulate: the initial value is the answer
+    if(iterator_range_size(first, last) == 0){
+        return init;
+    }
+
+    // one-element device array that receives the accumulated value
+    array<T, 1> accumulated(ctx);
+    detail::serial_accumulate(
+        first, last, accumulated.begin(), init, function, queue
+    );
+
+    // bring the single value back to host memory
+    T host_value;
+    ::boost::compute::copy_n(accumulated.begin(), 1, &host_value, queue);
+    return host_value;
+}
+
+// Fallback overload: tells the caller whether accumulate() may be carried
+// out with reduce() instead. That substitution is only made for operations
+// recognized by the more specific overloads of this function, and only when
+// the initial value is the identity value of the operation (zero for
+// addition, one for multiplication). Every other combination of initial
+// value and function lands here and is rejected.
+template<class T, class F>
+inline bool can_accumulate_with_reduce(T /* init */, F /* function */)
+{
+    return false;
+}
+
+/// \internal_
+/// Declares, for one integer \c type, the can_accumulate_with_reduce()
+/// overloads for plus<type> and multiplies<type>. Each overload returns true
+/// only when \c init equals the identity value of the operation (0 for
+/// plus, 1 for multiplies). The \c r and \c data parameters are not used by
+/// the macro body; they are present because the macro is expanded through
+/// BOOST_PP_SEQ_FOR_EACH below.
+#define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \
+ inline bool can_accumulate_with_reduce(type init, plus<type>) \
+ { \
+ return init == type(0); \
+ } \
+ inline bool can_accumulate_with_reduce(type init, multiplies<type>) \
+ { \
+ return init == type(1); \
+ }
+
+// Expand the macro once per listed integer type. No floating-point types are
+// listed, so plus/multiplies on those types use the generic fallback overload.
+BOOST_PP_SEQ_FOR_EACH(
+ BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE,
+ _,
+ (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_)
+)
+
+// min<T>: reduce() may be used when the initial value is
+// std::numeric_limits<T>::max().
+template<class T>
+inline bool can_accumulate_with_reduce(T init, min<T>)
+{
+    typedef std::numeric_limits<T> limits;
+
+    const bool init_is_identity = (init == (limits::max)());
+    return init_is_identity;
+}
+
+// max<T>: reduce() may be used when the initial value is the lowest value of
+// T, because reduce() does not take the initial value into account.
+//
+// std::numeric_limits<T>::min() is the lowest value only for integer types.
+// For floating-point types it is the smallest positive normalized value, so
+// accepting it would discard an initial value that is larger than any
+// negative input. The shortcut is therefore limited to integer types; all
+// other types use the serial accumulate path.
+//
+// \param init initial value passed to accumulate()
+// \return true if accumulate() can safely be replaced by reduce()
+template<class T>
+inline bool can_accumulate_with_reduce(T init, max<T>)
+{
+    typedef std::numeric_limits<T> limits;
+
+    return limits::is_integer && init == (limits::min)();
+}
+
+#undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE
+
+/// \internal_
+/// Chooses the implementation for accumulate(): reduce() when
+/// can_accumulate_with_reduce() accepts \p init and \p function, and
+/// generic_accumulate() otherwise. An empty range yields \p init.
+template<class InputIterator, class T, class BinaryFunction>
+inline T dispatch_accumulate(InputIterator first,
+                             InputIterator last,
+                             T init,
+                             BinaryFunction function,
+                             command_queue &queue)
+{
+    // empty input: nothing to combine with the initial value
+    if(iterator_range_size(first, last) == 0){
+        return init;
+    }
+
+    // general case: fall back to the serial accumulate path
+    if(!can_accumulate_with_reduce(init, function)){
+        return generic_accumulate(first, last, init, function, queue);
+    }
+
+    // init was accepted as the identity value, so it can be left out
+    T reduced;
+    reduce(first, last, &reduced, function, queue);
+    return reduced;
+}
+
+} // end detail namespace
+
+/// Returns the result of applying \p function to the elements in the
+/// range [\p first, \p last) and \p init. If the range is empty, \p init
+/// is returned.
+///
+/// If no function is specified, \c plus will be used.
+///
+/// \param first iterator to the first element in the input range
+/// \param last iterator to one past the last element in the input range
+/// \param init initial value
+/// \param function binary reduction function
+/// \param queue command queue to perform the operation
+///
+/// \return the accumulated result value
+///
+/// In specific situations the call to \c accumulate() can be automatically
+/// optimized to a call to the more efficient \c reduce() algorithm. This
+/// occurs when the binary reduction function is recognized as associative
+/// (such as the \c plus<int> function) and \p init is the identity value
+/// for that function (such as \c 0 for \c plus<int>).
+///
+/// Note that because floating-point addition is not associative, calling
+/// \c accumulate() with \c plus<float> results in a less efficient serial
+/// reduction algorithm being executed. If a slight loss in precision is
+/// acceptable, the more efficient parallel \c reduce() algorithm should be
+/// used instead.
+///
+/// For example:
+/// \code
+/// // with vec = boost::compute::vector<int>
+/// accumulate(vec.begin(), vec.end(), 0, plus<int>()); // fast
+/// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast
+///
+/// // with vec = boost::compute::vector<float>
+/// accumulate(vec.begin(), vec.end(), 0, plus<float>()); // slow
+/// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast
+/// \endcode
+///
+/// \see reduce()
+template<class InputIterator, class T, class BinaryFunction>
+inline T accumulate(InputIterator first,
+ InputIterator last,
+ T init,
+ BinaryFunction function,
+ command_queue &queue = system::default_queue())
+{
+ return detail::dispatch_accumulate(first, last, init, function, queue);
+}
+
+/// \overload
+///
+/// Accumulates with \c plus instantiated for the value type of
+/// \p InputIterator.
+template<class InputIterator, class T>
+inline T accumulate(InputIterator first,
+                    InputIterator last,
+                    T init,
+                    command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    // delegate to the overload that takes an explicit function
+    return ::boost::compute::accumulate(
+        first, last, init, plus<value_type>(), queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
diff --git a/boost/compute/algorithm/adjacent_difference.hpp b/boost/compute/algorithm/adjacent_difference.hpp
new file mode 100644
index 0000000000..a8f84e020e
--- /dev/null
+++ b/boost/compute/algorithm/adjacent_difference.hpp
@@ -0,0 +1,98 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/functional/operator.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+namespace detail {
+
+/// \internal_
+/// Returns \c true when the input and output iterators have the same type
+/// and compare equal, i.e. adjacent_difference() would run in place.
+template<class Iterator>
+inline bool adjacent_difference_in_place(const Iterator &first,
+                                         const Iterator &result)
+{
+    return first == result;
+}
+
+/// \internal_
+/// Iterators of different types are not compared (they need not be
+/// comparable with each other) and are treated as distinct ranges.
+template<class InputIterator, class OutputIterator>
+inline bool adjacent_difference_in_place(const InputIterator &,
+                                         const OutputIterator &)
+{
+    return false;
+}
+
+} // end detail namespace
+
+/// Stores the difference of each pair of consecutive values in the range
+/// [\p first, \p last) to the range beginning at \p result. If \p op is not
+/// provided, \c minus<T> is used.
+///
+/// The first output element is a copy of the first input element; every
+/// other output element \c i is \c op(first[i], first[i-1]).
+///
+/// The operation may be performed in place (\p result equal to \p first).
+/// In that case the input is first copied to a temporary device vector so
+/// that no element is overwritten before its successor has read it.
+///
+/// \param first iterator to the first element in the input range
+/// \param last iterator to one past the last element in the input range
+/// \param result first element in the output range
+/// \param op binary difference function
+/// \param queue command queue to perform the operation
+///
+/// \return \c OutputIterator to the end of the result range
+///
+/// \see adjacent_find()
+template<class InputIterator, class OutputIterator, class BinaryFunction>
+inline OutputIterator
+adjacent_difference(InputIterator first,
+                    InputIterator last,
+                    OutputIterator result,
+                    BinaryFunction op,
+                    command_queue &queue = system::default_queue())
+{
+    if(first == last){
+        return result;
+    }
+
+    // In-place use: each work-item reads element i-1, which another
+    // work-item may already have overwritten. Work from a copy instead.
+    if(detail::adjacent_difference_in_place(first, result)){
+        typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+        vector<value_type> temp(detail::iterator_range_size(first, last),
+                                queue.get_context());
+        ::boost::compute::copy(first, last, temp.begin(), queue);
+
+        return ::boost::compute::adjacent_difference(
+            temp.begin(), temp.end(), result, op, queue
+        );
+    }
+
+    size_t count = detail::iterator_range_size(first, last);
+
+    detail::meta_kernel k("adjacent_difference");
+
+    // one work-item per element; element 0 has no predecessor and is copied
+    k << "const uint i = get_global_id(0);\n"
+      << "if(i == 0){\n"
+      << " " << result[k.var<uint_>("0")] << " = " << first[k.var<uint_>("0")] << ";\n"
+      << "}\n"
+      << "else {\n"
+      << " " << result[k.var<uint_>("i")] << " = "
+      << op(first[k.var<uint_>("i")], first[k.var<uint_>("i-1")]) << ";\n"
+      << "}\n";
+
+    k.exec_1d(queue, 0, count, 1);
+
+    return result + count;
+}
+
+/// \overload
+///
+/// Uses \c minus instantiated for the value type of \p InputIterator. When
+/// \p result equals \p first the input is copied to a temporary device
+/// vector before the differences are computed.
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+adjacent_difference(InputIterator first,
+                    InputIterator last,
+                    OutputIterator result,
+                    command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+    typedef ::boost::compute::minus<value_type> difference_op;
+
+    // distinct ranges: operate on the input directly
+    if(!(first == result)){
+        return ::boost::compute::adjacent_difference(
+            first, last, result, difference_op(), queue
+        );
+    }
+
+    // in-place: take a snapshot of the input and read from that
+    vector<value_type> snapshot(detail::iterator_range_size(first, last),
+                                queue.get_context());
+    copy(first, last, snapshot.begin(), queue);
+
+    return ::boost::compute::adjacent_difference(
+        snapshot.begin(), snapshot.end(), result, difference_op(), queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP
diff --git a/boost/compute/algorithm/adjacent_find.hpp b/boost/compute/algorithm/adjacent_find.hpp
new file mode 100644
index 0000000000..992a01eddc
--- /dev/null
+++ b/boost/compute/algorithm/adjacent_find.hpp
@@ -0,0 +1,162 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP
+#define BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP
+
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/functional/operator.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Searches [first, last) with a single generated kernel that walks the
+// range sequentially and reports the index of the first element for which
+// compare(element, next element) is true. Returns first + that index; when
+// no pair matches the index equals the range size.
+template<class InputIterator, class Compare>
+inline InputIterator
+serial_adjacent_find(InputIterator first,
+ InputIterator last,
+ Compare compare,
+ command_queue &queue)
+{
+ // nothing to search in an empty range
+ if(first == last){
+ return last;
+ }
+
+ const context &context = queue.get_context();
+
+ // device-side scalar that receives the resulting index
+ detail::scalar<uint_> output(context);
+
+ detail::meta_kernel k("serial_adjacent_find");
+
+ size_t size_arg = k.add_arg<const uint_>("size");
+ size_t output_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output");
+
+ // generated kernel: "result" starts at size (meaning "not found") and is
+ // overwritten with the first i in [0, size-1) for which
+ // compare(first[i], first[i+1]) holds; the value is stored to *output
+ k << k.decl<uint_>("result") << " = size;\n"
+ << "for(uint i = 0; i < size - 1; i++){\n"
+ << " if(" << compare(first[k.expr<uint_>("i")],
+ first[k.expr<uint_>("i+1")]) << "){\n"
+ << " result = i;\n"
+ << " break;\n"
+ << " }\n"
+ << "}\n"
+ << "*output = result;\n";
+
+ k.set_arg<const uint_>(
+ size_arg, static_cast<uint_>(detail::iterator_range_size(first, last))
+ );
+ k.set_arg(output_arg, output.get_buffer());
+
+ // NOTE(review): presumably launches a single work-item (global size 1,
+ // local size 1) -- confirm against meta_kernel::exec_1d
+ k.exec_1d(queue, 0, 1, 1);
+
+ // read the index back from the device and convert it to an iterator
+ return first + output.read(queue);
+}
+
+// Searches [first, last) in parallel for the first element for which
+// compare(element, next element) is true.
+//
+// One work-item is launched per adjacent pair; every work-item whose pair
+// matches performs atomic_min() on a shared index that is initialized to the
+// range size, so the smallest matching index is kept.
+//
+// Returns first + index. When no pair matches (or the range holds fewer than
+// two elements) the result is last.
+template<class InputIterator, class Compare>
+inline InputIterator
+adjacent_find_with_atomics(InputIterator first,
+                           InputIterator last,
+                           Compare compare,
+                           command_queue &queue)
+{
+    if(first == last){
+        return last;
+    }
+
+    const context &context = queue.get_context();
+    size_t count = detail::iterator_range_size(first, last);
+
+    // a single element has no neighbour; without this guard the kernel
+    // below would be enqueued with a global work size of count - 1 == 0
+    if(count < 2){
+        return last;
+    }
+
+    // initialize output to the last index
+    detail::scalar<uint_> output(context);
+    output.write(static_cast<uint_>(count), queue);
+
+    detail::meta_kernel k("adjacent_find_with_atomics");
+
+    size_t output_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output");
+
+    // generated kernel: work-item i tests the pair (first[i], first[i+1])
+    // and, on a match, lowers *output to i
+    k << "const uint i = get_global_id(0);\n"
+      << "if(" << compare(first[k.expr<uint_>("i")],
+                          first[k.expr<uint_>("i+1")]) << "){\n"
+      << " atomic_min(output, i);\n"
+      << "}\n";
+
+    k.set_arg(output_arg, output.get_buffer());
+
+    // count - 1 adjacent pairs exist in a range of count elements
+    k.exec_1d(queue, 0, count - 1, 1);
+
+    return first + output.read(queue);
+}
+
+} // end detail namespace
+
+/// Searches the range [\p first, \p last) for the first element for which
+/// \p compare returns \c true when applied to that element and the element
+/// immediately following it.
+///
+/// \param first first element in the range to search
+/// \param last last element in the range to search
+/// \param compare binary comparison function
+/// \param queue command queue to perform the operation
+///
+/// \return \c InputIterator to the first element that satisfies \p compare
+///         together with its successor. If there is no such element,
+///         returns \c last.
+///
+/// \see find(), adjacent_difference()
+template<class InputIterator, class Compare>
+inline InputIterator
+adjacent_find(InputIterator first,
+              InputIterator last,
+              Compare compare,
+              command_queue &queue = system::default_queue())
+{
+    const size_t length = detail::iterator_range_size(first, last);
+
+    // ranges shorter than 32 elements use the serial kernel, longer ones
+    // the atomic_min based parallel kernel
+    return (length < 32)
+        ? detail::serial_adjacent_find(first, last, compare, queue)
+        : detail::adjacent_find_with_atomics(first, last, compare, queue);
+}
+
+/// \overload
+///
+/// Compares adjacent elements with <tt>_1 == _2</tt>. For value types whose
+/// \c vector_size is not 1 the equality expression is wrapped in
+/// \c lambda::all().
+template<class InputIterator>
+inline InputIterator
+adjacent_find(InputIterator first,
+              InputIterator last,
+              command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    using ::boost::compute::lambda::_1;
+    using ::boost::compute::lambda::_2;
+    using ::boost::compute::lambda::all;
+
+    const bool is_scalar = (vector_size<element_type>::value == 1);
+
+    return is_scalar
+        ? ::boost::compute::adjacent_find(first, last, _1 == _2, queue)
+        : ::boost::compute::adjacent_find(first, last, all(_1 == _2), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP
diff --git a/boost/compute/algorithm/all_of.hpp b/boost/compute/algorithm/all_of.hpp
new file mode 100644
index 0000000000..34d7518f32
--- /dev/null
+++ b/boost/compute/algorithm/all_of.hpp
@@ -0,0 +1,36 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP
+#define BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/find_if_not.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns \c true when \p predicate returns \c true for every element of
+/// the range [\p first, \p last), i.e. when find_if_not() finds no element
+/// and returns \p last.
+///
+/// \see any_of(), none_of()
+template<class InputIterator, class UnaryPredicate>
+inline bool all_of(InputIterator first,
+                   InputIterator last,
+                   UnaryPredicate predicate,
+                   command_queue &queue = system::default_queue())
+{
+    // position of the first element that fails the predicate, or last
+    InputIterator first_failure =
+        ::boost::compute::find_if_not(first, last, predicate, queue);
+
+    return first_failure == last;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP
diff --git a/boost/compute/algorithm/any_of.hpp b/boost/compute/algorithm/any_of.hpp
new file mode 100644
index 0000000000..b07779597c
--- /dev/null
+++ b/boost/compute/algorithm/any_of.hpp
@@ -0,0 +1,40 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP
+#define BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns \c true when \p predicate returns \c true for at least one
+/// element of the range [\p first, \p last), i.e. when find_if() returns
+/// something other than \p last.
+///
+/// For example, to test if a vector contains any negative values:
+///
+/// \snippet test/test_any_all_none_of.cpp any_of
+///
+/// \see all_of(), none_of()
+template<class InputIterator, class UnaryPredicate>
+inline bool any_of(InputIterator first,
+                   InputIterator last,
+                   UnaryPredicate predicate,
+                   command_queue &queue = system::default_queue())
+{
+    // position of the first element that satisfies the predicate, or last
+    InputIterator first_match =
+        ::boost::compute::find_if(first, last, predicate, queue);
+
+    return first_match != last;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP
diff --git a/boost/compute/algorithm/binary_search.hpp b/boost/compute/algorithm/binary_search.hpp
new file mode 100644
index 0000000000..6e19498790
--- /dev/null
+++ b/boost/compute/algorithm/binary_search.hpp
@@ -0,0 +1,37 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP
+#define BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/lower_bound.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns \c true if \p value is found in the sorted range
+/// [\p first, \p last).
+///
+/// The candidate position is obtained with lower_bound(); the element at
+/// that position is read back through \p queue and compared with \p value.
+template<class InputIterator, class T>
+inline bool binary_search(InputIterator first,
+                          InputIterator last,
+                          const T &value,
+                          command_queue &queue = system::default_queue())
+{
+    InputIterator candidate = lower_bound(first, last, value, queue);
+
+    // only read the element back when the candidate lies inside the range
+    const bool inside_range = (candidate != last);
+
+    return inside_range && candidate.read(queue) == value;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP
diff --git a/boost/compute/algorithm/copy.hpp b/boost/compute/algorithm/copy.hpp
new file mode 100644
index 0000000000..2a25059bba
--- /dev/null
+++ b/boost/compute/algorithm/copy.hpp
@@ -0,0 +1,362 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
+#define BOOST_COMPUTE_ALGORITHM_COPY_HPP
+
+#include <algorithm>
+#include <iterator>
+
+#include <boost/utility/enable_if.hpp>
+
+#include <boost/mpl/and.hpp>
+#include <boost/mpl/not.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/copy_on_device.hpp>
+#include <boost/compute/algorithm/detail/copy_to_device.hpp>
+#include <boost/compute/algorithm/detail/copy_to_host.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/detail/is_contiguous_iterator.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+namespace mpl = boost::mpl;
+
+// meta-function returning true if copy() between InputIterator and
+// OutputIterator can be implemented with clEnqueueCopyBuffer().
+//
+// All three conditions must hold:
+//  * InputIterator is exactly buffer_iterator<InputIterator::value_type>
+//  * OutputIterator is exactly buffer_iterator<OutputIterator::value_type>
+//  * both iterators have the same value_type
+//
+// Note that both iterator types must provide a nested value_type for this
+// meta-function to be instantiated.
+template<class InputIterator, class OutputIterator>
+struct can_copy_with_copy_buffer :
+ mpl::and_<
+ boost::is_same<
+ InputIterator,
+ buffer_iterator<typename InputIterator::value_type>
+ >,
+ boost::is_same<
+ OutputIterator,
+ buffer_iterator<typename OutputIterator::value_type>
+ >,
+ boost::is_same<
+ typename InputIterator::value_type,
+ typename OutputIterator::value_type
+ >
+ >::type {};
+
+// host -> device
+//
+// Selected when the input is a host iterator and the output is a device
+// iterator. Contiguous host ranges are passed straight to copy_to_device();
+// any other host range is staged through a temporary std::vector first.
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+dispatch_copy(InputIterator first,
+              InputIterator last,
+              OutputIterator result,
+              command_queue &queue,
+              typename boost::enable_if_c<
+                  !is_device_iterator<InputIterator>::value &&
+                  is_device_iterator<OutputIterator>::value
+              >::type* = 0)
+{
+    if(!is_contiguous_iterator<InputIterator>::value){
+        // gather the non-contiguous host values into contiguous storage
+        // and upload from there
+        typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+        std::vector<element_type> staging(first, last);
+        return copy_to_device(staging.begin(), staging.end(), result, queue);
+    }
+
+    return copy_to_device(first, last, result, queue);
+}
+
+// host -> device (async)
+//
+// Selected when the input is a host iterator and the output is a device
+// iterator. Compilation fails for host iterators that are not contiguous.
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator>
+dispatch_copy_async(InputIterator first,
+                    InputIterator last,
+                    OutputIterator result,
+                    command_queue &queue,
+                    typename boost::enable_if_c<
+                        !is_device_iterator<InputIterator>::value &&
+                        is_device_iterator<OutputIterator>::value
+                    >::type* = 0)
+{
+    BOOST_STATIC_ASSERT_MSG(
+        is_contiguous_iterator<InputIterator>::value,
+        "copy_async() is only supported for contiguous host iterators"
+    );
+
+    future<OutputIterator> pending_upload =
+        copy_to_device_async(first, last, result, queue);
+
+    return pending_upload;
+}
+
+// device -> host
+//
+// Selected when the input is a device iterator and the output is a host
+// iterator. Contiguous host destinations are filled directly by
+// copy_to_host(); any other destination is filled from a temporary
+// std::vector that receives the device data first.
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+dispatch_copy(InputIterator first,
+              InputIterator last,
+              OutputIterator result,
+              command_queue &queue,
+              typename boost::enable_if_c<
+                  is_device_iterator<InputIterator>::value &&
+                  !is_device_iterator<OutputIterator>::value
+              >::type* = 0)
+{
+    if(!is_contiguous_iterator<OutputIterator>::value){
+        // for non-contiguous output we first download the values into a
+        // temporary std::vector and then copy from there on the host
+        typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+        std::vector<element_type> staging(iterator_range_size(first, last));
+        copy_to_host(first, last, staging.begin(), queue);
+        return std::copy(staging.begin(), staging.end(), result);
+    }
+
+    return copy_to_host(first, last, result, queue);
+}
+
+// device -> host (async)
+//
+// Selected when the input is a device iterator and the output is a host
+// iterator. Compilation fails for host iterators that are not contiguous.
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator>
+dispatch_copy_async(InputIterator first,
+                    InputIterator last,
+                    OutputIterator result,
+                    command_queue &queue,
+                    typename boost::enable_if_c<
+                        is_device_iterator<InputIterator>::value &&
+                        !is_device_iterator<OutputIterator>::value
+                    >::type* = 0)
+{
+    BOOST_STATIC_ASSERT_MSG(
+        is_contiguous_iterator<OutputIterator>::value,
+        "copy_async() is only supported for contiguous host iterators"
+    );
+
+    future<OutputIterator> pending_download =
+        copy_to_host_async(first, last, result, queue);
+
+    return pending_download;
+}
+
+// device -> device
+//
+// Selected when both iterators are device iterators and
+// can_copy_with_copy_buffer is false for the pair; forwards to
+// copy_on_device().
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+dispatch_copy(InputIterator first,
+              InputIterator last,
+              OutputIterator result,
+              command_queue &queue,
+              typename boost::enable_if<
+                  mpl::and_<
+                      is_device_iterator<InputIterator>,
+                      is_device_iterator<OutputIterator>,
+                      mpl::not_<
+                          can_copy_with_copy_buffer<
+                              InputIterator, OutputIterator
+                          >
+                      >
+                  >
+              >::type* = 0)
+{
+    OutputIterator output_end = copy_on_device(first, last, result, queue);
+
+    return output_end;
+}
+
+// device -> device (specialization for buffer iterators)
+//
+// Selected when both iterators are device iterators and
+// can_copy_with_copy_buffer is true for the pair. The copy is issued as a
+// single enqueue_copy_buffer() call using byte offsets and a byte count
+// derived from the iterator indices and sizeof(value_type).
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+dispatch_copy(InputIterator first,
+              InputIterator last,
+              OutputIterator result,
+              command_queue &queue,
+              typename boost::enable_if<
+                  mpl::and_<
+                      is_device_iterator<InputIterator>,
+                      is_device_iterator<OutputIterator>,
+                      can_copy_with_copy_buffer<
+                          InputIterator, OutputIterator
+                      >
+                  >
+              >::type* = 0)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+    typedef typename std::iterator_traits<InputIterator>::difference_type distance_type;
+
+    const distance_type element_count = std::distance(first, last);
+    if(element_count < 1){
+        // nothing to copy
+        return result;
+    }
+
+    // translate element positions into byte offsets within the buffers
+    const size_t source_offset = first.get_index() * sizeof(element_type);
+    const size_t target_offset = result.get_index() * sizeof(element_type);
+    const size_t byte_count =
+        static_cast<size_t>(element_count) * sizeof(element_type);
+
+    queue.enqueue_copy_buffer(first.get_buffer(),
+                              result.get_buffer(),
+                              source_offset,
+                              target_offset,
+                              byte_count);
+
+    return result + element_count;
+}
+
+// device -> device (async)
+//
+// Selected when both iterators are device iterators and
+// can_copy_with_copy_buffer is false for the pair; forwards to
+// copy_on_device_async().
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator>
+dispatch_copy_async(InputIterator first,
+                    InputIterator last,
+                    OutputIterator result,
+                    command_queue &queue,
+                    typename boost::enable_if<
+                        mpl::and_<
+                            is_device_iterator<InputIterator>,
+                            is_device_iterator<OutputIterator>,
+                            mpl::not_<
+                                can_copy_with_copy_buffer<
+                                    InputIterator, OutputIterator
+                                >
+                            >
+                        >
+                    >::type* = 0)
+{
+    future<OutputIterator> pending_copy =
+        copy_on_device_async(first, last, result, queue);
+
+    return pending_copy;
+}
+
+// device -> device (async, specialization for buffer iterators)
+//
+// Selected when both iterators are device iterators and
+// can_copy_with_copy_buffer is true for the pair. Enqueues one
+// enqueue_copy_buffer() command and returns a future that pairs the end of
+// the output range with the event of that command. For an empty (or
+// reversed) range the future holds \p result and a default-constructed
+// event.
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator>
+dispatch_copy_async(InputIterator first,
+                    InputIterator last,
+                    OutputIterator result,
+                    command_queue &queue,
+                    typename boost::enable_if<
+                        mpl::and_<
+                            is_device_iterator<InputIterator>,
+                            is_device_iterator<OutputIterator>,
+                            can_copy_with_copy_buffer<
+                                InputIterator, OutputIterator
+                            >
+                        >
+                    >::type* = 0)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+    typedef typename std::iterator_traits<InputIterator>::difference_type distance_type;
+
+    const distance_type element_count = std::distance(first, last);
+    if(element_count < 1){
+        // nothing to copy
+        return make_future(result, event());
+    }
+
+    // translate element positions into byte offsets within the buffers
+    const size_t source_offset = first.get_index() * sizeof(element_type);
+    const size_t target_offset = result.get_index() * sizeof(element_type);
+    const size_t byte_count =
+        static_cast<size_t>(element_count) * sizeof(element_type);
+
+    event copy_event = queue.enqueue_copy_buffer(first.get_buffer(),
+                                                 result.get_buffer(),
+                                                 source_offset,
+                                                 target_offset,
+                                                 byte_count);
+
+    return make_future(result + element_count, copy_event);
+}
+
+// host -> host
+//
+// Selected when neither iterator is a device iterator. The command queue is
+// not used; the copy is performed with std::copy().
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+dispatch_copy(InputIterator first,
+              InputIterator last,
+              OutputIterator result,
+              command_queue &queue,
+              typename boost::enable_if_c<
+                  !is_device_iterator<InputIterator>::value &&
+                  !is_device_iterator<OutputIterator>::value
+              >::type* = 0)
+{
+    // silence the unused-parameter warning
+    static_cast<void>(queue);
+
+    return std::copy(first, last, result);
+}
+
+} // end detail namespace
+
+/// Copies the values in the range [\p first, \p last) to the range
+/// beginning at \p result.
+///
+/// copy() is a generic entry point for data transfer and offers a standard
+/// interface to the following OpenCL functions:
+///
+/// \li \c clEnqueueReadBuffer()
+/// \li \c clEnqueueWriteBuffer()
+/// \li \c clEnqueueCopyBuffer()
+///
+/// In contrast to those OpenCL functions, copy() also works with
+/// non-contiguous data-structures (e.g. \c std::list<T>) and with "fancy"
+/// iterators (e.g. transform_iterator).
+///
+/// \param first first element in the range to copy
+/// \param last last element in the range to copy
+/// \param result first element in the result range
+/// \param queue command queue to perform the operation
+///
+/// \return \c OutputIterator to the end of the result range
+///
+/// For example, to copy an array of \c int values on the host to a vector on
+/// the device:
+/// \code
+/// // array on the host
+/// int data[] = { 1, 2, 3, 4 };
+///
+/// // vector on the device
+/// boost::compute::vector<int> vec(4, context);
+///
+/// // copy values to the device vector
+/// boost::compute::copy(data, data + 4, vec.begin(), queue);
+/// \endcode
+///
+/// The copy algorithm can also be used with standard containers such as
+/// \c std::vector<T>:
+/// \code
+/// std::vector<int> host_vector = ...
+/// boost::compute::vector<int> device_vector = ...
+///
+/// // copy from the host to the device
+/// boost::compute::copy(
+///     host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+/// );
+///
+/// // copy from the device to the host
+/// boost::compute::copy(
+///     device_vector.begin(), device_vector.end(), host_vector.begin(), queue
+/// );
+/// \endcode
+///
+/// \see copy_n(), copy_if(), copy_async()
+template<class InputIterator, class OutputIterator>
+inline OutputIterator copy(InputIterator first,
+                           InputIterator last,
+                           OutputIterator result,
+                           command_queue &queue = system::default_queue())
+{
+    // the matching detail::dispatch_copy() overload is selected from the
+    // host/device nature of the two iterator types
+    OutputIterator output_end =
+        detail::dispatch_copy(first, last, result, queue);
+
+    return output_end;
+}
+
+/// Asynchronously copies the values in the range [\p first, \p last) to the
+/// range beginning at \p result.
+///
+/// \return a \c future holding the \c OutputIterator to the end of the
+///         result range
+///
+/// \see copy()
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator>
+copy_async(InputIterator first,
+           InputIterator last,
+           OutputIterator result,
+           command_queue &queue = system::default_queue())
+{
+    // the matching detail::dispatch_copy_async() overload is selected from
+    // the host/device nature of the two iterator types
+    future<OutputIterator> pending =
+        detail::dispatch_copy_async(first, last, result, queue);
+
+    return pending;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP
diff --git a/boost/compute/algorithm/copy_if.hpp b/boost/compute/algorithm/copy_if.hpp
new file mode 100644
index 0000000000..3cd08ef293
--- /dev/null
+++ b/boost/compute/algorithm/copy_if.hpp
@@ -0,0 +1,58 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP
+
+#include <boost/compute/algorithm/transform_if.hpp>
+#include <boost/compute/functional/identity.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Variant of copy_if() that writes the indices of the values for which
+// predicate returns true instead of the values themselves.
+template<class InputIterator, class OutputIterator, class Predicate>
+inline OutputIterator copy_index_if(InputIterator first,
+                                    InputIterator last,
+                                    OutputIterator result,
+                                    Predicate predicate,
+                                    command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    // NOTE(review): the boolean passed to transform_if_impl() presumably
+    // selects index output -- confirm against transform_if.hpp
+    const bool write_indices = true;
+
+    return detail::transform_if_impl(
+        first,
+        last,
+        result,
+        identity<element_type>(),
+        predicate,
+        write_indices,
+        queue
+    );
+}
+
+} // end detail namespace
+
+/// Copies every element of the range [\p first, \p last) for which
+/// \p predicate returns \c true to the range beginning at \p result.
+///
+/// Implemented as transform_if() with the identity function.
+template<class InputIterator, class OutputIterator, class Predicate>
+inline OutputIterator copy_if(InputIterator first,
+                              InputIterator last,
+                              OutputIterator result,
+                              Predicate predicate,
+                              command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    return ::boost::compute::transform_if(
+        first,
+        last,
+        result,
+        identity<element_type>(),
+        predicate,
+        queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP
diff --git a/boost/compute/algorithm/copy_n.hpp b/boost/compute/algorithm/copy_n.hpp
new file mode 100644
index 0000000000..f0989edc67
--- /dev/null
+++ b/boost/compute/algorithm/copy_n.hpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_COPY_N_HPP
+#define BOOST_COMPUTE_ALGORITHM_COPY_N_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies \p count elements starting at \p first to the range beginning at
+/// \p result.
+///
+/// For example, to copy four values from the host to the device:
+/// \code
+/// // values on the host and vector on the device
+/// float values[4] = { 1.f, 2.f, 3.f, 4.f };
+/// boost::compute::vector<float> vec(4, context);
+///
+/// // copy from the host to the device
+/// boost::compute::copy_n(values, 4, vec.begin(), queue);
+/// \endcode
+///
+/// \see copy()
+template<class InputIterator, class Size, class OutputIterator>
+inline OutputIterator copy_n(InputIterator first,
+                             Size count,
+                             OutputIterator result,
+                             command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::difference_type distance_type;
+
+    // end of the input range: count elements past first
+    const InputIterator input_end = first + static_cast<distance_type>(count);
+
+    return ::boost::compute::copy(first, input_end, result, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_COPY_N_HPP
diff --git a/boost/compute/algorithm/count.hpp b/boost/compute/algorithm/count.hpp
new file mode 100644
index 0000000000..140d67379f
--- /dev/null
+++ b/boost/compute/algorithm/count.hpp
@@ -0,0 +1,55 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_COUNT_HPP
+#define BOOST_COMPUTE_ALGORITHM_COUNT_HPP
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/count_if.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns the number of elements in the range [\p first, \p last) that
+/// compare equal to \p value.
+///
+/// The comparison is <tt>_1 == value</tt>; for value types whose
+/// \c vector_size is not 1 the expression is wrapped in \c lambda::all().
+///
+/// \see count_if()
+template<class InputIterator, class T>
+inline size_t count(InputIterator first,
+                    InputIterator last,
+                    const T &value,
+                    command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    using ::boost::compute::_1;
+    using ::boost::compute::lambda::all;
+
+    const bool is_scalar = (vector_size<element_type>::value == 1);
+
+    return is_scalar
+        ? ::boost::compute::count_if(first, last, _1 == value, queue)
+        : ::boost::compute::count_if(first, last, all(_1 == value), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_COUNT_HPP
diff --git a/boost/compute/algorithm/count_if.hpp b/boost/compute/algorithm/count_if.hpp
new file mode 100644
index 0000000000..c9381ce5d4
--- /dev/null
+++ b/boost/compute/algorithm/count_if.hpp
@@ -0,0 +1,62 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP
+
+#include <boost/compute/device.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/count_if_with_ballot.hpp>
+#include <boost/compute/algorithm/detail/count_if_with_reduce.hpp>
+#include <boost/compute/algorithm/detail/count_if_with_threads.hpp>
+#include <boost/compute/algorithm/detail/serial_count_if.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns the number of elements in the range [\p first, \p last)
+/// for which \p predicate returns \c true.
+///
+/// An empty range yields 0 without running a kernel. Otherwise the
+/// implementation is chosen from the queue's device type and the input size:
+/// small inputs (fewer than 1024 elements on CPU devices, fewer than 32 on
+/// other devices) use serial_count_if(); larger inputs use
+/// count_if_with_threads() on CPU devices and count_if_with_reduce()
+/// elsewhere.
+template<class InputIterator, class Predicate>
+inline size_t count_if(InputIterator first,
+                       InputIterator last,
+                       Predicate predicate,
+                       command_queue &queue = system::default_queue())
+{
+    const device &target_device = queue.get_device();
+
+    const size_t element_count = detail::iterator_range_size(first, last);
+    if(element_count == 0){
+        return 0;
+    }
+
+    const bool runs_on_cpu = (target_device.type() & device::cpu) != 0;
+
+    // below this size the serial kernel is used
+    const size_t serial_threshold = runs_on_cpu ? 1024 : 32;
+
+    if(element_count < serial_threshold){
+        return detail::serial_count_if(first, last, predicate, queue);
+    }
+
+    if(runs_on_cpu){
+        return detail::count_if_with_threads(first, last, predicate, queue);
+    }
+
+    return detail::count_if_with_reduce(first, last, predicate, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP
diff --git a/boost/compute/algorithm/detail/balanced_path.hpp b/boost/compute/algorithm/detail/balanced_path.hpp
new file mode 100644
index 0000000000..e5025532d3
--- /dev/null
+++ b/boost/compute/algorithm/detail/balanced_path.hpp
@@ -0,0 +1,162 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/find_if.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Balanced Path kernel class
+///
+/// Subclass of meta_kernel to break two sets into tiles according
+/// to their balanced path.
+///
+/// Usage: call set_range() once to generate the kernel source and record the
+/// input sizes, then exec() to launch it. Calling exec() before set_range()
+/// is a no-op that returns a default-constructed event.
+///
+class balanced_path_kernel : public meta_kernel
+{
+public:
+    /// Number of elements (from both inputs combined) covered by one tile.
+    unsigned int tile_size;
+
+    // All members are initialized so that exec() never reads indeterminate
+    // counts or argument indices when set_range() has not been called yet.
+    balanced_path_kernel()
+        : meta_kernel("balanced_path"),
+          tile_size(4),
+          m_a_count(0),
+          m_a_count_arg(0),
+          m_b_count(0),
+          m_b_count_arg(0)
+    {
+    }
+
+    /// Generates the kernel source for the ranges [\p first1, \p last1) and
+    /// [\p first2, \p last2) using the comparison \p comp.
+    ///
+    /// Work-item i computes a pair (a_index, b_index) for the target
+    /// position (i+1)*tile_size and stores it in \p result_a[i] and
+    /// \p result_b[i].
+    template<class InputIterator1, class InputIterator2,
+             class OutputIterator1, class OutputIterator2,
+             class Compare>
+    void set_range(InputIterator1 first1,
+                   InputIterator1 last1,
+                   InputIterator2 first2,
+                   InputIterator2 last2,
+                   OutputIterator1 result_a,
+                   OutputIterator2 result_b,
+                   Compare comp)
+    {
+        typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+
+        // remember the sizes; they are bound to the kernel in exec()
+        m_a_count = iterator_range_size(first1, last1);
+        m_a_count_arg = add_arg<uint_>("a_count");
+
+        m_b_count = iterator_range_size(first2, last2);
+        m_b_count_arg = add_arg<uint_>("b_count");
+
+        *this <<
+            // binary search for the split of "target" between the inputs
+            "uint i = get_global_id(0);\n" <<
+            "uint target = (i+1)*" << tile_size << ";\n" <<
+            "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" <<
+            "uint end = min(target,a_count);\n" <<
+            "uint a_index, b_index;\n" <<
+            "while(start<end)\n" <<
+            "{\n" <<
+            " a_index = (start + end)/2;\n" <<
+            " b_index = target - a_index - 1;\n" <<
+            " if(!(" << comp(first2[expr<uint_>("b_index")],
+                             first1[expr<uint_>("a_index")]) << "))\n" <<
+            " start = a_index + 1;\n" <<
+            " else end = a_index;\n" <<
+            "}\n" <<
+            "a_index = start;\n" <<
+            "b_index = target - start;\n" <<
+            // adjust the split using the run of values around x = b[b_index]
+            "if(b_index < b_count)\n" <<
+            "{\n" <<
+            " " << decl<const value_type>("x") << " = " <<
+                    first2[expr<uint_>("b_index")] << ";\n" <<
+            " uint a_start = 0, a_end = a_index, a_mid;\n" <<
+            " uint b_start = 0, b_end = b_index, b_mid;\n" <<
+            " while(a_start<a_end)\n" <<
+            " {\n" <<
+            " a_mid = (a_start + a_end)/2;\n" <<
+            " if(" << comp(first1[expr<uint_>("a_mid")], expr<value_type>("x")) << ")\n" <<
+            " a_start = a_mid+1;\n" <<
+            " else a_end = a_mid;\n" <<
+            " }\n" <<
+            " while(b_start<b_end)\n" <<
+            " {\n" <<
+            " b_mid = (b_start + b_end)/2;\n" <<
+            " if(" << comp(first2[expr<uint_>("b_mid")], expr<value_type>("x")) << ")\n" <<
+            " b_start = b_mid+1;\n" <<
+            " else b_end = b_mid;\n" <<
+            " }\n" <<
+            " uint a_run = a_index - a_start;\n" <<
+            " uint b_run = b_index - b_start;\n" <<
+            " uint x_count = a_run + b_run;\n" <<
+            " uint b_advance = max(x_count / 2, x_count - a_run);\n" <<
+            " b_end = min(b_count, b_start + b_advance + 1);\n" <<
+            " uint temp_start = b_index, temp_end = b_end, temp_mid;" <<
+            " while(temp_start < temp_end)\n" <<
+            " {\n" <<
+            " temp_mid = (temp_start + temp_end + 1)/2;\n" <<
+            " if(" << comp(expr<value_type>("x"), first2[expr<uint_>("temp_mid")]) << ")\n" <<
+            " temp_end = temp_mid-1;\n" <<
+            " else temp_start = temp_mid;\n" <<
+            " }\n" <<
+            " b_run = temp_start - b_start + 1;\n" <<
+            " b_advance = min(b_advance, b_run);\n" <<
+            " uint a_advance = x_count - b_advance;\n" <<
+            " uint star = convert_uint((a_advance == b_advance + 1) " <<
+            "&& (b_advance < b_run));\n" <<
+            " a_index = a_start + a_advance;\n" <<
+            " b_index = target - a_index + star;\n" <<
+            "}\n" <<
+            // publish the split for this tile
+            result_a[expr<uint_>("i")] << " = a_index;\n" <<
+            result_b[expr<uint_>("i")] << " = b_index;\n";
+
+    }
+
+    /// \overload
+    ///
+    /// Uses \c less<value_type> (value type of \c InputIterator1) as the
+    /// comparison.
+    template<class InputIterator1, class InputIterator2,
+             class OutputIterator1, class OutputIterator2>
+    void set_range(InputIterator1 first1,
+                   InputIterator1 last1,
+                   InputIterator2 first2,
+                   InputIterator2 last2,
+                   OutputIterator1 result_a,
+                   OutputIterator2 result_b)
+    {
+        typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+        ::boost::compute::less<value_type> less_than;
+        set_range(first1, last1, first2, last2, result_a, result_b, less_than);
+    }
+
+    /// Binds the recorded sizes to the kernel and launches it with
+    /// (a_count + b_count) / tile_size as the size argument of exec_1d().
+    /// Returns a default-constructed event without launching anything when
+    /// that quotient is zero.
+    event exec(command_queue &queue)
+    {
+        if((m_a_count + m_b_count)/tile_size == 0) {
+            return event();
+        }
+
+        set_arg(m_a_count_arg, uint_(m_a_count));
+        set_arg(m_b_count_arg, uint_(m_b_count));
+
+        return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size);
+    }
+
+private:
+    size_t m_a_count;      // number of elements in [first1, last1)
+    size_t m_a_count_arg;  // kernel argument index of "a_count"
+    size_t m_b_count;      // number of elements in [first2, last2)
+    size_t m_b_count_arg;  // kernel argument index of "b_count"
+};
+
+} //end detail namespace
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP
diff --git a/boost/compute/algorithm/detail/binary_find.hpp b/boost/compute/algorithm/detail/binary_find.hpp
new file mode 100644
index 0000000000..27fa11fbaf
--- /dev/null
+++ b/boost/compute/algorithm/detail/binary_find.hpp
@@ -0,0 +1,133 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail{
+
+///
+/// \brief Binary find kernel class
+///
+/// Subclass of meta_kernel to perform single step in binary find.
+///
+template<class InputIterator, class UnaryPredicate>
+class binary_find_kernel : public meta_kernel
+{
+public:
+ // Generates the kernel source. Each work-item reads the element of
+ // \p first at index get_global_id(0) * block and, when \p predicate holds
+ // for that element, lowers *index to that position via an atomic minimum.
+ // \p last is accepted but not used by this constructor.
+ binary_find_kernel(InputIterator first,
+ InputIterator last,
+ UnaryPredicate predicate)
+ : meta_kernel("binary_find")
+ {
+ typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+ // kernel arguments: "index" is a global uint buffer, "block" is the
+ // stride between the positions sampled by consecutive work-items
+ m_index_arg = add_arg<uint_ *>(memory_object::global_memory, "index");
+ m_block_arg = add_arg<uint_>("block");
+
+ atomic_min<uint_> atomic_min_uint;
+
+ *this <<
+ "uint i = get_global_id(0) * block;\n" <<
+ decl<value_type>("value") << "=" << first[var<uint_>("i")] << ";\n" <<
+ "if(" << predicate(var<value_type>("value")) << ") {\n" <<
+ atomic_min_uint(var<uint_ *>("index"), var<uint_>("i")) << ";\n" <<
+ "}\n";
+ }
+
+ // argument slots returned by add_arg(); public so that the caller can
+ // bind the index buffer and the block size on the compiled kernel
+ size_t m_index_arg;
+ size_t m_block_arg;
+};
+
+///
+/// \brief Binary find algorithm
+///
+/// Finds the end of true values in the partitioned range [first, last).
+/// \return Iterator pointing to end of true values
+///
+/// \param first Iterator pointing to start of range
+/// \param last Iterator pointing to end of range
+/// \param predicate Predicate according to which the range is partitioned
+/// \param queue Queue on which to execute
+///
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator binary_find(InputIterator first,
+ InputIterator last,
+ UnaryPredicate predicate,
+ command_queue &queue = system::default_queue())
+{
+ const device &device = queue.get_device();
+
+ boost::shared_ptr<parameter_cache> parameters =
+ detail::parameter_cache::get_global_cache(device);
+
+ const std::string cache_key = "__boost_binary_find";
+
+ // ranges of at most find_if_limit elements are handed to find_if();
+ // "tpb" is the number of work-items per narrowing step (default 128)
+ size_t find_if_limit = 128;
+ size_t threads = parameters->get(cache_key, "tpb", 128);
+ size_t count = iterator_range_size(first, last);
+
+ // current search window, narrowed by every loop iteration below
+ InputIterator search_first = first;
+ InputIterator search_last = last;
+
+ // device-side scalar receiving the smallest sampled matching index
+ scalar<uint_> index(queue.get_context());
+
+ // construct and compile binary_find kernel
+ // NOTE(review): the kernel is generated once, here, while search_first
+ // still equals first; later iterations move search_first but reuse this
+ // kernel. Confirm that the sampled positions are meant to stay relative to
+ // the original first.
+ binary_find_kernel<InputIterator, UnaryPredicate>
+ binary_find_kernel(search_first, search_last, predicate);
+ ::boost::compute::kernel kernel = binary_find_kernel.compile(queue.get_context());
+
+ // set buffer for index
+ kernel.set_arg(binary_find_kernel.m_index_arg, index.get_buffer());
+
+ while(count > find_if_limit) {
+ // count serves as the "no sampled element matched" sentinel
+ index.write(static_cast<uint_>(count), queue);
+
+ // set block and run binary_find kernel
+ // NOTE(review): threads - 1 is used as a divisor here and below; a
+ // cached "tpb" value of 1 would divide by zero - confirm the cache
+ // can never yield a value below 2.
+ uint_ block = static_cast<uint_>((count - 1)/(threads - 1));
+ kernel.set_arg(binary_find_kernel.m_block_arg, block);
+ queue.enqueue_1d_range_kernel(kernel, 0, threads, 0);
+
+ size_t i = index.read(queue);
+
+ if(i == count) {
+ // sentinel untouched: no sampled element matched, so only the
+ // remainder after the last sampled position is left to search
+ search_first = search_last - ((count - 1)%(threads - 1));
+ break;
+ } else {
+ // keep only the single block that ends at the first sampled match
+ search_last = search_first + i;
+ search_first = search_last - ((count - 1)/(threads - 1));
+ }
+
+ // Make sure that first and last stay within the input range
+ search_last = (std::min)(search_last, last);
+ search_last = (std::max)(search_last, first);
+
+ search_first = (std::max)(search_first, first);
+ search_first = (std::min)(search_first, last);
+
+ count = iterator_range_size(search_first, search_last);
+ }
+
+ // finish on the remaining window with find_if()
+ return find_if(search_first, search_last, predicate, queue);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP
diff --git a/boost/compute/algorithm/detail/compact.hpp b/boost/compute/algorithm/detail/compact.hpp
new file mode 100644
index 0000000000..983352d543
--- /dev/null
+++ b/boost/compute/algorithm/detail/compact.hpp
@@ -0,0 +1,77 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP
+
+#include <iterator>
+
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Compact kernel class
+///
+/// Subclass of meta_kernel to compact the result of set kernels to
+/// get actual sets
+///
+class compact_kernel : public meta_kernel
+{
+public:
+    /// Stride, in elements of the input, between the slices read by
+    /// consecutive work-items (work-item i starts reading at i * tile_size).
+    unsigned int tile_size;
+
+    /// Creates the kernel with a tile size of 4 and no range set, so that
+    /// calling exec() before set_range() is a harmless no-op.
+    compact_kernel()
+        : meta_kernel("compact"),
+          tile_size(4),
+          m_count(0)
+    {
+    }
+
+    /// Generates the kernel source.
+    ///
+    /// Work-item i copies elements from \p start, beginning at position
+    /// i * tile_size, into \p result at positions
+    /// [counts_begin[i], counts_begin[i+1]).
+    ///
+    /// \param start input values, laid out in slices of tile_size elements
+    /// \param counts_begin start of the output offsets (one more entry than
+    ///        there are work-items)
+    /// \param counts_end end of the output offsets
+    /// \param result destination of the compacted values
+    template<class InputIterator1, class InputIterator2, class OutputIterator>
+    void set_range(InputIterator1 start,
+                   InputIterator2 counts_begin,
+                   InputIterator2 counts_end,
+                   OutputIterator result)
+    {
+        // one work-item per pair of adjacent offsets; an empty offsets range
+        // must not wrap around to a huge unsigned work size
+        const size_t offsets = iterator_range_size(counts_begin, counts_end);
+        m_count = (offsets > 0) ? offsets - 1 : 0;
+
+        *this <<
+            "uint i = get_global_id(0);\n" <<
+            "uint count = i*" << tile_size << ";\n" <<
+            "for(uint j = " << counts_begin[expr<uint_>("i")] << "; j<" <<
+                counts_begin[expr<uint_>("i+1")] << "; j++, count++)\n" <<
+            "{\n" <<
+                result[expr<uint_>("j")] << " = " << start[expr<uint_>("count")]
+                    << ";\n" <<
+            "}\n";
+    }
+
+    /// Runs the kernel with one work-item per slice; returns a
+    /// default-constructed event when there is nothing to do.
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    size_t m_count;
+};
+
+} //end detail namespace
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP
diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp
new file mode 100644
index 0000000000..0bcee27ed5
--- /dev/null
+++ b/boost/compute/algorithm/detail/copy_on_device.hpp
@@ -0,0 +1,190 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
+
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+#include <boost/compute/detail/work_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Returns the largest power of two not exceeding 32 that divides \p n
+/// evenly (1 when \p n is odd). \p device is currently ignored.
+inline size_t pick_copy_work_group_size(size_t n, const device &device)
+{
+    (void) device;
+
+    // try 32, 16, 8, 4, 2 in that order
+    for(size_t candidate = 32; candidate > 1; candidate /= 2){
+        if(n % candidate == 0){
+            return candidate;
+        }
+    }
+
+    return 1;
+}
+
+/// Meta-kernel that copies a range element by element. Every work-item
+/// handles up to "vpt" values and work-groups contain "tpb" work-items;
+/// both parameters come from the per-device parameter cache.
+template<class InputIterator, class OutputIterator>
+class copy_kernel : public meta_kernel
+{
+public:
+    copy_kernel(const device &device)
+        : meta_kernel("copy"),
+          m_count(0)
+    {
+        typedef typename std::iterator_traits<InputIterator>::value_type input_type;
+
+        // tuning parameters are keyed on the size of the copied value type
+        const std::string cache_key =
+            "__boost_copy_kernel_" + boost::lexical_cast<std::string>(sizeof(input_type));
+
+        boost::shared_ptr<parameter_cache> parameters =
+            detail::parameter_cache::get_global_cache(device);
+
+        m_vpt = parameters->get(cache_key, "vpt", 4);
+        m_tpb = parameters->get(cache_key, "tpb", 128);
+    }
+
+    /// Generates the kernel source for copying [first, last) to \p result
+    /// and records the number of elements to copy.
+    void set_range(InputIterator first,
+                   InputIterator last,
+                   OutputIterator result)
+    {
+        m_count = detail::iterator_range_size(first, last);
+        m_count_arg = add_arg<uint_>("count");
+
+        // each work-group covers vpt * tpb consecutive elements; within a
+        // group a work-item advances by tpb elements per iteration
+        *this <<
+            "uint index = get_local_id(0) + " <<
+               "(" << m_vpt * m_tpb << " * get_group_id(0));\n" <<
+            "for(uint i = 0; i < " << m_vpt << "; i++){\n" <<
+            " if(index < count){\n" <<
+                    result[expr<uint_>("index")] << '=' <<
+                        first[expr<uint_>("index")] << ";\n" <<
+            " index += " << m_tpb << ";\n"
+            " }\n"
+            "}\n";
+    }
+
+    /// Enqueues the copy; returns a default-constructed event when the
+    /// range is empty.
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0){
+            // nothing to do
+            return event();
+        }
+
+        set_arg(m_count_arg, uint_(m_count));
+
+        const size_t global_work_size =
+            calculate_work_size(m_count, m_vpt, m_tpb);
+
+        return exec_1d(queue, 0, global_work_size, m_tpb);
+    }
+
+private:
+    size_t m_count;
+    size_t m_count_arg;
+    uint_ m_vpt;
+    uint_ m_tpb;
+};
+
+/// Copies [first, last) to \p result with a copy_kernel enqueued on
+/// \p queue and returns the iterator past the last written element.
+template<class InputIterator, class OutputIterator>
+inline OutputIterator copy_on_device(InputIterator first,
+                                     InputIterator last,
+                                     OutputIterator result,
+                                     command_queue &queue)
+{
+    copy_kernel<InputIterator, OutputIterator> copier(queue.get_device());
+
+    copier.set_range(first, last, result);
+    copier.exec(queue);
+
+    return result + std::distance(first, last);
+}
+
+/// Overload for a discard_iterator destination: nothing is enqueued, the
+/// output iterator is merely advanced by the length of the input range.
+template<class InputIterator>
+inline discard_iterator copy_on_device(InputIterator first,
+                                       InputIterator last,
+                                       discard_iterator result,
+                                       command_queue &queue)
+{
+    // no work is submitted, so the queue is intentionally unused
+    (void) queue;
+
+    typedef typename
+        std::iterator_traits<InputIterator>::difference_type
+        difference_type;
+
+    const difference_type skipped = std::distance(first, last);
+    return result + skipped;
+}
+
+/// Same as copy_on_device(), but returns a future that pairs the end of the
+/// output range with the event returned by the kernel launch.
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator> copy_on_device_async(InputIterator first,
+                                                   InputIterator last,
+                                                   OutputIterator result,
+                                                   command_queue &queue)
+{
+    copy_kernel<InputIterator, OutputIterator> copier(queue.get_device());
+    copier.set_range(first, last, result);
+
+    event copy_done = copier.exec(queue);
+    OutputIterator result_end = result + std::distance(first, last);
+
+    return make_future(result_end, copy_done);
+}
+
+#ifdef CL_VERSION_2_0
+// copy_on_device() overload for svm_ptr: copies with a single SVM memcpy
+// instead of a copy kernel
+template<class T>
+inline svm_ptr<T> copy_on_device(svm_ptr<T> first,
+                                 svm_ptr<T> last,
+                                 svm_ptr<T> result,
+                                 command_queue &queue)
+{
+    const size_t n = iterator_range_size(first, last);
+    if(n > 0){
+        queue.enqueue_svm_memcpy(result.get(), first.get(), n * sizeof(T));
+        return result + n;
+    }
+
+    // empty range: nothing is enqueued
+    return result;
+}
+
+/// Asynchronous copy_on_device() overload for svm_ptr.
+///
+/// Enqueues an asynchronous SVM memcpy of [first, last) to \p result and
+/// returns a future holding the end of the output range together with the
+/// event of the copy. For an empty range nothing is enqueued and a
+/// default-constructed future is returned, as the other *_async copy
+/// helpers do.
+template<class T>
+inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first,
+                                                svm_ptr<T> last,
+                                                svm_ptr<T> result,
+                                                command_queue &queue)
+{
+    size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        // the return type is a future, not the bare pointer
+        return future<svm_ptr<T> >();
+    }
+
+    event event_ = queue.enqueue_svm_memcpy_async(
+        result.get(), first.get(), count * sizeof(T)
+    );
+
+    return make_future(result + count, event_);
+}
+#endif // CL_VERSION_2_0
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
diff --git a/boost/compute/algorithm/detail/copy_to_device.hpp b/boost/compute/algorithm/detail/copy_to_device.hpp
new file mode 100644
index 0000000000..90545fb4ed
--- /dev/null
+++ b/boost/compute/algorithm/detail/copy_to_device.hpp
@@ -0,0 +1,127 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP
+
+#include <iterator>
+
+#include <boost/utility/addressof.hpp>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Writes the host range [first, last) into the buffer behind \p result,
+/// starting at the element index of \p result, and returns the iterator
+/// past the last written element. The host data is read starting at the
+/// address of *first.
+template<class HostIterator, class DeviceIterator>
+inline DeviceIterator copy_to_device(HostIterator first,
+                                     HostIterator last,
+                                     DeviceIterator result,
+                                     command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<DeviceIterator>::value_type
+        value_type;
+    typedef typename
+        std::iterator_traits<DeviceIterator>::difference_type
+        difference_type;
+
+    const size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        return result;
+    }
+
+    // the buffer API works in bytes, the iterators in elements
+    const size_t byte_offset = result.get_index() * sizeof(value_type);
+    const size_t byte_count = count * sizeof(value_type);
+
+    queue.enqueue_write_buffer(result.get_buffer(),
+                               byte_offset,
+                               byte_count,
+                               ::boost::addressof(*first));
+
+    return result + static_cast<difference_type>(count);
+}
+
+/// Asynchronous counterpart of copy_to_device(): returns a future holding
+/// the end of the output range and the event of the enqueued write. An
+/// empty input range yields a default-constructed future.
+template<class HostIterator, class DeviceIterator>
+inline future<DeviceIterator> copy_to_device_async(HostIterator first,
+                                                   HostIterator last,
+                                                   DeviceIterator result,
+                                                   command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<DeviceIterator>::value_type
+        value_type;
+    typedef typename
+        std::iterator_traits<DeviceIterator>::difference_type
+        difference_type;
+
+    const size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        return future<DeviceIterator>();
+    }
+
+    // the buffer API works in bytes, the iterators in elements
+    const size_t byte_offset = result.get_index() * sizeof(value_type);
+    const size_t byte_count = count * sizeof(value_type);
+
+    event write_done =
+        queue.enqueue_write_buffer_async(result.get_buffer(),
+                                         byte_offset,
+                                         byte_count,
+                                         ::boost::addressof(*first));
+
+    return make_future(result + static_cast<difference_type>(count), write_done);
+}
+
+#ifdef CL_VERSION_2_0
+// copy_to_device() overload for svm_ptr: copies the host range with a
+// single SVM memcpy
+template<class HostIterator, class T>
+inline svm_ptr<T> copy_to_device(HostIterator first,
+                                 HostIterator last,
+                                 svm_ptr<T> result,
+                                 command_queue &queue)
+{
+    const size_t n = iterator_range_size(first, last);
+    if(n > 0){
+        queue.enqueue_svm_memcpy(
+            result.get(), ::boost::addressof(*first), n * sizeof(T)
+        );
+        return result + n;
+    }
+
+    // empty range: nothing is enqueued
+    return result;
+}
+
+/// Asynchronous copy_to_device() overload for svm_ptr.
+///
+/// Enqueues an asynchronous SVM memcpy from the host range [first, last)
+/// to \p result and returns a future holding the end of the output range
+/// together with the event of the copy. For an empty range nothing is
+/// enqueued and a default-constructed future is returned, matching the
+/// buffer-iterator version of copy_to_device_async().
+template<class HostIterator, class T>
+inline future<svm_ptr<T> > copy_to_device_async(HostIterator first,
+                                                HostIterator last,
+                                                svm_ptr<T> result,
+                                                command_queue &queue)
+{
+    size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        // the return type is a future, not the bare pointer
+        return future<svm_ptr<T> >();
+    }
+
+    event event_ = queue.enqueue_svm_memcpy_async(
+        result.get(), ::boost::addressof(*first), count * sizeof(T)
+    );
+
+    return make_future(result + count, event_);
+}
+#endif // CL_VERSION_2_0
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP
diff --git a/boost/compute/algorithm/detail/copy_to_host.hpp b/boost/compute/algorithm/detail/copy_to_host.hpp
new file mode 100644
index 0000000000..b889e0c871
--- /dev/null
+++ b/boost/compute/algorithm/detail/copy_to_host.hpp
@@ -0,0 +1,137 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP
+
+#include <iterator>
+
+#include <boost/utility/addressof.hpp>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+#include <boost/compute/detail/iterator_plus_distance.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Reads the device range [first, last) from the buffer behind \p first
+/// into host memory starting at the address of *result, and returns the
+/// host iterator advanced by the number of elements read.
+template<class DeviceIterator, class HostIterator>
+inline HostIterator copy_to_host(DeviceIterator first,
+                                 DeviceIterator last,
+                                 HostIterator result,
+                                 command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<DeviceIterator>::value_type
+        value_type;
+
+    const size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        return result;
+    }
+
+    // the buffer API works in bytes, the iterators in elements
+    const size_t byte_offset = first.get_index() * sizeof(value_type);
+    const size_t byte_count = count * sizeof(value_type);
+
+    queue.enqueue_read_buffer(first.get_buffer(),
+                              byte_offset,
+                              byte_count,
+                              ::boost::addressof(*result));
+
+    return iterator_plus_distance(result, count);
+}
+
+// copy_to_host() overload for std::vector<bool> destinations: the device
+// data is first read into a temporary std::vector<uint8_t> and then copied
+// element by element into the vector<bool>
+template<class DeviceIterator>
+inline std::vector<bool>::iterator
+copy_to_host(DeviceIterator first,
+             DeviceIterator last,
+             std::vector<bool>::iterator result,
+             command_queue &queue)
+{
+    std::vector<uint8_t> staging(std::distance(first, last));
+
+    copy_to_host(first, last, staging.begin(), queue);
+
+    return std::copy(staging.begin(), staging.end(), result);
+}
+
+/// Asynchronous counterpart of copy_to_host(): returns a future holding the
+/// advanced host iterator and the event of the enqueued read. An empty
+/// input range yields a default-constructed future.
+template<class DeviceIterator, class HostIterator>
+inline future<HostIterator> copy_to_host_async(DeviceIterator first,
+                                               DeviceIterator last,
+                                               HostIterator result,
+                                               command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<DeviceIterator>::value_type
+        value_type;
+
+    const size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        return future<HostIterator>();
+    }
+
+    // the buffer API works in bytes, the iterators in elements
+    const size_t byte_offset = first.get_index() * sizeof(value_type);
+    const size_t byte_count = count * sizeof(value_type);
+
+    event read_done =
+        queue.enqueue_read_buffer_async(first.get_buffer(),
+                                        byte_offset,
+                                        byte_count,
+                                        ::boost::addressof(*result));
+
+    return make_future(iterator_plus_distance(result, count), read_done);
+}
+
+#ifdef CL_VERSION_2_0
+// copy_to_host() overload for svm_ptr: copies [first, last) into host
+// memory starting at the address of *result with a single SVM memcpy and
+// returns the host iterator advanced by the number of elements copied.
+// An empty range enqueues nothing.
+template<class T, class HostIterator>
+inline HostIterator copy_to_host(svm_ptr<T> first,
+                                 svm_ptr<T> last,
+                                 HostIterator result,
+                                 command_queue &queue)
+{
+    size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        return result;
+    }
+
+    queue.enqueue_svm_memcpy(
+        ::boost::addressof(*result), first.get(), count * sizeof(T)
+    );
+
+    // advance the host iterator the same way the other copy_to_host()
+    // overloads in this header do
+    return iterator_plus_distance(result, count);
+}
+
+/// Asynchronous copy_to_host() overload for svm_ptr.
+///
+/// Enqueues an asynchronous SVM memcpy of [first, last) into host memory
+/// starting at the address of *result and returns a future holding the
+/// advanced host iterator together with the event of the copy. For an
+/// empty range nothing is enqueued and a default-constructed future is
+/// returned, matching the buffer-iterator version of copy_to_host_async().
+template<class T, class HostIterator>
+inline future<HostIterator> copy_to_host_async(svm_ptr<T> first,
+                                               svm_ptr<T> last,
+                                               HostIterator result,
+                                               command_queue &queue)
+{
+    size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        // the return type is a future, not the bare iterator
+        return future<HostIterator>();
+    }
+
+    event event_ = queue.enqueue_svm_memcpy_async(
+        ::boost::addressof(*result), first.get(), count * sizeof(T)
+    );
+
+    return make_future(iterator_plus_distance(result, count), event_);
+}
+#endif // CL_VERSION_2_0
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP
diff --git a/boost/compute/algorithm/detail/count_if_with_ballot.hpp b/boost/compute/algorithm/detail/count_if_with_ballot.hpp
new file mode 100644
index 0000000000..584ef37ab9
--- /dev/null
+++ b/boost/compute/algorithm/detail/count_if_with_ballot.hpp
@@ -0,0 +1,78 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
+
+#include <boost/compute/context.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/algorithm/reduce.hpp>
+#include <boost/compute/functional/detail/nvidia_ballot.hpp>
+#include <boost/compute/functional/detail/nvidia_popcount.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Counts the elements of [first, last) for which \p predicate holds.
+///
+/// The range is processed in work-groups of 32 work-items. Every work-item
+/// evaluates the predicate for one element (work-items past the end of the
+/// range contribute false), the votes are combined with the ballot
+/// function, and the first work-item of each group stores the population
+/// count of the ballot. The per-group counts are then summed with reduce().
+///
+/// \return number of matching elements; 0 for an empty range
+template<class InputIterator, class Predicate>
+inline size_t count_if_with_ballot(InputIterator first,
+                                   InputIterator last,
+                                   Predicate predicate,
+                                   command_queue &queue)
+{
+    size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        // nothing to count: avoid launching a kernel with a global size of
+        // zero and reducing an empty vector into an unset result
+        return 0;
+    }
+
+    // round the number of 32-element blocks up
+    size_t block_size = 32;
+    size_t block_count = count / block_size;
+    if(block_count * block_size != count){
+        block_count++;
+    }
+
+    const ::boost::compute::context &context = queue.get_context();
+
+    // one partial count per work-group
+    ::boost::compute::vector<uint_> counts(block_count, context);
+
+    ::boost::compute::detail::nvidia_popcount<uint_> popc;
+    ::boost::compute::detail::nvidia_ballot<uint_> ballot;
+
+    meta_kernel k("count_if_with_ballot");
+    k <<
+        "const uint gid = get_global_id(0);\n" <<
+
+        "bool value = false;\n" <<
+        "if(gid < count)\n" <<
+        " value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" <<
+
+        "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" <<
+
+        "if(get_local_id(0) == 0)\n" <<
+            counts.begin()[k.var<uint_>("get_group_id(0)") ]
+                << " = " << popc(k.var<uint_>("bits")) << ";\n";
+
+    k.add_set_arg<const uint_>("count", count);
+
+    k.exec_1d(queue, 0, block_size * block_count, block_size);
+
+    uint_ result = 0;
+    ::boost::compute::reduce(
+        counts.begin(),
+        counts.end(),
+        &result,
+        queue
+    );
+    return result;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
diff --git a/boost/compute/algorithm/detail/count_if_with_reduce.hpp b/boost/compute/algorithm/detail/count_if_with_reduce.hpp
new file mode 100644
index 0000000000..f9449f4a41
--- /dev/null
+++ b/boost/compute/algorithm/detail/count_if_with_reduce.hpp
@@ -0,0 +1,87 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP
+
+#include <boost/compute/algorithm/reduce.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// expression object pairing a predicate with the argument it is applied
+// to; streaming it into a meta_kernel is handled by the operator<< overload
+// for this type
+template<class Predicate, class Arg>
+struct invoked_countable_predicate
+{
+    invoked_countable_predicate(Predicate predicate_, Arg arg_)
+        : predicate(predicate_),
+          arg(arg_)
+    {
+    }
+
+    Predicate predicate;
+    Arg arg;
+};
+
+// emits the predicate invocation wrapped in a ternary that yields 1 or 0
+template<class Predicate, class Arg>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_countable_predicate<Predicate, Arg> &expr)
+{
+    kernel << "(" << expr.predicate(expr.arg);
+    return kernel << " ? 1 : 0)";
+}
+
+// countable_predicate adapts a boolean Predicate for use with reduce():
+// invoking it produces an expression that evaluates to 1 or 0, and its
+// result_type is ulong
+template<class Predicate>
+struct countable_predicate
+{
+    typedef ulong_ result_type;
+
+    countable_predicate(Predicate predicate)
+        : m_predicate(predicate)
+    {
+    }
+
+    // binds the wrapped predicate to the given argument expression
+    template<class Arg>
+    invoked_countable_predicate<Predicate, Arg> operator()(const Arg &arg) const
+    {
+        typedef invoked_countable_predicate<Predicate, Arg> invocation;
+        return invocation(m_predicate, arg);
+    }
+
+    Predicate m_predicate;
+};
+
+// counts the elements of [first, last) that satisfy predicate by summing,
+// with reduce(), the 1/0 values produced by a countable_predicate
+template<class InputIterator, class Predicate>
+inline size_t count_if_with_reduce(InputIterator first,
+                                   InputIterator last,
+                                   Predicate predicate,
+                                   command_queue &queue)
+{
+    ulong_ matches = 0;
+
+    countable_predicate<Predicate> as_number(predicate);
+
+    ::boost::compute::reduce(
+        ::boost::compute::make_transform_iterator(first, as_number),
+        ::boost::compute::make_transform_iterator(last, as_number),
+        &matches,
+        ::boost::compute::plus<ulong_>(),
+        queue
+    );
+
+    return static_cast<size_t>(matches);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP
diff --git a/boost/compute/algorithm/detail/count_if_with_threads.hpp b/boost/compute/algorithm/detail/count_if_with_threads.hpp
new file mode 100644
index 0000000000..6f282982e0
--- /dev/null
+++ b/boost/compute/algorithm/detail/count_if_with_threads.hpp
@@ -0,0 +1,129 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
+
+#include <numeric>
+
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Kernel that counts matching elements with one contiguous block of the
+// input per work-item. Each work-item writes its partial count to
+// counts[gid]; exec() sums the partial counts on the host.
+// Note that the class derives privately from meta_kernel.
+template<class InputIterator, class Predicate>
+class count_if_with_threads_kernel : meta_kernel
+{
+public:
+ typedef typename
+ std::iterator_traits<InputIterator>::value_type
+ value_type;
+
+ count_if_with_threads_kernel()
+ : meta_kernel("count_if_with_threads")
+ {
+ }
+
+ // Records the size of [first, last), declares the kernel arguments and
+ // generates the kernel source. Must be called before exec(), which uses
+ // the argument indices and the size stored here.
+ void set_args(InputIterator first,
+ InputIterator last,
+ Predicate predicate)
+
+ {
+ typedef typename std::iterator_traits<InputIterator>::value_type T;
+
+ m_size = detail::iterator_range_size(first, last);
+
+ m_size_arg = add_arg<const ulong_>("size");
+ m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
+
+ // NOTE(review): "size" is a ulong argument but block_size, start, end
+ // and count are declared uint in the kernel source below, so values
+ // are narrowed to 32 bits inside the kernel - confirm this is intended.
+ *this <<
+ // thread parameters
+ "const uint gid = get_global_id(0);\n" <<
+ "const uint block_size = size / get_global_size(0);\n" <<
+ "const uint start = block_size * gid;\n" <<
+ "uint end = 0;\n" <<
+ "if(gid == get_global_size(0) - 1)\n" <<
+ " end = size;\n" <<
+ "else\n" <<
+ " end = block_size * gid + block_size;\n" <<
+
+ // count values
+ "uint count = 0;\n" <<
+ "for(uint i = start; i < end; i++){\n" <<
+ decl<const T>("value") << "="
+ << first[expr<uint_>("i")] << ";\n" <<
+ if_(predicate(var<const T>("value"))) << "{\n" <<
+ "count++;\n" <<
+ "}\n" <<
+ "}\n" <<
+
+ // write count
+ "counts[gid] = count;\n";
+ }
+
+ // Runs the kernel on the given queue and returns the total number of
+ // matching elements.
+ size_t exec(command_queue &queue)
+ {
+ const device &device = queue.get_device();
+ const context &context = queue.get_context();
+
+ // start with one work-item per compute unit
+ size_t threads = device.compute_units();
+
+ // use fewer work-items (but at least one) when each of them would
+ // otherwise get less than minimum_block_size elements
+ const size_t minimum_block_size = 2048;
+ if(m_size / threads < minimum_block_size){
+ threads = static_cast<size_t>(
+ (std::max)(
+ std::ceil(float(m_size) / minimum_block_size),
+ 1.0f
+ )
+ );
+ }
+
+ // storage for counts
+ ::boost::compute::vector<ulong_> counts(threads, context);
+
+ // exec kernel
+ // (global size = threads, work-group size = 1)
+ set_arg(m_size_arg, static_cast<ulong_>(m_size));
+ set_arg(m_counts_arg, counts.get_buffer());
+ exec_1d(queue, 0, threads, 1);
+
+ // copy counts to the host
+ std::vector<ulong_> host_counts(threads);
+ ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
+
+ // return sum of counts
+ return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
+ }
+
+private:
+ // number of input elements, set by set_args()
+ size_t m_size;
+ // argument indices returned by add_arg() in set_args()
+ size_t m_size_arg;
+ size_t m_counts_arg;
+};
+
+// counts the values matching the predicate with one thread per block of
+// the input. this is optimized for cpu-type devices with a small number of
+// compute units.
+template<class InputIterator, class Predicate>
+inline size_t count_if_with_threads(InputIterator first,
+                                    InputIterator last,
+                                    Predicate predicate,
+                                    command_queue &queue)
+{
+    typedef count_if_with_threads_kernel<InputIterator, Predicate> kernel_type;
+
+    kernel_type counter;
+    counter.set_args(first, last, predicate);
+
+    return counter.exec(queue);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
diff --git a/boost/compute/algorithm/detail/find_extrema.hpp b/boost/compute/algorithm/detail/find_extrema.hpp
new file mode 100644
index 0000000000..6e756c3904
--- /dev/null
+++ b/boost/compute/algorithm/detail/find_extrema.hpp
@@ -0,0 +1,64 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP
+
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/algorithm/detail/find_extrema_with_reduce.hpp>
+#include <boost/compute/algorithm/detail/find_extrema_with_atomics.hpp>
+#include <boost/compute/algorithm/detail/serial_find_extrema.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Dispatches to one of the find_extrema implementations:
+///  - ranges with fewer than two elements return \p first directly,
+///  - ranges with fewer than 512 elements, and all ranges on CPU devices,
+///    use serial_find_extrema(),
+///  - find_extrema_with_reduce() is used when its requirements are met,
+///  - otherwise find_extrema_with_atomics() is used, except when
+///    CL_VERSION_1_1 is not defined, in which case the serial version is
+///    used because of problems with atomic_cmpxchg().
+template<class InputIterator, class Compare>
+inline InputIterator find_extrema(InputIterator first,
+                                  InputIterator last,
+                                  Compare compare,
+                                  const bool find_minimum,
+                                  command_queue &queue)
+{
+    const size_t count = iterator_range_size(first, last);
+
+    // handle trivial cases
+    if(count < 2){
+        return first;
+    }
+
+    const device &target = queue.get_device();
+
+    // use serial method for small inputs
+    // and when device is a CPU
+    if(count < 512 || (target.type() & device::cpu)){
+        return serial_find_extrema(first, last, compare, find_minimum, queue);
+    }
+
+    // find_extrema_with_reduce() is used only if requirements are met
+    if(find_extrema_with_reduce_requirements_met(first, last, queue)){
+        return find_extrema_with_reduce(first, last, compare, find_minimum, queue);
+    }
+
+    // use serial method for OpenCL version 1.0 due to
+    // problems with atomic_cmpxchg()
+#ifdef CL_VERSION_1_1
+    return find_extrema_with_atomics(first, last, compare, find_minimum, queue);
+#else
+    return serial_find_extrema(first, last, compare, find_minimum, queue);
+#endif
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP
diff --git a/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp b/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp
new file mode 100644
index 0000000000..406d1becb7
--- /dev/null
+++ b/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp
@@ -0,0 +1,108 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP
+
+#include <boost/compute/types.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/functional/atomic.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_
+/// Finds an extremum of [first, last) with one work-item per element.
+///
+/// Every work-item compares its own element against the element at the
+/// currently stored index and tries to install its own index with
+/// atomic_cmpxchg() while its element wins the comparison (or ties with a
+/// lower index). The maximum variant of the kernel is selected at compile
+/// time through the BOOST_COMPUTE_FIND_MAXIMUM define.
+///
+/// Returns \p first advanced by the index left in the device-side scalar.
+template<class InputIterator, class Compare>
+inline InputIterator find_extrema_with_atomics(InputIterator first,
+                                               InputIterator last,
+                                               Compare compare,
+                                               const bool find_minimum,
+                                               command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+
+    const context &ctx = queue.get_context();
+    const size_t element_count = iterator_range_size(first, last);
+
+    meta_kernel k("find_extrema");
+    atomic_cmpxchg<uint_> atomic_cmpxchg_uint;
+
+    k <<
+        "const uint gid = get_global_id(0);\n" <<
+        "uint old_index = *index;\n" <<
+
+        k.decl<value_type>("old") <<
+            " = " << first[k.var<uint_>("old_index")] << ";\n" <<
+        k.decl<value_type>("new") <<
+            " = " << first[k.var<uint_>("gid")] << ";\n" <<
+
+        k.decl<bool>("compare_result") << ";\n" <<
+        "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" <<
+        // same loop condition as the minimum case below, with the
+        // arguments of the comparison swapped
+        "while(" <<
+            "(compare_result = " << compare(k.var<value_type>("old"),
+                                            k.var<value_type>("new")) << ")" <<
+            " || (!(compare_result" <<
+            " || " << compare(k.var<value_type>("new"),
+                              k.var<value_type>("old")) << ") "
+            "&& gid < old_index)){\n" <<
+        "#else\n" <<
+        // loop condition for the minimum case, spelled out for less (<):
+        //   while(new_value < old_value
+        //         OR (new_value == old_value AND new_index < old_index))
+        "while(" <<
+            "(compare_result = " << compare(k.var<value_type>("new"),
+                                            k.var<value_type>("old")) << ")" <<
+            " || (!(compare_result" <<
+            " || " << compare(k.var<value_type>("old"),
+                              k.var<value_type>("new")) << ") "
+            "&& gid < old_index)){\n" <<
+        "#endif\n" <<
+
+        // try to publish this work-item's index; on failure reload the
+        // winner and re-evaluate the loop condition against it
+        " if(" << atomic_cmpxchg_uint(k.var<uint_ *>("index"),
+                                      k.var<uint_>("old_index"),
+                                      k.var<uint_>("gid")) << " == old_index)\n" <<
+        " break;\n" <<
+        " else\n" <<
+        " old_index = *index;\n" <<
+        "old = " << first[k.var<uint_>("old_index")] << ";\n" <<
+        "}\n";
+
+    const size_t index_arg_index =
+        k.add_arg<uint_ *>(memory_object::global_memory, "index");
+
+    // the maximum search is a compile-time variant of the same kernel
+    const std::string options =
+        find_minimum ? std::string()
+                     : std::string("-DBOOST_COMPUTE_FIND_MAXIMUM");
+    kernel extrema_kernel = k.compile(ctx, options);
+
+    // device-side scalar holding the index of the current winner,
+    // starting at element 0
+    scalar<uint_> winner_index(ctx);
+    extrema_kernel.set_arg(index_arg_index, winner_index.get_buffer());
+    winner_index.write(0, queue);
+
+    // one work-item per input element
+    queue.enqueue_1d_range_kernel(extrema_kernel, 0, element_count, 0);
+
+    // read the winning index back and turn it into an iterator
+    const uint_ winner = winner_index.read(queue);
+    return first + static_cast<difference_type>(winner);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP
diff --git a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
new file mode 100644
index 0000000000..1fbb7dee19
--- /dev/null
+++ b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
@@ -0,0 +1,443 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP
+
+#include <algorithm>
+
+#include <boost/compute/types.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/allocator/pinned_allocator.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+#include <boost/compute/memory/local_buffer.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_
+/// Tells whether find_extrema_with_reduce() should be used on the device of
+/// \p queue for the value type of \p InputIterator.
+///
+/// Returns true only if the device reports dedicated local memory
+/// (CL_LOCAL) and that memory is large enough to hold the scratch storage
+/// of one work-group (one value plus one uint index per work-item) at
+/// least four times. The range [first, last) itself is not inspected.
+template<class InputIterator>
+bool find_extrema_with_reduce_requirements_met(InputIterator first,
+                                               InputIterator last,
+                                               command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type input_type;
+
+    const device &device = queue.get_device();
+
+    // device must have dedicated local memory storage
+    // otherwise reduction would be highly inefficient
+    if(device.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() != CL_LOCAL)
+    {
+        return false;
+    }
+
+    const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
+    // local memory size in bytes (per compute unit)
+    const size_t local_mem_size = device.get_info<CL_DEVICE_LOCAL_MEM_SIZE>();
+
+    // must be the same key find_extrema_with_reduce() uses, so that the
+    // work group size checked here is the one the algorithm will run with
+    std::string cache_key = std::string("__boost_find_extrema_with_reduce_")
+        + type_name<input_type>();
+    // load parameters
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    // Get preferred work group size
+    size_t work_group_size = parameters->get(cache_key, "wgsize", 256);
+
+    work_group_size = (std::min)(max_work_group_size, work_group_size);
+
+    // local memory size needed to perform parallel reduction
+    size_t required_local_mem_size = 0;
+    // indices size
+    required_local_mem_size += sizeof(uint_) * work_group_size;
+    // values size
+    required_local_mem_size += sizeof(input_type) * work_group_size;
+
+    // at least 4 work groups per compute unit otherwise reduction
+    // would be highly inefficient
+    return ((required_local_mem_size * 4) <= local_mem_size);
+}
+
+/// \internal_
+/// Algorithm finds the first extremum in given range, i.e., with the lowest
+/// index.
+///
+/// If \p use_input_idx is false, it's assumed that input data is ordered by
+/// increasing index and \p input_idx is not used in the algorithm.
+///
+/// Each work-item first scans the input with a stride of the global size
+/// and keeps its best element and that element's index; the work-group
+/// then reduces those partial results in local memory. Work-group \c g
+/// writes its winner to \p result[g] and the winner's index to
+/// \p result_idx[g].
+///
+/// \param input first element of the values to search
+/// \param input_idx original indices of the values (only read when
+///        \p use_input_idx is true)
+/// \param count number of input values
+/// \param work_groups_no number of work-groups to launch
+/// \param work_group_size number of work-items per work-group; also sizes
+///        the two local scratch buffers
+/// \param find_minimum true to search for the minimum, false for the
+///        maximum (compiled in via BOOST_COMPUTE_FIND_MAXIMUM)
+template<class InputIterator, class ResultIterator, class Compare>
+inline void find_extrema_with_reduce(InputIterator input,
+                                     vector<uint_>::iterator input_idx,
+                                     size_t count,
+                                     ResultIterator result,
+                                     vector<uint_>::iterator result_idx,
+                                     size_t work_groups_no,
+                                     size_t work_group_size,
+                                     Compare compare,
+                                     const bool find_minimum,
+                                     const bool use_input_idx,
+                                     command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type input_type;
+
+    const context &context = queue.get_context();
+
+    meta_kernel k("find_extrema_reduce");
+    size_t count_arg = k.add_arg<uint_>("count");
+    size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block");
+    size_t block_idx_arg = k.add_arg<uint_ *>(memory_object::local_memory, "block_idx");
+
+    k <<
+        // Work item global id
+        k.decl<const uint_>("gid") << " = get_global_id(0);\n" <<
+
+        // Index of element that will be read from input buffer
+        k.decl<uint_>("idx") << " = gid;\n" <<
+
+        k.decl<input_type>("acc") << ";\n" <<
+        k.decl<uint_>("acc_idx") << ";\n" <<
+        "if(gid < count) {\n" <<
+            // Real index of currently best element
+            "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
+            k.var<uint_>("acc_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" <<
+            "#else\n" <<
+            k.var<uint_>("acc_idx") << " = idx;\n" <<
+            "#endif\n" <<
+
+            // Init accumulator with first[get_global_id(0)]
+            "acc = " << input[k.var<uint_>("idx")] << ";\n" <<
+            "idx += get_global_size(0);\n" <<
+        "}\n" <<
+
+        k.decl<bool>("compare_result") << ";\n" <<
+        k.decl<bool>("equal") << ";\n\n" <<
+        "while( idx < count ){\n" <<
+            // Next element
+            k.decl<input_type>("next") << " = " << input[k.var<uint_>("idx")] << ";\n" <<
+            "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
+            // next_idx is an index, so it is a uint regardless of the
+            // value type of the input
+            k.decl<uint_>("next_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" <<
+            "#endif\n" <<
+
+            // Comparison between currently best element (acc) and next element
+            "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" <<
+            "compare_result = " << compare(k.var<input_type>("next"),
+                                           k.var<input_type>("acc")) << ";\n" <<
+            "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
+            "equal = !compare_result && !" <<
+                compare(k.var<input_type>("acc"),
+                        k.var<input_type>("next")) << ";\n" <<
+            "# endif\n" <<
+            "#else\n" <<
+            "compare_result = " << compare(k.var<input_type>("acc"),
+                                           k.var<input_type>("next")) << ";\n" <<
+            "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
+            "equal = !compare_result && !" <<
+                compare(k.var<input_type>("next"),
+                        k.var<input_type>("acc")) << ";\n" <<
+            "# endif\n" <<
+            "#endif\n" <<
+
+            // save the winner
+            "acc = compare_result ? acc : next;\n" <<
+            "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
+            "acc_idx = compare_result ? " <<
+                "acc_idx : " <<
+                "(equal ? min(acc_idx, next_idx) : next_idx);\n" <<
+            "#else\n" <<
+            "acc_idx = compare_result ? acc_idx : idx;\n" <<
+            "#endif\n" <<
+            "idx += get_global_size(0);\n" <<
+        "}\n\n" <<
+
+        // Work item local id
+        k.decl<const uint_>("lid") << " = get_local_id(0);\n" <<
+        "block[lid] = acc;\n" <<
+        "block_idx[lid] = acc_idx;\n" <<
+        "barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+
+        // number of input elements at or after this work-group's first
+        // work-item; used to skip local slots that were never initialized
+        k.decl<uint_>("group_offset") <<
+            " = count - (get_local_size(0) * get_group_id(0));\n\n";
+
+    k <<
+        "#pragma unroll\n"
+        "for(" << k.decl<uint_>("offset") << " = " << uint_(work_group_size) << " / 2; offset > 0; " <<
+             "offset = offset / 2) {\n" <<
+             "if((lid < offset) && ((lid + offset) < group_offset)) { \n" <<
+                 k.decl<input_type>("mine") << " = block[lid];\n" <<
+                 k.decl<input_type>("other") << " = block[lid+offset];\n" <<
+                 "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" <<
+                 "compare_result = " << compare(k.var<input_type>("other"),
+                                                k.var<input_type>("mine")) << ";\n" <<
+                 "equal = !compare_result && !" <<
+                     compare(k.var<input_type>("mine"),
+                             k.var<input_type>("other")) << ";\n" <<
+                 "#else\n" <<
+                 "compare_result = " << compare(k.var<input_type>("mine"),
+                                                k.var<input_type>("other")) << ";\n" <<
+                 "equal = !compare_result && !" <<
+                     compare(k.var<input_type>("other"),
+                             k.var<input_type>("mine")) << ";\n" <<
+                 "#endif\n" <<
+                 "block[lid] = compare_result ? mine : other;\n" <<
+                 k.decl<uint_>("mine_idx") << " = block_idx[lid];\n" <<
+                 k.decl<uint_>("other_idx") << " = block_idx[lid+offset];\n" <<
+                 "block_idx[lid] = compare_result ? " <<
+                     "mine_idx : " <<
+                     "(equal ? min(mine_idx, other_idx) : other_idx);\n" <<
+             "}\n"
+             "barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        "}\n\n" <<
+
+        // write block result to global output
+        "if(lid == 0){\n" <<
+            result[k.var<uint_>("get_group_id(0)")] << " = block[0];\n" <<
+            result_idx[k.var<uint_>("get_group_id(0)")] << " = block_idx[0];\n" <<
+        "}";
+
+    // kernel variants are selected through preprocessor defines
+    std::string options;
+    if(!find_minimum){
+        options = "-DBOOST_COMPUTE_FIND_MAXIMUM";
+    }
+    if(use_input_idx){
+        options += " -DBOOST_COMPUTE_USE_INPUT_IDX";
+    }
+
+    kernel kernel = k.compile(context, options);
+
+    kernel.set_arg(count_arg, static_cast<uint_>(count));
+    kernel.set_arg(block_arg, local_buffer<input_type>(work_group_size));
+    kernel.set_arg(block_idx_arg, local_buffer<uint_>(work_group_size));
+
+    queue.enqueue_1d_range_kernel(kernel,
+                                  0,
+                                  work_groups_no * work_group_size,
+                                  work_group_size);
+}
+
+/// \internal_
+/// Convenience overload for input that carries no separate index buffer:
+/// forwards to the full overload with \c use_input_idx set to false, so
+/// element positions themselves are used as indices.
+template<class InputIterator, class ResultIterator, class Compare>
+inline void find_extrema_with_reduce(InputIterator input,
+                                     size_t count,
+                                     ResultIterator result,
+                                     vector<uint_>::iterator result_idx,
+                                     size_t work_groups_no,
+                                     size_t work_group_size,
+                                     Compare compare,
+                                     const bool find_minimum,
+                                     command_queue &queue)
+{
+    // the full overload requires an index iterator even though it is never
+    // read when use_input_idx is false; result_idx serves as a placeholder
+    const buffer_iterator<uint_> unused_input_idx = result_idx;
+    const bool use_input_idx = false;
+
+    find_extrema_with_reduce(
+        input, unused_input_idx, count, result, result_idx, work_groups_no,
+        work_group_size, compare, find_minimum, use_input_idx, queue
+    );
+}
+
+/// \internal_
+/// Finds an extremum of [first, last) with two passes of the reduce
+/// kernel: the first pass produces one candidate (value and index) per
+/// work-group, the second pass reduces the candidates with a single
+/// work-group. The resulting index is read through a mapped, pinned
+/// buffer.
+///
+/// \param find_minimum true to search for the minimum, false for the
+///        maximum with respect to \p compare
+/// \return \p first advanced by the index of the extremum; \p first for an
+///         empty range
+template<class InputIterator, class Compare>
+InputIterator find_extrema_with_reduce(InputIterator first,
+                                       InputIterator last,
+                                       Compare compare,
+                                       const bool find_minimum,
+                                       command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+    typedef typename std::iterator_traits<InputIterator>::value_type input_type;
+
+    const context &context = queue.get_context();
+    const device &device = queue.get_device();
+
+    // Getting information about used queue and device
+    const size_t compute_units_no = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>();
+    const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
+
+    const size_t count = detail::iterator_range_size(first, last);
+
+    // nothing to search (and nothing to launch) for an empty range
+    if(count == 0){
+        return first;
+    }
+
+    std::string cache_key = std::string("__boost_find_extrema_with_reduce_")
+        + type_name<input_type>();
+
+    // load parameters
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    // get preferred work group size and preferred number
+    // of work groups per compute unit
+    size_t work_group_size = parameters->get(cache_key, "wgsize", 256);
+    size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 100);
+
+    // calculate work group size and number of work groups;
+    // the ceiling division is done on integers so that it stays exact for
+    // counts a float cannot represent
+    work_group_size = (std::min)(max_work_group_size, work_group_size);
+    size_t work_groups_no = compute_units_no * work_groups_per_cu;
+    work_groups_no = (std::min)(
+        work_groups_no,
+        (count + work_group_size - 1) / work_group_size
+    );
+
+    // phase I: finding candidates for extremum
+
+    // device buffers for extremum candidates and their indices
+    // each work-group computes its candidate
+    vector<input_type> candidates(work_groups_no, context);
+    vector<uint_> candidates_idx(work_groups_no, context);
+
+    // finding candidates for first extremum and their indices
+    find_extrema_with_reduce(
+        first, count, candidates.begin(), candidates_idx.begin(),
+        work_groups_no, work_group_size, compare, find_minimum, queue
+    );
+
+    // phase II: finding extremum from among the candidates
+
+    // zero-copy buffers for final result (value and index)
+    vector<input_type, ::boost::compute::pinned_allocator<input_type> >
+        result(1, context);
+    vector<uint_, ::boost::compute::pinned_allocator<uint_> >
+        result_idx(1, context);
+
+    // get extremum from among the candidates
+    find_extrema_with_reduce(
+        candidates.begin(), candidates_idx.begin(), work_groups_no, result.begin(),
+        result_idx.begin(), 1, work_group_size, compare, find_minimum, true, queue
+    );
+
+    // mapping extremum index to host
+    uint_* result_idx_host_ptr =
+        static_cast<uint_*>(
+            queue.enqueue_map_buffer(
+                result_idx.get_buffer(), command_queue::map_read,
+                0, sizeof(uint_)
+            )
+        );
+
+    // copy the index out and release the mapping before the buffer goes
+    // out of scope
+    const uint_ extremum_index = *result_idx_host_ptr;
+    queue.enqueue_unmap_buffer(result_idx.get_buffer(), result_idx_host_ptr);
+
+    return first + static_cast<difference_type>(extremum_index);
+}
+
+/// \internal_
+/// Overload of find_extrema_with_reduce() for boost::compute::less<>.
+///
+/// The per-work-group candidates are produced on the device exactly as in
+/// the generic overload, but they are written to pinned buffers that are
+/// mapped to the host, and the final extremum is picked there with a
+/// serial scan using the built-in operators of the value type.
+///
+/// \param find_minimum true to search for the minimum, false for the
+///        maximum
+/// \return \p first advanced by the index of the extremum; among equal
+///         candidates the one with the lowest index wins. \p first is
+///         returned for an empty range.
+template<class InputIterator>
+InputIterator find_extrema_with_reduce(InputIterator first,
+                                       InputIterator last,
+                                       ::boost::compute::less<
+                                           typename std::iterator_traits<
+                                               InputIterator
+                                           >::value_type
+                                       >
+                                       compare,
+                                       const bool find_minimum,
+                                       command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+    typedef typename std::iterator_traits<InputIterator>::value_type input_type;
+
+    const context &context = queue.get_context();
+    const device &device = queue.get_device();
+
+    // Getting information about used queue and device
+    const size_t compute_units_no = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>();
+    const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
+
+    const size_t count = detail::iterator_range_size(first, last);
+
+    // nothing to search (and nothing to launch) for an empty range
+    if(count == 0){
+        return first;
+    }
+
+    std::string cache_key = std::string("__boost_find_extrema_with_reduce_")
+        + type_name<input_type>();
+
+    // load parameters
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    // get preferred work group size and preferred number
+    // of work groups per compute unit
+    size_t work_group_size = parameters->get(cache_key, "wgsize", 256);
+    size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 64);
+
+    // calculate work group size and number of work groups;
+    // the ceiling division is done on integers so that it stays exact for
+    // counts a float cannot represent
+    work_group_size = (std::min)(max_work_group_size, work_group_size);
+    size_t work_groups_no = compute_units_no * work_groups_per_cu;
+    work_groups_no = (std::min)(
+        work_groups_no,
+        (count + work_group_size - 1) / work_group_size
+    );
+
+    // phase I: finding candidates for extremum
+
+    // device buffers for extremum candidates and their indices
+    // each work-group computes its candidate
+    // zero-copy buffers are used to eliminate copying data back to host
+    vector<input_type, ::boost::compute::pinned_allocator<input_type> >
+        candidates(work_groups_no, context);
+    vector<uint_, ::boost::compute::pinned_allocator <uint_> >
+        candidates_idx(work_groups_no, context);
+
+    // finding candidates for first extremum and their indices
+    find_extrema_with_reduce(
+        first, count, candidates.begin(), candidates_idx.begin(),
+        work_groups_no, work_group_size, compare, find_minimum, queue
+    );
+
+    // phase II: finding extremum from among the candidates
+
+    // mapping candidates and their indices to host
+    input_type* candidates_host_ptr =
+        static_cast<input_type*>(
+            queue.enqueue_map_buffer(
+                candidates.get_buffer(), command_queue::map_read,
+                0, work_groups_no * sizeof(input_type)
+            )
+        );
+
+    uint_* candidates_idx_host_ptr =
+        static_cast<uint_*>(
+            queue.enqueue_map_buffer(
+                candidates_idx.get_buffer(), command_queue::map_read,
+                0, work_groups_no * sizeof(uint_)
+            )
+        );
+
+    // start with the first candidate and scan the remaining ones serially
+    input_type extremum = candidates_host_ptr[0];
+    uint_ extremum_idx = candidates_idx_host_ptr[0];
+
+    for(size_t c = 1; c < work_groups_no; c++) {
+        const input_type next = candidates_host_ptr[c];
+        const uint_ next_idx = candidates_idx_host_ptr[c];
+
+        const bool better =
+            find_minimum ? (next < extremum) : (next > extremum);
+        if(better) {
+            extremum = next;
+            extremum_idx = next_idx;
+        }
+        else if(next == extremum && next_idx < extremum_idx) {
+            // candidates are not ordered by index, so ties are resolved
+            // explicitly in favour of the lowest index
+            extremum_idx = next_idx;
+        }
+    }
+
+    // the index has been copied out, so both mappings can be released
+    // before the pinned buffers go out of scope
+    queue.enqueue_unmap_buffer(candidates.get_buffer(), candidates_host_ptr);
+    queue.enqueue_unmap_buffer(candidates_idx.get_buffer(), candidates_idx_host_ptr);
+
+    return first + static_cast<difference_type>(extremum_idx);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP
diff --git a/boost/compute/algorithm/detail/find_if_with_atomics.hpp b/boost/compute/algorithm/detail/find_if_with_atomics.hpp
new file mode 100644
index 0000000000..112c34cf00
--- /dev/null
+++ b/boost/compute/algorithm/detail/find_if_with_atomics.hpp
@@ -0,0 +1,212 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP
+
+#include <iterator>
+
+#include <boost/compute/types.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_
+/// find_if implementation that tests one value per work-item.
+///
+/// A device-side index is initialized to \p count; every work-item whose
+/// element satisfies \p predicate lowers it to its own global id with
+/// atomic_min(), so the smallest matching index remains.
+///
+/// \param count number of elements in [first, last)
+/// \return \p first advanced by the resulting index, i.e. by \p count when
+///         no element matched
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator find_if_with_atomics_one_vpt(InputIterator first,
+                                                  InputIterator last,
+                                                  UnaryPredicate predicate,
+                                                  const size_t count,
+                                                  command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+
+    const context &context = queue.get_context();
+
+    detail::meta_kernel k("find_if");
+    // the index is unsigned: it is updated with atomic_min<uint_> and backed
+    // by a scalar<uint_>, so the kernel argument is declared accordingly
+    size_t index_arg = k.add_arg<uint_ *>(memory_object::global_memory, "index");
+    atomic_min<uint_> atomic_min_uint;
+
+    k << k.decl<const uint_>("i") << " = get_global_id(0);\n"
+      << k.decl<const value_type>("value") << "="
+      <<     first[k.var<const uint_>("i")] << ";\n"
+      << "if(" << predicate(k.var<const value_type>("value")) << "){\n"
+      << " " << atomic_min_uint(k.var<uint_ *>("index"), k.var<uint_>("i")) << ";\n"
+      << "}\n";
+
+    kernel kernel = k.compile(context);
+
+    scalar<uint_> index(context);
+    kernel.set_arg(index_arg, index.get_buffer());
+
+    // initialize index to the last iterator's index
+    index.write(static_cast<uint_>(count), queue);
+    // one work-item per element
+    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);
+
+    // read index and return iterator
+    return first + static_cast<difference_type>(index.read(queue));
+}
+
+/// \internal_
+/// find_if implementation in which every work-item tests up to \p vpt
+/// values.
+///
+/// A device-side index is initialized to \p count and lowered with
+/// atomic_min() by work-items that find a match. Two kernel variants are
+/// generated: on GPUs the work-items of a group step through the group's
+/// values with a stride of the local size, on other devices every
+/// work-item walks its own consecutive run of \p vpt values.
+///
+/// \param count number of elements in [first, last)
+/// \param vpt values per work-item; expected to be non-zero
+/// \return \p first advanced by the resulting index, i.e. by \p count when
+///         no element matched
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first,
+                                                       InputIterator last,
+                                                       UnaryPredicate predicate,
+                                                       const size_t count,
+                                                       const size_t vpt,
+                                                       command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+
+    const context &context = queue.get_context();
+    const device &device = queue.get_device();
+
+    detail::meta_kernel k("find_if");
+    size_t index_arg = k.add_arg<uint_ *>(memory_object::global_memory, "index");
+    size_t count_arg = k.add_arg<const uint_>("count");
+    size_t vpt_arg = k.add_arg<const uint_>("vpt");
+    atomic_min<uint_> atomic_min_uint;
+
+    // for GPUs reads from global memory are coalesced
+    if(device.type() & device::gpu) {
+        k <<
+            k.decl<const uint_>("lsize") << " = get_local_size(0);\n" <<
+            k.decl<uint_>("id") << " = get_local_id(0) + get_group_id(0) * lsize * vpt;\n" <<
+            k.decl<const uint_>("end") << " = min(" <<
+                    "id + (lsize *" << k.var<uint_>("vpt") << ")," <<
+                    "count" <<
+            ");\n" <<
+
+            // checking if the index is already found
+            "__local uint local_index;\n" <<
+            "if(get_local_id(0) == 0){\n" <<
+            " local_index = *index;\n " <<
+            "};\n" <<
+            "barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+            "if(local_index < id){\n" <<
+            " return;\n" <<
+            "}\n" <<
+
+            "while(id < end){\n" <<
+            " " << k.decl<const value_type>("value") << " = " <<
+                  first[k.var<const uint_>("id")] << ";\n"
+            " if(" << predicate(k.var<const value_type>("value")) << "){\n" <<
+            " " << atomic_min_uint(k.var<uint_ *>("index"),
+                                  k.var<uint_>("id")) << ";\n" <<
+            " return;\n"
+            " }\n" <<
+            " id+=lsize;\n" <<
+            "}\n";
+    // for CPUs (and other devices) reads are ordered so the big cache is
+    // efficiently used.
+    } else {
+        k <<
+            k.decl<uint_>("id") << " = get_global_id(0) * " << k.var<uint_>("vpt") << ";\n" <<
+            k.decl<const uint_>("end") << " = min(" <<
+                    "id + " << k.var<uint_>("vpt") << "," <<
+                    "count" <<
+            ");\n" <<
+            "while(id < end && (*index) > id){\n" <<
+            " " << k.decl<const value_type>("value") << " = " <<
+                  first[k.var<const uint_>("id")] << ";\n"
+            " if(" << predicate(k.var<const value_type>("value")) << "){\n" <<
+            " " << atomic_min_uint(k.var<uint_ *>("index"),
+                                  k.var<uint_>("id")) << ";\n" <<
+            " return;\n" <<
+            " }\n" <<
+            " id++;\n" <<
+            "}\n";
+    }
+
+    kernel kernel = k.compile(context);
+
+    scalar<uint_> index(context);
+    kernel.set_arg(index_arg, index.get_buffer());
+    kernel.set_arg(count_arg, static_cast<uint_>(count));
+    kernel.set_arg(vpt_arg, static_cast<uint_>(vpt));
+
+    // initialize index to the last iterator's index
+    index.write(static_cast<uint_>(count), queue);
+
+    // number of work-items needed to cover all values, rounded up.
+    // Integer arithmetic keeps this exact: with a float division large
+    // counts are rounded and the tail of the range could be left untested.
+    const size_t global_wg_size = (count + vpt - 1) / vpt;
+    queue.enqueue_1d_range_kernel(kernel, 0, global_wg_size, 0);
+
+    // read index and return iterator
+    return first + static_cast<difference_type>(index.read(queue));
+}
+
+/// \internal_
+/// Entry point of the atomics-based find_if: chooses between the
+/// one-value-per-work-item and the multiple-values-per-work-item kernels.
+///
+/// On GPUs inputs up to the cached "one_vpt_threshold" use one value per
+/// work-item, larger ones use the cached "vpt" parameter. On other devices
+/// the values are split evenly between the compute units.
+///
+/// \return iterator to the element found by the selected kernel; \p last
+///         for an empty range
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator find_if_with_atomics(InputIterator first,
+                                          InputIterator last,
+                                          UnaryPredicate predicate,
+                                          command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    size_t count = detail::iterator_range_size(first, last);
+    if(count == 0){
+        return last;
+    }
+
+    const device &device = queue.get_device();
+
+    // load cached parameters
+    std::string cache_key = std::string("__boost_find_if_with_atomics_")
+        + type_name<value_type>();
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    // for relatively small inputs on GPUs kernel checking one value per thread
+    // (work-item) is more efficient than its multiple values per thread version
+    if(device.type() & device::gpu){
+        const size_t one_vpt_threshold =
+            parameters->get(cache_key, "one_vpt_threshold", 1048576);
+        if(count <= one_vpt_threshold){
+            return find_if_with_atomics_one_vpt(
+                first, last, predicate, count, queue
+            );
+        }
+    }
+
+    // values per thread
+    size_t vpt;
+    if(device.type() & device::gpu){
+        // get vpt parameter
+        vpt = parameters->get(cache_key, "vpt", 32);
+    } else {
+        // for CPUs work is split equally between compute units;
+        // integer ceiling division keeps the result exact for counts that
+        // a float cannot represent
+        const size_t max_compute_units =
+            device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>();
+        vpt = (count + max_compute_units - 1) / max_compute_units;
+    }
+
+    return find_if_with_atomics_multiple_vpt(
+        first, last, predicate, count, vpt, queue
+    );
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP
diff --git a/boost/compute/algorithm/detail/inplace_reduce.hpp b/boost/compute/algorithm/detail/inplace_reduce.hpp
new file mode 100644
index 0000000000..60c61e83fe
--- /dev/null
+++ b/boost/compute/algorithm/detail/inplace_reduce.hpp
@@ -0,0 +1,136 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP
+
+#include <iterator>
+
+#include <boost/utility/result_of.hpp>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/memory/local_buffer.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_
+/// Reduces [first, last) with \p function and stores the result in the
+/// first element of the range.
+///
+/// The reduction runs in passes. In each pass every work-item combines up
+/// to 8 consecutive values, each work-group of 64 work-items combines
+/// those partial results in local memory and writes one value per
+/// work-group. The input and output buffers are swapped between passes
+/// until a single value is left; if that value ended up in the temporary
+/// buffer it is copied back to \p first.
+///
+/// Ranges with fewer than two elements are left untouched.
+///
+/// NOTE(review): the kernel indexes the buffer of \p first from element 0,
+/// so \p first presumably has to refer to the start of its buffer -
+/// confirm against callers.
+template<class Iterator, class BinaryFunction>
+inline void inplace_reduce(Iterator first,
+                           Iterator last,
+                           BinaryFunction function,
+                           command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<Iterator>::value_type
+        value_type;
+
+    size_t input_size = iterator_range_size(first, last);
+    if(input_size < 2){
+        return;
+    }
+
+    const context &context = queue.get_context();
+
+    size_t block_size = 64;
+    size_t values_per_thread = 8;
+    // number of values consumed by one work-group
+    const size_t values_per_block = block_size * values_per_thread;
+    // number of work-groups (and of partial results) of the first pass
+    size_t block_count = (input_size + values_per_block - 1) / values_per_block;
+
+    vector<value_type> output(block_count, context);
+
+    meta_kernel k("inplace_reduce");
+    size_t input_arg = k.add_arg<value_type *>(memory_object::global_memory, "input");
+    size_t input_size_arg = k.add_arg<const uint_>("input_size");
+    size_t output_arg = k.add_arg<value_type *>(memory_object::global_memory, "output");
+    size_t scratch_arg = k.add_arg<value_type *>(memory_object::local_memory, "scratch");
+    k <<
+        "const uint gid = get_global_id(0);\n" <<
+        "const uint lid = get_local_id(0);\n" <<
+        "const uint values_per_thread =\n"
+            << uint_(values_per_thread) << ";\n" <<
+
+        // thread reduce
+        "const uint index = gid * values_per_thread;\n" <<
+        "if(index < input_size){\n" <<
+            k.decl<value_type>("sum") << " = input[index];\n" <<
+            "for(uint i = 1;\n" <<
+                 "i < values_per_thread && (index + i) < input_size;\n" <<
+                 "i++){\n" <<
+            " sum = " <<
+                    function(k.var<value_type>("sum"),
+                             k.var<value_type>("input[index+i]")) << ";\n" <<
+            "}\n" <<
+            "scratch[lid] = sum;\n" <<
+        "}\n" <<
+
+        // local reduce
+        "for(uint i = 1; i < get_local_size(0); i <<= 1){\n" <<
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " uint mask = (i << 1) - 1;\n" <<
+        " uint next_index = (gid + i) * values_per_thread;\n"
+        " if((lid & mask) == 0 && next_index < input_size){\n" <<
+        " scratch[lid] = " <<
+                function(k.var<value_type>("scratch[lid]"),
+                         k.var<value_type>("scratch[lid+i]")) << ";\n" <<
+        " }\n" <<
+        "}\n" <<
+
+        // write output for block
+        "if(lid == 0){\n" <<
+        " output[get_group_id(0)] = scratch[0];\n" <<
+        "}\n"
+        ;
+
+    const buffer *input_buffer = &first.get_buffer();
+    const buffer *output_buffer = &output.get_buffer();
+
+    kernel kernel = k.compile(context);
+
+    while(input_size > 1){
+        kernel.set_arg(input_arg, *input_buffer);
+        kernel.set_arg(input_size_arg, static_cast<uint_>(input_size));
+        kernel.set_arg(output_arg, *output_buffer);
+        kernel.set_arg(scratch_arg, local_buffer<value_type>(block_size));
+
+        queue.enqueue_1d_range_kernel(kernel,
+                                      0,
+                                      block_count * block_size,
+                                      block_size);
+
+        // the pass wrote exactly one value per work-group, so that is the
+        // input size of the next pass (computed on integers, a float
+        // division could drop the last partial result for large inputs)
+        input_size = block_count;
+        block_count = (input_size + values_per_block - 1) / values_per_block;
+
+        // the output of this pass is the input of the next one
+        std::swap(input_buffer, output_buffer);
+    }
+
+    // after the final swap input_buffer refers to the buffer holding the
+    // result; copy it back if that is the temporary buffer
+    if(input_buffer != &first.get_buffer()){
+        ::boost::compute::copy(output.begin(),
+                               output.begin() + 1,
+                               first,
+                               queue);
+    }
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP
diff --git a/boost/compute/algorithm/detail/insertion_sort.hpp b/boost/compute/algorithm/detail/insertion_sort.hpp
new file mode 100644
index 0000000000..4b5b95139a
--- /dev/null
+++ b/boost/compute/algorithm/detail/insertion_sort.hpp
@@ -0,0 +1,165 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP
+
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/memory/local_buffer.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Sorts the range [first, last) in place with an insertion sort that is
+// generated as a single kernel and submitted with enqueue_task().
+//
+// The generated kernel copies the input into a local-memory array named
+// "data", sorts that array, and writes it back through `first`. An element
+// is shifted left for as long as `compare(value, data[pos-1])` holds.
+//
+// first, last - range to sort; ranges with fewer than two elements are
+//               left untouched and no kernel is built
+// compare     - comparison applied to (value being inserted, left neighbour)
+// queue       - command queue used to compile and run the kernel
+template<class Iterator, class Compare>
+inline void serial_insertion_sort(Iterator first,
+                                  Iterator last,
+                                  Compare compare,
+                                  command_queue &queue)
+{
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+
+    const size_t element_count = iterator_range_size(first, last);
+    if(element_count <= 1){
+        return;
+    }
+
+    meta_kernel source("serial_insertion_sort");
+    const size_t scratch_index =
+        source.add_arg<element_type *>(memory_object::local_memory, "data");
+    const size_t n_index = source.add_arg<uint_>("n");
+
+    // stage 1: copy data to local memory
+    source <<
+        "for(uint i = 0; i < n; i++){\n" <<
+        " data[i] = " << first[source.var<uint_>("i")] << ";\n" <<
+        "}\n";
+
+    // stage 2: sort data in local memory
+    source <<
+        "for(uint i = 1; i < n; i++){\n" <<
+        " " << source.decl<const element_type>("value") << " = data[i];\n" <<
+        " uint pos = i;\n" <<
+        " while(pos > 0 && " <<
+        compare(source.var<const element_type>("value"),
+                source.var<const element_type>("data[pos-1]")) << "){\n" <<
+        " data[pos] = data[pos-1];\n" <<
+        " pos--;\n" <<
+        " }\n" <<
+        " data[pos] = value;\n" <<
+        "}\n";
+
+    // stage 3: copy sorted data to output
+    source <<
+        "for(uint i = 0; i < n; i++){\n" <<
+        " " << first[source.var<uint_>("i")] << " = data[i];\n" <<
+        "}\n";
+
+    ::boost::compute::kernel sort_kernel = source.compile(queue.get_context());
+    sort_kernel.set_arg(scratch_index, local_buffer<element_type>(element_count));
+    sort_kernel.set_arg(n_index, static_cast<uint_>(element_count));
+
+    queue.enqueue_task(sort_kernel);
+}
+
+// Convenience overload: sorts [first, last) using
+// ::boost::compute::less over the iterator's value type.
+template<class Iterator>
+inline void serial_insertion_sort(Iterator first,
+                                  Iterator last,
+                                  command_queue &queue)
+{
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+
+    serial_insertion_sort(
+        first,
+        last,
+        ::boost::compute::less<element_type>(),
+        queue
+    );
+}
+
+// Sorts the keys in [keys_first, keys_last) with an insertion sort and
+// applies the same moves to the values starting at values_first.
+//
+// The generated kernel stages keys and values in two local-memory arrays
+// ("keys" and "data"), sorts them together, and writes both back through
+// the given iterators. A (key, value) pair is shifted left for as long as
+// `compare(key, keys[pos-1])` holds. The kernel is submitted with
+// enqueue_task().
+//
+// keys_first, keys_last - key range; fewer than two keys means nothing is
+//                         done and no kernel is built
+// values_first          - start of the value range, indexed in step with
+//                         the keys
+// compare               - comparison applied to (key being inserted,
+//                         left neighbour key)
+// queue                 - command queue used to compile and run the kernel
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void serial_insertion_sort_by_key(KeyIterator keys_first,
+                                         KeyIterator keys_last,
+                                         ValueIterator values_first,
+                                         Compare compare,
+                                         command_queue &queue)
+{
+    typedef typename std::iterator_traits<KeyIterator>::value_type key_type;
+    typedef typename std::iterator_traits<ValueIterator>::value_type value_type;
+
+    size_t count = iterator_range_size(keys_first, keys_last);
+    if(count < 2){
+        return;
+    }
+
+    meta_kernel k("serial_insertion_sort_by_key");
+    size_t local_keys_arg = k.add_arg<key_type *>(memory_object::local_memory, "keys");
+    size_t local_data_arg = k.add_arg<value_type *>(memory_object::local_memory, "data");
+    size_t count_arg = k.add_arg<uint_>("n");
+
+    k <<
+        // copy data to local memory
+        "for(uint i = 0; i < n; i++){\n" <<
+        " keys[i] = " << keys_first[k.var<uint_>("i")] << ";\n"
+        " data[i] = " << values_first[k.var<uint_>("i")] << ";\n"
+        "}\n"
+
+        // sort data in local memory
+        "for(uint i = 1; i < n; i++){\n" <<
+        " " << k.decl<const key_type>("key") << " = keys[i];\n" <<
+        " " << k.decl<const value_type>("value") << " = data[i];\n" <<
+        " uint pos = i;\n" <<
+        " while(pos > 0 && " <<
+        compare(k.var<const key_type>("key"),
+                k.var<const key_type>("keys[pos-1]")) << "){\n" <<
+        " keys[pos] = keys[pos-1];\n" <<
+        " data[pos] = data[pos-1];\n" <<
+        " pos--;\n" <<
+        " }\n" <<
+        " keys[pos] = key;\n" <<
+        " data[pos] = value;\n" <<
+        "}\n" <<
+
+        // copy sorted data to output
+        "for(uint i = 0; i < n; i++){\n" <<
+        " " << keys_first[k.var<uint_>("i")] << " = keys[i];\n"
+        " " << values_first[k.var<uint_>("i")] << " = data[i];\n"
+        "}\n";
+
+    const context &context = queue.get_context();
+    ::boost::compute::kernel kernel = k.compile(context);
+    // size the local arrays through local_buffer<T>, as serial_insertion_sort()
+    // does, instead of passing a byte count narrowed to uint_
+    kernel.set_arg(local_keys_arg, local_buffer<key_type>(count));
+    kernel.set_arg(local_data_arg, local_buffer<value_type>(count));
+    kernel.set_arg(count_arg, static_cast<uint_>(count));
+
+    queue.enqueue_task(kernel);
+}
+
+// Convenience overload: sorts by key using boost::compute::less over the
+// key iterator's value type.
+template<class KeyIterator, class ValueIterator>
+inline void serial_insertion_sort_by_key(KeyIterator keys_first,
+                                         KeyIterator keys_last,
+                                         ValueIterator values_first,
+                                         command_queue &queue)
+{
+    typedef typename std::iterator_traits<KeyIterator>::value_type key_type;
+
+    boost::compute::less<key_type> key_less;
+    serial_insertion_sort_by_key(keys_first, keys_last, values_first, key_less, queue);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP
diff --git a/boost/compute/algorithm/detail/merge_path.hpp b/boost/compute/algorithm/detail/merge_path.hpp
new file mode 100644
index 0000000000..bc2c8fa88c
--- /dev/null
+++ b/boost/compute/algorithm/detail/merge_path.hpp
@@ -0,0 +1,116 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/find_if.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Merge Path kernel class
+///
+/// Subclass of meta_kernel to break two sets into tiles according
+/// to their merge path
+///
+/// Usage: optionally adjust \c tile_size, call set_range() to generate the
+/// kernel source, then exec() to run it. Calling exec() before set_range()
+/// is a no-op that returns an empty event.
+///
+class merge_path_kernel : public meta_kernel
+{
+public:
+    /// Tile length baked into the generated source by set_range()
+    /// (each work-item i targets merged position (i+1)*tile_size).
+    unsigned int tile_size;
+
+    // All counters start at zero so that exec() is well defined even if
+    // set_range() was never called.
+    merge_path_kernel()
+        : meta_kernel("merge_path"),
+          tile_size(4),
+          m_a_count(0),
+          m_a_count_arg(0),
+          m_b_count(0),
+          m_b_count_arg(0)
+    {
+    }
+
+    /// Generates the kernel source. For work-item i the kernel binary-searches
+    /// how many elements of [first1, last1) precede merged position
+    /// (i+1)*tile_size, and stores that split in result_a[i]; the matching
+    /// count for the second range goes to result_b[i].
+    template<class InputIterator1, class InputIterator2,
+             class OutputIterator1, class OutputIterator2,
+             class Compare>
+    void set_range(InputIterator1 first1,
+                   InputIterator1 last1,
+                   InputIterator2 first2,
+                   InputIterator2 last2,
+                   OutputIterator1 result_a,
+                   OutputIterator2 result_b,
+                   Compare comp)
+    {
+        m_a_count = iterator_range_size(first1, last1);
+        m_a_count_arg = add_arg<uint_>("a_count");
+
+        m_b_count = iterator_range_size(first2, last2);
+        m_b_count_arg = add_arg<uint_>("b_count");
+
+        *this <<
+            "uint i = get_global_id(0);\n" <<
+            "uint target = (i+1)*" << tile_size << ";\n" <<
+            "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" <<
+            "uint end = min(target,a_count);\n" <<
+            "uint a_index, b_index;\n" <<
+            "while(start<end)\n" <<
+            "{\n" <<
+            " a_index = (start + end)/2;\n" <<
+            " b_index = target - a_index - 1;\n" <<
+            " if(!(" << comp(first2[expr<uint_>("b_index")],
+                             first1[expr<uint_>("a_index")]) << "))\n" <<
+            " start = a_index + 1;\n" <<
+            " else end = a_index;\n" <<
+            "}\n" <<
+            result_a[expr<uint_>("i")] << " = start;\n" <<
+            result_b[expr<uint_>("i")] << " = target - start;\n";
+    }
+
+    /// \overload
+    /// Uses ::boost::compute::less over the first range's value type.
+    template<class InputIterator1, class InputIterator2,
+             class OutputIterator1, class OutputIterator2>
+    void set_range(InputIterator1 first1,
+                   InputIterator1 last1,
+                   InputIterator2 first2,
+                   InputIterator2 last2,
+                   OutputIterator1 result_a,
+                   OutputIterator2 result_b)
+    {
+        typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+        ::boost::compute::less<value_type> less_than;
+        set_range(first1, last1, first2, last2, result_a, result_b, less_than);
+    }
+
+    /// Runs the kernel with one work-item per complete tile, i.e.
+    /// (a_count + b_count) / tile_size work-items. Returns an empty event
+    /// without launching anything when that number is zero.
+    event exec(command_queue &queue)
+    {
+        // tile_size is a public, caller-adjustable member: never divide by zero
+        if(tile_size == 0) {
+            return event();
+        }
+
+        const size_t tile_count = (m_a_count + m_b_count)/tile_size;
+        if(tile_count == 0) {
+            return event();
+        }
+
+        set_arg(m_a_count_arg, uint_(m_a_count));
+        set_arg(m_b_count_arg, uint_(m_b_count));
+
+        return exec_1d(queue, 0, tile_count);
+    }
+
+private:
+    size_t m_a_count;      // size of [first1, last1) given to set_range()
+    size_t m_a_count_arg;  // kernel argument index of "a_count"
+    size_t m_b_count;      // size of [first2, last2) given to set_range()
+    size_t m_b_count_arg;  // kernel argument index of "b_count"
+};
+
+} //end detail namespace
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP
diff --git a/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp b/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp
new file mode 100644
index 0000000000..f4b53f10ae
--- /dev/null
+++ b/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp
@@ -0,0 +1,366 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP
+
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/merge_with_merge_path.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Merges adjacent pairs of sorted blocks from the input into the result.
+//
+// Each work-item handles one pair: block 1 starts at
+// get_global_id(0) * block_size * 2 and block 2 follows it directly; both
+// are clamped to `count`. The merged pair is written to the result range at
+// the same offset as block 1.
+//
+// keys_first, values_first   - input keys / values
+// keys_result, values_result - output keys / values
+// compare                    - comparison used on keys
+// count                      - total number of elements
+// block_size                 - length of each already-sorted input block
+// sort_by_key                - when true the generated kernel also moves
+//                              values; when false the value iterators are
+//                              never used in the kernel source
+// queue                      - command queue used to compile and run
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void merge_blocks(KeyIterator keys_first,
+                         ValueIterator values_first,
+                         KeyIterator keys_result,
+                         ValueIterator values_result,
+                         Compare compare,
+                         size_t count,
+                         const size_t block_size,
+                         const bool sort_by_key,
+                         command_queue &queue)
+{
+    (void) values_result;
+    (void) values_first;
+
+    meta_kernel k("merge_sort_on_cpu_merge_blocks");
+    size_t count_arg = k.add_arg<const uint_>("count");
+    size_t block_size_arg = k.add_arg<uint_>("block_size");
+
+    k <<
+        k.decl<uint_>("b1_start") << " = get_global_id(0) * block_size * 2;\n" <<
+        k.decl<uint_>("b1_end") << " = min(count, b1_start + block_size);\n" <<
+        k.decl<uint_>("b2_start") << " = min(count, b1_start + block_size);\n" <<
+        k.decl<uint_>("b2_end") << " = min(count, b2_start + block_size);\n" <<
+        k.decl<uint_>("result_idx") << " = b1_start;\n" <<
+
+        // merging block 1 and block 2 (stable)
+        "while(b1_start < b1_end && b2_start < b2_end){\n" <<
+        " if( " << compare(keys_first[k.var<uint_>("b2_start")],
+                           keys_first[k.var<uint_>("b1_start")]) << "){\n" <<
+        " " << keys_result[k.var<uint_>("result_idx")] << " = " <<
+        keys_first[k.var<uint_>("b2_start")] << ";\n";
+    if(sort_by_key){
+        k <<
+            " " << values_result[k.var<uint_>("result_idx")] << " = " <<
+            values_first[k.var<uint_>("b2_start")] << ";\n";
+    }
+    k <<
+        " b2_start++;\n" <<
+        " }\n" <<
+        " else {\n" <<
+        " " << keys_result[k.var<uint_>("result_idx")] << " = " <<
+        keys_first[k.var<uint_>("b1_start")] << ";\n";
+    if(sort_by_key){
+        k <<
+            " " << values_result[k.var<uint_>("result_idx")] << " = " <<
+            values_first[k.var<uint_>("b1_start")] << ";\n";
+    }
+    k <<
+        " b1_start++;\n" <<
+        " }\n" <<
+        " result_idx++;\n" <<
+        "}\n" <<
+        // drain whatever is left of block 1
+        "while(b1_start < b1_end){\n" <<
+        " " << keys_result[k.var<uint_>("result_idx")] << " = " <<
+        keys_first[k.var<uint_>("b1_start")] << ";\n";
+    if(sort_by_key){
+        k <<
+            " " << values_result[k.var<uint_>("result_idx")] << " = " <<
+            values_first[k.var<uint_>("b1_start")] << ";\n";
+    }
+    k <<
+        " b1_start++;\n" <<
+        " result_idx++;\n" <<
+        "}\n" <<
+        // drain whatever is left of block 2
+        "while(b2_start < b2_end){\n" <<
+        " " << keys_result[k.var<uint_>("result_idx")] << " = " <<
+        keys_first[k.var<uint_>("b2_start")] << ";\n";
+    if(sort_by_key){
+        k <<
+            " " << values_result[k.var<uint_>("result_idx")] << " = " <<
+            values_first[k.var<uint_>("b2_start")] << ";\n";
+    }
+    k <<
+        " b2_start++;\n" <<
+        " result_idx++;\n" <<
+        "}\n";
+
+    const context &context = queue.get_context();
+    ::boost::compute::kernel kernel = k.compile(context);
+    kernel.set_arg(count_arg, static_cast<const uint_>(count));
+    kernel.set_arg(block_size_arg, static_cast<uint_>(block_size));
+
+    // One work-item per pair of blocks, rounded up. Done in integer
+    // arithmetic: a float cannot represent every count above 2^24, so a
+    // float-based ceil could come out one short and leave the tail unmerged.
+    const size_t pair_size = 2 * block_size;
+    const size_t global_size =
+        count / pair_size + (count % pair_size != 0 ? 1 : 0);
+    queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0);
+}
+
+// Keys-only form of merge_blocks(): merges adjacent sorted blocks of
+// `first` into `result` without any accompanying values.
+//
+// Note that the `sort_by_key` argument is not consulted; the call below
+// always forwards `false`.
+template<class Iterator, class Compare>
+inline void merge_blocks(Iterator first,
+                         Iterator result,
+                         Compare compare,
+                         size_t count,
+                         const size_t block_size,
+                         const bool sort_by_key,
+                         command_queue &queue)
+{
+    // there are no values to carry along, so a default-constructed
+    // iterator stands in for both value arguments
+    Iterator no_values;
+    merge_blocks(first, no_values,
+                 result, no_values,
+                 compare, count, block_size, false, queue);
+}
+
+// Performs one merge pass over `count` elements made of sorted blocks of
+// length `block_size`, choosing between two implementations:
+//  - merge_with_merge_path(), called once per pair of blocks, when at most
+//    `blocks_no_threshold` blocks remain and `count` is at least
+//    `input_size_threshold`;
+//  - merge_blocks() otherwise.
+// The merged output is written starting at `result`.
+template<class Iterator, class Compare>
+inline void dispatch_merge_blocks(Iterator first,
+                                  Iterator result,
+                                  Compare compare,
+                                  size_t count,
+                                  const size_t block_size,
+                                  const size_t input_size_threshold,
+                                  const size_t blocks_no_threshold,
+                                  command_queue &queue)
+{
+    // Number of blocks, rounded up. Integer arithmetic keeps this exact;
+    // a float cannot represent every count above 2^24.
+    const size_t blocks_no =
+        count / block_size + (count % block_size != 0 ? 1 : 0);
+    // merge with merge path should be used only for large arrays and at the
+    // end of the merging part, when only a few big blocks are left to be merged
+    if(blocks_no <= blocks_no_threshold && count >= input_size_threshold){
+        Iterator last = first + count;
+        for(size_t i = 0; i < count; i+= 2*block_size)
+        {
+            // [first1, last1) and [first2, last2) are the two neighbouring
+            // blocks, each clamped to the end of the input
+            Iterator first1 = (std::min)(first + i, last);
+            Iterator last1 = (std::min)(first1 + block_size, last);
+            Iterator first2 = last1;
+            Iterator last2 = (std::min)(first2 + block_size, last);
+            Iterator block_result = (std::min)(result + i, result + count);
+            merge_with_merge_path(first1, last1, first2, last2,
+                                  block_result, compare, queue);
+        }
+    }
+    else {
+        merge_blocks(first, result, compare, count, block_size, false, queue);
+    }
+}
+
+// Sorts the input block by block with an insertion sort.
+//
+// Each work-item sorts one block in place: the block starts at
+// get_global_id(0) * block_size and ends at min(count, start + block_size).
+// A key is shifted left for as long as `compare(key, keys[pos-1])` holds.
+//
+// keys_first   - keys to sort
+// values_first - values moved together with the keys; only referenced in
+//                the generated kernel when sort_by_key is true
+// compare      - comparison used on keys
+// count        - total number of elements
+// block_size   - number of elements per block
+// sort_by_key  - whether values are moved along with the keys
+// queue        - command queue used to compile and run the kernel
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void block_insertion_sort(KeyIterator keys_first,
+                                 ValueIterator values_first,
+                                 Compare compare,
+                                 const size_t count,
+                                 const size_t block_size,
+                                 const bool sort_by_key,
+                                 command_queue &queue)
+{
+    (void) values_first;
+
+    typedef typename std::iterator_traits<KeyIterator>::value_type K;
+    typedef typename std::iterator_traits<ValueIterator>::value_type T;
+
+    meta_kernel k("merge_sort_on_cpu_block_insertion_sort");
+    size_t count_arg = k.add_arg<uint_>("count");
+    size_t block_size_arg = k.add_arg<uint_>("block_size");
+
+    k <<
+        k.decl<uint_>("start") << " = get_global_id(0) * block_size;\n" <<
+        k.decl<uint_>("end") << " = min(count, start + block_size);\n" <<
+
+        // block insertion sort (stable)
+        "for(uint i = start+1; i < end; i++){\n" <<
+        " " << k.decl<const K>("key") << " = " <<
+        keys_first[k.var<uint_>("i")] << ";\n";
+    if(sort_by_key){
+        k <<
+            " " << k.decl<const T>("value") << " = " <<
+            values_first[k.var<uint_>("i")] << ";\n";
+    }
+    k <<
+        " uint pos = i;\n" <<
+        " while(pos > start && " <<
+        compare(k.var<const K>("key"),
+                keys_first[k.var<uint_>("pos-1")]) << "){\n" <<
+        " " << keys_first[k.var<uint_>("pos")] << " = " <<
+        keys_first[k.var<uint_>("pos-1")] << ";\n";
+    if(sort_by_key){
+        k <<
+            " " << values_first[k.var<uint_>("pos")] << " = " <<
+            values_first[k.var<uint_>("pos-1")] << ";\n";
+    }
+    k <<
+        " pos--;\n" <<
+        " }\n" <<
+        " " << keys_first[k.var<uint_>("pos")] << " = key;\n";
+    if(sort_by_key) {
+        k <<
+            " " << values_first[k.var<uint_>("pos")] << " = value;\n";
+    }
+    k <<
+        "}\n"; // block insertion sort
+
+    const context &context = queue.get_context();
+    ::boost::compute::kernel kernel = k.compile(context);
+    kernel.set_arg(count_arg, static_cast<uint_>(count));
+    kernel.set_arg(block_size_arg, static_cast<uint_>(block_size));
+
+    // One work-item per block, rounded up. Done in integer arithmetic: a
+    // float cannot represent every count above 2^24, so a float-based ceil
+    // could come out one short and leave the last block unsorted.
+    const size_t global_size =
+        count / block_size + (count % block_size != 0 ? 1 : 0);
+    queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0);
+}
+
+// Keys-only form of block_insertion_sort(): sorts each block of `first`
+// in place with no accompanying values.
+template<class Iterator, class Compare>
+inline void block_insertion_sort(Iterator first,
+                                 Compare compare,
+                                 const size_t count,
+                                 const size_t block_size,
+                                 command_queue &queue)
+{
+    // no values exist here; a default-constructed iterator fills the slot
+    // and sort_by_key is passed as false
+    Iterator no_values;
+    block_insertion_sort(
+        first, no_values, compare, count, block_size, false, queue
+    );
+}
+
+// This sort is stable.
+//
+// Sorts [first, last): the input is first insertion-sorted in blocks, then
+// the blocks are merged pairwise, doubling the block length each pass and
+// alternating between the input range and a temporary vector. If the final
+// pass ended in the temporary vector the result is copied back to `first`.
+// Inputs of at most 512 elements are handled by a single insertion-sort
+// block; inputs of fewer than two elements are left untouched.
+template<class Iterator, class Compare>
+inline void merge_sort_on_cpu(Iterator first,
+                              Iterator last,
+                              Compare compare,
+                              command_queue &queue)
+{
+    typedef typename std::iterator_traits<Iterator>::value_type value_type;
+
+    const size_t n = iterator_range_size(first, last);
+    if(n < 2){
+        return;
+    }
+    if(n <= 512){
+        // for small input size only insertion sort is performed
+        block_insertion_sort(first, compare, n, n, queue);
+        return;
+    }
+
+    // loading parameters
+    const std::string cache_key =
+        std::string("__boost_merge_sort_on_cpu_") + type_name<value_type>();
+    boost::shared_ptr<parameter_cache> tuning =
+        detail::parameter_cache::get_global_cache(queue.get_device());
+
+    // merge_with_merge_path() is chosen for a pass only when at most
+    // `path_blocks_threshold` blocks remain AND the input holds at least
+    // `path_input_threshold` elements; every other pass uses merge_blocks().
+    const size_t path_blocks_threshold =
+        tuning->get(cache_key, "merge_with_merge_path_blocks_no_threshold", 8);
+    const size_t path_input_threshold =
+        tuning->get(cache_key, "merge_with_merge_path_input_size_threshold", 2097152);
+
+    const size_t block_size =
+        tuning->get(cache_key, "insertion_sort_block_size", 64);
+    block_insertion_sort(first, compare, n, block_size, queue);
+
+    // scratch storage that merge passes ping-pong with
+    vector<value_type> scratch(n, queue.get_context());
+    bool latest_in_scratch = false;
+
+    size_t run_length = block_size;
+    while(run_length < n){
+        if(!latest_in_scratch){
+            dispatch_merge_blocks(first, scratch.begin(), compare, n, run_length,
+                                  path_input_threshold,
+                                  path_blocks_threshold,
+                                  queue);
+        }
+        else {
+            dispatch_merge_blocks(scratch.begin(), first, compare, n, run_length,
+                                  path_input_threshold,
+                                  path_blocks_threshold,
+                                  queue);
+        }
+        latest_in_scratch = !latest_in_scratch;
+        run_length *= 2;
+    }
+
+    if(latest_in_scratch){
+        copy(scratch.begin(), scratch.end(), first, queue);
+    }
+}
+
+// This sort is stable.
+//
+// Sorts the keys in [keys_first, keys_last) and reorders the values at
+// values_first to match. Keys/values are first insertion-sorted in blocks,
+// then blocks are merged pairwise with merge_blocks(), doubling the block
+// length each pass and alternating between the caller's ranges and a pair
+// of temporary vectors. If the final pass ended in the temporaries, both
+// are copied back. Inputs of at most 512 keys are handled by a single
+// insertion-sort block; fewer than two keys means nothing is done.
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void merge_sort_by_key_on_cpu(KeyIterator keys_first,
+                                     KeyIterator keys_last,
+                                     ValueIterator values_first,
+                                     Compare compare,
+                                     command_queue &queue)
+{
+    typedef typename std::iterator_traits<KeyIterator>::value_type key_type;
+    typedef typename std::iterator_traits<ValueIterator>::value_type value_type;
+
+    const size_t n = iterator_range_size(keys_first, keys_last);
+    if(n < 2){
+        return;
+    }
+    if(n <= 512){
+        // for small input size only insertion sort is performed
+        block_insertion_sort(keys_first, values_first, compare,
+                             n, n, true, queue);
+        return;
+    }
+
+    // loading parameters
+    const std::string cache_key =
+        std::string("__boost_merge_sort_by_key_on_cpu_") + type_name<value_type>()
+        + "_with_" + type_name<key_type>();
+    boost::shared_ptr<parameter_cache> tuning =
+        detail::parameter_cache::get_global_cache(queue.get_device());
+
+    const size_t block_size =
+        tuning->get(cache_key, "insertion_sort_by_key_block_size", 64);
+    block_insertion_sort(keys_first, values_first, compare,
+                         n, block_size, true, queue);
+
+    // scratch storage that merge passes ping-pong with
+    const context &ctx = queue.get_context();
+    vector<value_type> values_scratch(n, ctx);
+    vector<key_type> keys_scratch(n, ctx);
+    bool latest_in_scratch = false;
+
+    size_t run_length = block_size;
+    while(run_length < n){
+        if(!latest_in_scratch){
+            merge_blocks(keys_first, values_first,
+                         keys_scratch.begin(), values_scratch.begin(),
+                         compare, n, run_length, true, queue);
+        }
+        else {
+            merge_blocks(keys_scratch.begin(), values_scratch.begin(),
+                         keys_first, values_first,
+                         compare, n, run_length, true, queue);
+        }
+        latest_in_scratch = !latest_in_scratch;
+        run_length *= 2;
+    }
+
+    if(latest_in_scratch){
+        copy(keys_scratch.begin(), keys_scratch.end(), keys_first, queue);
+        copy(values_scratch.begin(), values_scratch.end(), values_first, queue);
+    }
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP
diff --git a/boost/compute/algorithm/detail/merge_with_merge_path.hpp b/boost/compute/algorithm/detail/merge_with_merge_path.hpp
new file mode 100644
index 0000000000..c3cc5e8e9c
--- /dev/null
+++ b/boost/compute/algorithm/detail/merge_with_merge_path.hpp
@@ -0,0 +1,203 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/detail/merge_path.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Serial merge kernel class
+///
+/// Subclass of meta_kernel to perform serial merge after tiling
+///
+/// Usage: optionally adjust \c tile_size, call set_range() to generate the
+/// kernel source, then exec() to run it. Calling exec() before set_range()
+/// is a no-op that returns an empty event.
+///
+class serial_merge_kernel : meta_kernel
+{
+public:
+    /// Output stride per work-item: work-item i starts writing at
+    /// result[i*tile_size]. Baked into the source by set_range().
+    unsigned int tile_size;
+
+    // m_count starts at zero so that exec() is well defined even if
+    // set_range() was never called.
+    serial_merge_kernel()
+        : meta_kernel("merge"),
+          tile_size(4),
+          m_count(0)
+    {
+    }
+
+    /// Generates the kernel source. Work-item i reads its slice of the first
+    /// input from tile_first1[i]..tile_first1[i+1] and of the second input
+    /// from tile_first2[i]..tile_first2[i+1], and merges the two slices
+    /// serially into result starting at index i*tile_size.
+    ///
+    /// The range [tile_first1, tile_last1) holds the tile boundaries, so the
+    /// number of work-items is one less than its length.
+    template<class InputIterator1, class InputIterator2,
+             class InputIterator3, class InputIterator4,
+             class OutputIterator, class Compare>
+    void set_range(InputIterator1 first1,
+                   InputIterator2 first2,
+                   InputIterator3 tile_first1,
+                   InputIterator3 tile_last1,
+                   InputIterator4 tile_first2,
+                   OutputIterator result,
+                   Compare comp)
+    {
+        // N boundaries delimit N-1 tiles; an empty boundary range means no
+        // tiles (guard against unsigned wrap-around of 0 - 1)
+        const size_t boundary_count = iterator_range_size(tile_first1, tile_last1);
+        m_count = boundary_count > 0 ? boundary_count - 1 : 0;
+
+        *this <<
+            "uint i = get_global_id(0);\n" <<
+            "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
+            "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
+            "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
+            "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
+            "uint index = i*" << tile_size << ";\n" <<
+            "while(start1<end1 && start2<end2)\n" <<
+            "{\n" <<
+            " if(!(" << comp(first2[expr<uint_>("start2")],
+                             first1[expr<uint_>("start1")]) << "))\n" <<
+            " {\n" <<
+            result[expr<uint_>("index")] <<
+            " = " << first1[expr<uint_>("start1")] << ";\n" <<
+            " index++;\n" <<
+            " start1++;\n" <<
+            " }\n" <<
+            " else\n" <<
+            " {\n" <<
+            result[expr<uint_>("index")] <<
+            " = " << first2[expr<uint_>("start2")] << ";\n" <<
+            " index++;\n" <<
+            " start2++;\n" <<
+            " }\n" <<
+            "}\n" <<
+            // drain the rest of the first slice
+            "while(start1<end1)\n" <<
+            "{\n" <<
+            result[expr<uint_>("index")] <<
+            " = " << first1[expr<uint_>("start1")] << ";\n" <<
+            " index++;\n" <<
+            " start1++;\n" <<
+            "}\n" <<
+            // drain the rest of the second slice
+            "while(start2<end2)\n" <<
+            "{\n" <<
+            result[expr<uint_>("index")] <<
+            " = " << first2[expr<uint_>("start2")] << ";\n" <<
+            " index++;\n" <<
+            " start2++;\n" <<
+            "}\n";
+    }
+
+    /// \overload
+    /// Uses ::boost::compute::less over the first input's value type.
+    template<class InputIterator1, class InputIterator2,
+             class InputIterator3, class InputIterator4,
+             class OutputIterator>
+    void set_range(InputIterator1 first1,
+                   InputIterator2 first2,
+                   InputIterator3 tile_first1,
+                   InputIterator3 tile_last1,
+                   InputIterator4 tile_first2,
+                   OutputIterator result)
+    {
+        typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+        ::boost::compute::less<value_type> less_than;
+        set_range(first1, first2, tile_first1, tile_last1, tile_first2, result, less_than);
+    }
+
+    /// Runs the kernel with one work-item per tile. Returns an empty event
+    /// without launching anything when there are no tiles.
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    size_t m_count;  // number of tiles, i.e. work-items launched by exec()
+};
+
+///
+/// \brief Merge algorithm with merge path
+///
+/// Merges the sorted values in the range [\p first1, \p last1) with
+/// the sorted values in the range [\p first2, \p last2) and stores the
+/// result in the range beginning at \p result
+///
+/// The work is done in two kernel launches: a merge_path_kernel computes
+/// per-tile split points into both inputs (tiles of 1024 output elements),
+/// then a serial_merge_kernel merges each tile independently.
+///
+/// \param first1 Iterator pointing to start of first set
+/// \param last1 Iterator pointing to end of first set
+/// \param first2 Iterator pointing to start of second set
+/// \param last2 Iterator pointing to end of second set
+/// \param result Iterator pointing to start of range in which the result
+/// will be stored
+/// \param comp Comparator which performs less than function
+/// \param queue Queue on which to execute
+///
+/// \return \p result advanced by the combined size of both input ranges
+///
+template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
+inline OutputIterator
+merge_with_merge_path(InputIterator1 first1,
+                      InputIterator1 last1,
+                      InputIterator2 first2,
+                      InputIterator2 last2,
+                      OutputIterator result,
+                      Compare comp,
+                      command_queue &queue = system::default_queue())
+{
+    typedef typename
+        std::iterator_traits<OutputIterator>::difference_type result_difference_type;
+
+    const size_t tile_size = 1024;
+
+    const size_t count1 = iterator_range_size(first1, last1);
+    const size_t count2 = iterator_range_size(first2, last2);
+    const size_t total = count1 + count2;
+
+    // one boundary per (possibly partial) tile, plus the leading zero
+    const size_t boundary_count = (total + tile_size - 1) / tile_size + 1;
+    vector<uint_> tile_a(boundary_count, queue.get_context());
+    vector<uint_> tile_b(boundary_count, queue.get_context());
+
+    // the first boundary of each input is always zero
+    fill_n(tile_a.begin(), 1, uint_(0), queue);
+    fill_n(tile_b.begin(), 1, uint_(0), queue);
+
+    // Tile the sets: interior boundaries are written from index 1 onwards
+    merge_path_kernel tiler;
+    tiler.tile_size = static_cast<unsigned int>(tile_size);
+    tiler.set_range(first1, last1, first2, last2,
+                    tile_a.begin()+1, tile_b.begin()+1, comp);
+    tiler.exec(queue);
+
+    // the last boundary of each input is that input's full length
+    fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), queue);
+    fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue);
+
+    // Merge
+    serial_merge_kernel merger;
+    merger.tile_size = static_cast<unsigned int>(tile_size);
+    merger.set_range(first1, first2, tile_a.begin(), tile_a.end(),
+                     tile_b.begin(), result, comp);
+    merger.exec(queue);
+
+    return result + static_cast<result_difference_type>(total);
+}
+
+/// \overload
+///
+/// Compares elements with ::boost::compute::less over the value type of
+/// the first input range.
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+inline OutputIterator
+merge_with_merge_path(InputIterator1 first1,
+                      InputIterator1 last1,
+                      InputIterator2 first2,
+                      InputIterator2 last2,
+                      OutputIterator result,
+                      command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type element_type;
+
+    return merge_with_merge_path(
+        first1, last1,
+        first2, last2,
+        result,
+        ::boost::compute::less<element_type>(),
+        queue
+    );
+}
+
+} //end detail namespace
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
diff --git a/boost/compute/algorithm/detail/radix_sort.hpp b/boost/compute/algorithm/detail/radix_sort.hpp
new file mode 100644
index 0000000000..c2ba4ed17c
--- /dev/null
+++ b/boost/compute/algorithm/detail/radix_sort.hpp
@@ -0,0 +1,415 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP
+
+#include <iterator>
+
+#include <boost/assert.hpp>
+#include <boost/type_traits/is_signed.hpp>
+#include <boost/type_traits/is_floating_point.hpp>
+
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/type_traits/is_fundamental.hpp>
+#include <boost/compute/type_traits/is_vector_type.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// meta-function: true when T is fundamental (per ::boost::compute::is_fundamental) and is not a vector type
+template<class T>
+struct is_radix_sortable :
+ boost::mpl::and_<
+ typename ::boost::compute::is_fundamental<T>::type, // first condition: T is a fundamental type
+ typename boost::mpl::not_<typename is_vector_type<T>::type>::type // second condition: T is not a vector type
+ >
+{
+};
+
+template<size_t N> // N is a size in bytes; radix_sort_impl instantiates this with sizeof(T)
+struct radix_sort_value_type
+{ // primary template has no nested `type`: only the sizes specialized below (1, 2, 4, 8) are usable
+};
+
+template<>
+struct radix_sort_value_type<1>
+{
+ typedef uchar_ type; // 1-byte keys are sorted as uchar_
+};
+
+template<>
+struct radix_sort_value_type<2>
+{
+ typedef ushort_ type; // 2-byte keys are sorted as ushort_
+};
+
+template<>
+struct radix_sort_value_type<4>
+{
+ typedef uint_ type; // 4-byte keys are sorted as uint_
+};
+
+template<>
+struct radix_sort_value_type<8>
+{
+ typedef ulong_ type; // 8-byte keys are sorted as ulong_
+};
+
+template<typename T> // generic case: T is not double
+inline const char* enable_double()
+{
+ return " -DT2_double=0"; // build option; radix_sort_source only enables cl_khr_fp64 when T2_double is non-zero
+}
+
+template<>
+inline const char* enable_double<double>() // specialization selected when the type is double
+{
+ return " -DT2_double=1"; // makes the "#if T2_double" guard in radix_sort_source enable cl_khr_fp64
+}
+
+const char radix_sort_source[] = // OpenCL C source: radix() helper plus the count, scan and scatter kernels
+"#if T2_double\n"
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"#endif\n"
+"#define K2_BITS (1 << K_BITS)\n" // number of buckets per pass
+"#define RADIX_MASK ((((T)(1)) << K_BITS) - 1)\n" // keeps the low K_BITS bits
+"#define SIGN_BIT ((sizeof(T) * CHAR_BIT) - 1)\n" // index of the most significant bit of T
+
+"inline uint radix(const T x, const uint low_bit)\n" // returns the K_BITS-wide bucket index of x starting at bit low_bit
+"{\n"
+"#if defined(IS_FLOATING_POINT)\n"
+" const T mask = -(x >> SIGN_BIT) | (((T)(1)) << SIGN_BIT);\n" // all ones when the sign bit is set, otherwise only the sign bit
+" return ((x ^ mask) >> low_bit) & RADIX_MASK;\n"
+"#elif defined(IS_SIGNED)\n"
+" return ((x ^ (((T)(1)) << SIGN_BIT)) >> low_bit) & RADIX_MASK;\n" // flips the sign bit before extracting the digit
+"#else\n"
+" return (x >> low_bit) & RADIX_MASK;\n"
+"#endif\n"
+"}\n"
+
+"__kernel void count(__global const T *input,\n"
+" const uint input_offset,\n"
+" const uint input_size,\n"
+" __global uint *global_counts,\n"
+" __global uint *global_offsets,\n"
+" __local uint *local_counts,\n"
+" const uint low_bit)\n"
+"{\n"
+ // global and work-group-local ids of this work-item
+" const uint gid = get_global_id(0);\n"
+" const uint lid = get_local_id(0);\n"
+
+ // the first K2_BITS work-items of the group clear the local bucket counters
+" if(lid < K2_BITS){\n"
+" local_counts[lid] = 0;\n"
+" }\n"
+" barrier(CLK_LOCAL_MEM_FENCE);\n"
+
+ // every in-range work-item atomically increments the counter of its key's bucket
+" if(gid < input_size){\n"
+" T value = input[input_offset+gid];\n"
+" uint bucket = radix(value, low_bit);\n"
+" atomic_inc(local_counts + bucket);\n"
+" }\n"
+" barrier(CLK_LOCAL_MEM_FENCE);\n"
+
+ // publish this group's bucket counts (K2_BITS entries per work-group)
+" if(lid < K2_BITS){\n"
+" global_counts[K2_BITS*get_group_id(0) + lid] = local_counts[lid];\n"
+
+ // the last work-group additionally stores its own counts in global_offsets
+" if(get_group_id(0) == (get_num_groups(0) - 1)){\n"
+" global_offsets[lid] = local_counts[lid];\n"
+" }\n"
+" }\n"
+"}\n"
+
+"__kernel void scan(__global const uint *block_offsets,\n"
+" __global uint *global_offsets,\n"
+" const uint block_count)\n"
+"{\n"
+" __global const uint *last_block_offsets =\n"
+" block_offsets + K2_BITS * (block_count - 1);\n"
+
+ // serial exclusive scan over the K2_BITS values (global_offsets[i] + last block's entry i)
+" uint sum = 0;\n"
+" for(uint i = 0; i < K2_BITS; i++){\n"
+" uint x = global_offsets[i] + last_block_offsets[i];\n"
+" global_offsets[i] = sum;\n"
+" sum += x;\n"
+" }\n"
+"}\n"
+
+"__kernel void scatter(__global const T *input,\n"
+" const uint input_offset,\n"
+" const uint input_size,\n"
+" const uint low_bit,\n"
+" __global const uint *counts,\n"
+" __global const uint *global_offsets,\n"
+"#ifndef SORT_BY_KEY\n"
+" __global T *output,\n"
+" const uint output_offset)\n"
+"#else\n"
+" __global T *keys_output,\n"
+" const uint keys_output_offset,\n"
+" __global T2 *values_input,\n"
+" const uint values_input_offset,\n"
+" __global T2 *values_output,\n"
+" const uint values_output_offset)\n"
+"#endif\n"
+"{\n"
+ // global and work-group-local ids of this work-item
+" const uint gid = get_global_id(0);\n"
+" const uint lid = get_local_id(0);\n"
+
+ // each in-range work-item stores the bucket of its key in local memory
+" T value;\n"
+" uint bucket;\n"
+" __local uint local_input[BLOCK_SIZE];\n"
+" if(gid < input_size){\n"
+" value = input[input_offset+gid];\n"
+" bucket = radix(value, low_bit);\n"
+" local_input[lid] = bucket;\n"
+" }\n"
+
+ // copy this group's K2_BITS entries of counts to local memory
+" __local uint local_counts[(1 << K_BITS)];\n"
+" if(lid < K2_BITS){\n"
+" local_counts[lid] = counts[get_group_id(0) * K2_BITS + lid];\n"
+" }\n"
+
+ // wait until local memory is ready
+" barrier(CLK_LOCAL_MEM_FENCE);\n"
+
+ // out-of-range work-items leave only after the barrier above
+" if(gid >= input_size){\n"
+" return;\n"
+" }\n"
+
+ // base position: bucket's global offset plus this group's offset within the bucket
+" uint offset = global_offsets[bucket] + local_counts[bucket];\n"
+
+ // rank within the group: number of lower-lid work-items with the same bucket
+" uint local_offset = 0;\n"
+" for(uint i = 0; i < lid; i++){\n"
+" if(local_input[i] == bucket)\n"
+" local_offset++;\n"
+" }\n"
+
+"#ifndef SORT_BY_KEY\n"
+ // write value to output
+" output[output_offset + offset + local_offset] = value;\n"
+"#else\n"
+ // write key and value if doing sort_by_key
+" keys_output[keys_output_offset+offset + local_offset] = value;\n"
+" values_output[values_output_offset+offset + local_offset] =\n"
+" values_input[values_input_offset+gid];\n"
+"#endif\n"
+"}\n";
+
+/// Radix-sorts the keys in [first, last) on \p queue, processing k bits per
+/// pass starting from the least significant bits.
+///
+/// \param first iterator to the first key
+/// \param last iterator one past the last key
+/// \param values_first iterator to the first value; when its buffer handle is
+///        null only the keys are sorted, otherwise the values are moved
+///        together with their keys
+/// \param queue command queue on which all kernels are enqueued
+///
+/// Each pass scatters from the current input buffer into the current output
+/// buffer and then swaps the two; no copy back is performed, so the result
+/// lands in the caller's buffer only when the number of passes is even.
+/// An empty range returns immediately without enqueueing any work.
+template<class T, class T2>
+inline void radix_sort_impl(const buffer_iterator<T> first,
+                            const buffer_iterator<T> last,
+                            const buffer_iterator<T2> values_first,
+                            command_queue &queue)
+{
+    typedef T value_type;
+    typedef typename radix_sort_value_type<sizeof(T)>::type sort_type;
+
+    // nothing to sort; also avoids enqueueing kernels with a zero global size
+    const size_t count = detail::iterator_range_size(first, last);
+    if(count == 0){
+        return;
+    }
+
+    const device &device = queue.get_device();
+    const context &context = queue.get_context();
+
+    // if we have a valid values iterator then we are doing a
+    // sort by key and have to set up the values buffer
+    bool sort_by_key = (values_first.get_buffer().get() != 0);
+
+    // load (or create) radix sort program
+    std::string cache_key =
+        std::string("__boost_radix_sort_") + type_name<value_type>();
+
+    if(sort_by_key){
+        cache_key += std::string("_with_") + type_name<T2>();
+    }
+
+    boost::shared_ptr<program_cache> cache =
+        program_cache::get_global_cache(context);
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    // sort parameters: bits per pass, buckets per pass, work-group size
+    const uint_ k = parameters->get(cache_key, "k", 4);
+    const uint_ k2 = 1 << k;
+    const uint_ block_size = parameters->get(cache_key, "tpb", 128);
+
+    // sort program compiler options
+    std::stringstream options;
+    options << "-DK_BITS=" << k;
+    options << " -DT=" << type_name<sort_type>();
+    options << " -DBLOCK_SIZE=" << block_size;
+
+    if(boost::is_floating_point<value_type>::value){
+        options << " -DIS_FLOATING_POINT";
+    }
+
+    if(boost::is_signed<value_type>::value){
+        options << " -DIS_SIGNED";
+    }
+
+    if(sort_by_key){
+        options << " -DSORT_BY_KEY";
+        options << " -DT2=" << type_name<T2>();
+        options << enable_double<T2>();
+    }
+
+    // load radix sort program
+    program radix_sort_program = cache->get_or_build(
+        cache_key, options.str(), radix_sort_source, context
+    );
+
+    kernel count_kernel(radix_sort_program, "count");
+    kernel scan_kernel(radix_sort_program, "scan");
+    kernel scatter_kernel(radix_sort_program, "scatter");
+
+    // number of work-groups, rounded up so every key is covered
+    uint_ block_count = static_cast<uint_>(count / block_size);
+    if(block_count * block_size != count){
+        block_count++;
+    }
+
+    // setup temporary buffers
+    vector<value_type> output(count, context);
+    vector<T2> values_output(sort_by_key ? count : 0, context);
+    vector<uint_> offsets(k2, context);
+    vector<uint_> counts(block_count * k2, context);
+
+    // current input/output buffers and offsets; swapped after every pass
+    const buffer *input_buffer = &first.get_buffer();
+    uint_ input_offset = static_cast<uint_>(first.get_index());
+    const buffer *output_buffer = &output.get_buffer();
+    uint_ output_offset = 0;
+    const buffer *values_input_buffer = &values_first.get_buffer();
+    uint_ values_input_offset = static_cast<uint_>(values_first.get_index());
+    const buffer *values_output_buffer = &values_output.get_buffer();
+    uint_ values_output_offset = 0;
+
+    // one pass per group of k bits of the key
+    const uint_ pass_count =
+        static_cast<uint_>(sizeof(sort_type) * CHAR_BIT / k);
+
+    for(uint_ i = 0; i < pass_count; i++){
+        // write counts
+        count_kernel.set_arg(0, *input_buffer);
+        count_kernel.set_arg(1, input_offset);
+        count_kernel.set_arg(2, static_cast<uint_>(count));
+        count_kernel.set_arg(3, counts);
+        count_kernel.set_arg(4, offsets);
+        count_kernel.set_arg(5, block_size * sizeof(uint_), 0);
+        count_kernel.set_arg(6, i * k);
+        queue.enqueue_1d_range_kernel(count_kernel,
+                                      0,
+                                      block_count * block_size,
+                                      block_size);
+
+        // scan counts: the k2 counters of one block are viewed as a single
+        // uintN element so one exclusive_scan handles all buckets at once
+        if(k == 1){
+            typedef uint2_ counter_type;
+            ::boost::compute::exclusive_scan(
+                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
+                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 2),
+                make_buffer_iterator<counter_type>(counts.get_buffer()),
+                queue
+            );
+        }
+        else if(k == 2){
+            typedef uint4_ counter_type;
+            ::boost::compute::exclusive_scan(
+                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
+                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 4),
+                make_buffer_iterator<counter_type>(counts.get_buffer()),
+                queue
+            );
+        }
+        else if(k == 4){
+            typedef uint16_ counter_type;
+            ::boost::compute::exclusive_scan(
+                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
+                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 16),
+                make_buffer_iterator<counter_type>(counts.get_buffer()),
+                queue
+            );
+        }
+        else {
+            BOOST_ASSERT(false && "unknown k");
+            break;
+        }
+
+        // scan global offsets
+        scan_kernel.set_arg(0, counts);
+        scan_kernel.set_arg(1, offsets);
+        scan_kernel.set_arg(2, block_count);
+        queue.enqueue_task(scan_kernel);
+
+        // scatter values
+        scatter_kernel.set_arg(0, *input_buffer);
+        scatter_kernel.set_arg(1, input_offset);
+        scatter_kernel.set_arg(2, static_cast<uint_>(count));
+        scatter_kernel.set_arg(3, i * k);
+        scatter_kernel.set_arg(4, counts);
+        scatter_kernel.set_arg(5, offsets);
+        scatter_kernel.set_arg(6, *output_buffer);
+        scatter_kernel.set_arg(7, output_offset);
+        if(sort_by_key){
+            scatter_kernel.set_arg(8, *values_input_buffer);
+            scatter_kernel.set_arg(9, values_input_offset);
+            scatter_kernel.set_arg(10, *values_output_buffer);
+            scatter_kernel.set_arg(11, values_output_offset);
+        }
+        queue.enqueue_1d_range_kernel(scatter_kernel,
+                                      0,
+                                      block_count * block_size,
+                                      block_size);
+
+        // swap buffers: this pass's output is the next pass's input
+        std::swap(input_buffer, output_buffer);
+        std::swap(values_input_buffer, values_output_buffer);
+        std::swap(input_offset, output_offset);
+        std::swap(values_input_offset, values_output_offset);
+    }
+}
+
+template<class Iterator> // keys-only entry point: forwards to radix_sort_impl without a values range
+inline void radix_sort(Iterator first, // first key of the range to sort
+ Iterator last, // one past the last key
+ command_queue &queue) // queue passed through to radix_sort_impl
+{
+ radix_sort_impl(first, last, buffer_iterator<int>(), queue); // default-constructed values iterator; presumably has a null buffer, which radix_sort_impl treats as "no values"
+}
+
+template<class KeyIterator, class ValueIterator> // key/value entry point: forwards to radix_sort_impl with a values range
+inline void radix_sort_by_key(KeyIterator keys_first, // first key of the range to sort
+ KeyIterator keys_last, // one past the last key
+ ValueIterator values_first, // first value; values are moved together with their keys
+ command_queue &queue) // queue passed through to radix_sort_impl
+{
+ radix_sort_impl(keys_first, keys_last, values_first, queue); // all work is done by radix_sort_impl
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP
diff --git a/boost/compute/algorithm/detail/random_fill.hpp b/boost/compute/algorithm/detail/random_fill.hpp
new file mode 100644
index 0000000000..5c3827a9f8
--- /dev/null
+++ b/boost/compute/algorithm/detail/random_fill.hpp
@@ -0,0 +1,57 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP
+
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/random/default_random_engine.hpp>
+#include <boost/compute/random/uniform_real_distribution.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class OutputIterator, class Generator> // generator overload: fills [first, last) by calling g.fill()
+inline void random_fill(OutputIterator first, // start of the range to fill
+ OutputIterator last, // end of the range to fill
+ Generator &g, // generator object; must provide fill(first, last, queue)
+ command_queue &queue) // queue passed through to g.fill()
+{
+ g.fill(first, last, queue); // the generator does all the work
+}
+
+/// Fills [first, last) through a uniform_real_distribution constructed
+/// from (\p lo, \p hi) and driven by a default_random_engine created on
+/// \p queue.
+template<class OutputIterator>
+inline void
+random_fill(OutputIterator first,
+            OutputIterator last,
+            typename std::iterator_traits<OutputIterator>::value_type lo,
+            typename std::iterator_traits<OutputIterator>::value_type hi,
+            command_queue &queue)
+{
+    typedef typename std::iterator_traits<OutputIterator>::value_type value_type;
+    typedef boost::compute::default_random_engine rng_type;
+    typedef boost::compute::uniform_real_distribution<value_type> uniform_type;
+
+    // engine is constructed first, exactly as before, then the distribution
+    rng_type rng(queue);
+    uniform_type distribution(lo, hi);
+
+    distribution.fill(first, last, rng, queue);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP
diff --git a/boost/compute/algorithm/detail/reduce_by_key.hpp b/boost/compute/algorithm/detail/reduce_by_key.hpp
new file mode 100644
index 0000000000..65844c9ebf
--- /dev/null
+++ b/boost/compute/algorithm/detail/reduce_by_key.hpp
@@ -0,0 +1,119 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP
+
+#include <algorithm>
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/algorithm/detail/serial_reduce_by_key.hpp>
+#include <boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp>
+#include <boost/compute/type_traits.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// GPU path of reduce_by_key: delegates to reduce_by_key_with_scan and
+/// returns the size_t it reports.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator,
+         class BinaryFunction, class BinaryPredicate>
+size_t reduce_by_key_on_gpu(InputKeyIterator keys_first,
+                            InputKeyIterator keys_last,
+                            InputValueIterator values_first,
+                            OutputKeyIterator keys_result,
+                            OutputValueIterator values_result,
+                            BinaryFunction function,
+                            BinaryPredicate predicate,
+                            command_queue &queue)
+{
+    const size_t reduced_count =
+        detail::reduce_by_key_with_scan(
+            keys_first, keys_last, values_first,
+            keys_result, values_result,
+            function, predicate, queue
+        );
+
+    return reduced_count;
+}
+
+/// Decides whether the scan-based GPU implementation should be used.
+///
+/// Returns true only when all of the following hold, checked in this order:
+///  - \p count is greater than 256,
+///  - the device of \p queue does not have the device::cpu type bit set,
+///  - reduce_by_key_with_scan_requirements_met() accepts the same arguments.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator>
+bool reduce_by_key_on_gpu_requirements_met(InputKeyIterator keys_first,
+                                           InputValueIterator values_first,
+                                           OutputKeyIterator keys_result,
+                                           OutputValueIterator values_result,
+                                           const size_t count,
+                                           command_queue &queue)
+{
+    // small inputs are not worth the GPU path
+    if(count <= 256){
+        return false;
+    }
+
+    // CPU devices are excluded from this path
+    const device &dev = queue.get_device();
+    if(dev.type() & device::cpu){
+        return false;
+    }
+
+    // the unreachable trailing "return true;" of the old version is gone
+    return reduce_by_key_with_scan_requirements_met(keys_first, values_first,
+                                                    keys_result, values_result,
+                                                    count, queue);
+}
+
+/// Chooses an implementation of reduce_by_key for the given input and
+/// returns iterators one past the last written key and value.
+///
+/// Ranges with fewer than two keys are copied to the outputs unchanged.
+/// Otherwise the GPU implementation is used when
+/// reduce_by_key_on_gpu_requirements_met() says so, and
+/// serial_reduce_by_key() is used in every other case.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator,
+         class BinaryFunction, class BinaryPredicate>
+inline std::pair<OutputKeyIterator, OutputValueIterator>
+dispatch_reduce_by_key(InputKeyIterator keys_first,
+                       InputKeyIterator keys_last,
+                       InputValueIterator values_first,
+                       OutputKeyIterator keys_result,
+                       OutputValueIterator values_result,
+                       BinaryFunction function,
+                       BinaryPredicate predicate,
+                       command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<OutputKeyIterator>::difference_type key_diff_type;
+    typedef typename
+        std::iterator_traits<OutputValueIterator>::difference_type value_diff_type;
+    typedef std::pair<OutputKeyIterator, OutputValueIterator> result_type;
+
+    const size_t count = detail::iterator_range_size(keys_first, keys_last);
+
+    // number of key/value pairs written to the outputs
+    size_t written = 0;
+
+    if(count < 2){
+        // zero or one element: nothing to reduce, just copy
+        boost::compute::copy_n(keys_first, count, keys_result, queue);
+        boost::compute::copy_n(values_first, count, values_result, queue);
+        written = count;
+    }
+    else if(reduce_by_key_on_gpu_requirements_met(keys_first, values_first,
+                                                  keys_result, values_result,
+                                                  count, queue)){
+        written =
+            detail::reduce_by_key_on_gpu(keys_first, keys_last, values_first,
+                                         keys_result, values_result, function,
+                                         predicate, queue);
+    }
+    else {
+        written =
+            detail::serial_reduce_by_key(keys_first, keys_last, values_first,
+                                         keys_result, values_result, function,
+                                         predicate, queue);
+    }
+
+    return result_type(
+        keys_result + static_cast<key_diff_type>(written),
+        values_result + static_cast<value_diff_type>(written)
+    );
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP
diff --git a/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp b/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp
new file mode 100644
index 0000000000..e6852a67eb
--- /dev/null
+++ b/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp
@@ -0,0 +1,541 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP
+
+#include <algorithm>
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/algorithm/inclusive_scan.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/type_traits.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_
+///
+/// Fills \p new_keys_first with unsigned integer keys generated from vector
+/// of original keys \p keys_first. New keys can be distinguished by a simple
+/// equality predicate.
+///
+/// \param keys_first iterator pointing to the first key
+/// \param number_of_keys number of keys
+/// \param predicate binary predicate for key comparison
+/// \param new_keys_first iterator pointing to the new keys vector
+/// \param preferred_work_group_size preferred work group size
+/// \param queue command queue to perform the operation
+///
+/// Binary function \p predicate must take two keys as arguments and
+/// return true only if they are considered the same.
+///
+/// A kernel first writes a flag per key (1 when the key differs from its
+/// predecessor according to \p predicate, 0 otherwise and for the first key);
+/// an inclusive scan over the flags then produces the new keys.
+///
+/// The first new key equals zero and the last equals number of unique keys
+/// minus one.
+///
+/// No local memory usage.
+template<class InputKeyIterator, class BinaryPredicate>
+inline void generate_uint_keys(InputKeyIterator keys_first,
+                               size_t number_of_keys,
+                               BinaryPredicate predicate,
+                               vector<uint_>::iterator new_keys_first,
+                               size_t preferred_work_group_size,
+                               command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputKeyIterator>::value_type key_type;
+
+    detail::meta_kernel k("reduce_by_key_new_key_flags");
+    k.add_set_arg<const uint_>("count", uint_(number_of_keys));
+
+    k <<
+        k.decl<const uint_>("gid") << " = get_global_id(0);\n" <<
+        k.decl<uint_>("value") << " = 0;\n" <<
+        "if(gid >= count){\n return;\n}\n" <<
+        // every key but the first is compared with its predecessor
+        "if(gid > 0){ \n" <<
+        k.decl<key_type>("key") << " = " <<
+            keys_first[k.var<const uint_>("gid")] << ";\n" <<
+        k.decl<key_type>("previous_key") << " = " <<
+            keys_first[k.var<const uint_>("gid - 1")] << ";\n" <<
+        " value = " << predicate(k.var<key_type>("previous_key"),
+                                 k.var<key_type>("key")) <<
+        " ? 0 : 1;\n" <<
+        "}\n else {\n" <<
+        " value = 0;\n" <<
+        "}\n" <<
+        new_keys_first[k.var<const uint_>("gid")] << " = value;\n";
+
+    const context &context = queue.get_context();
+    kernel kernel = k.compile(context);
+
+    // Integer ceiling division: the former std::ceil(float(n) / size) lost
+    // precision above 2^24 keys and could launch too few work groups.
+    const size_t work_group_size = preferred_work_group_size;
+    const size_t work_groups_no =
+        (number_of_keys + work_group_size - 1) / work_group_size;
+
+    queue.enqueue_1d_range_kernel(kernel,
+                                  0,
+                                  work_groups_no * work_group_size,
+                                  work_group_size);
+
+    // turn the 0/1 flags into consecutive key ids
+    inclusive_scan(new_keys_first, new_keys_first + number_of_keys,
+                   new_keys_first, queue);
+}
+
+/// \internal_
+/// Calculate carry-out for each work group.
+/// Carry-out is a pair of the last key processed by a work group and sum of all
+/// values under this key in this work group.
+///
+/// \param keys_first iterator to the first (uint) key
+/// \param values_first iterator to the first value
+/// \param count number of key-value pairs
+/// \param carry_out_keys_first output: one key per work group
+/// \param carry_out_values_first output: one reduced value per work group
+/// \param function binary function used to combine values
+/// \param work_group_size work group size; also the element count of the
+///        two local memory buffers
+/// \param queue command queue to perform the operation
+template<class InputValueIterator, class OutputValueIterator, class BinaryFunction>
+inline void carry_outs(vector<uint_>::iterator keys_first,
+                       InputValueIterator values_first,
+                       size_t count,
+                       vector<uint_>::iterator carry_out_keys_first,
+                       OutputValueIterator carry_out_values_first,
+                       BinaryFunction function,
+                       size_t work_group_size,
+                       command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<OutputValueIterator>::value_type value_out_type;
+
+    detail::meta_kernel k("reduce_by_key_with_scan_carry_outs");
+    k.add_set_arg<const uint_>("count", uint_(count));
+    size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys");
+    size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals");
+
+    k <<
+        k.decl<const uint_>("gid") << " = get_global_id(0);\n" <<
+        k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" <<
+        k.decl<const uint_>("lid") << " = get_local_id(0);\n" <<
+        k.decl<const uint_>("group_id") << " = get_group_id(0);\n" <<
+
+        // load this work item's key-value pair into local memory
+        k.decl<uint_>("key") << ";\n" <<
+        k.decl<value_out_type>("value") << ";\n" <<
+        "if(gid < count){\n" <<
+            k.var<uint_>("key") << " = " <<
+                keys_first[k.var<const uint_>("gid")] << ";\n" <<
+            k.var<value_out_type>("value") << " = " <<
+                values_first[k.var<const uint_>("gid")] << ";\n" <<
+            "lkeys[lid] = key;\n" <<
+            "lvals[lid] = value;\n" <<
+        "}\n" <<
+
+        // Calculate carry out for each work group by performing Hillis/Steele scan
+        // where only last element (key-value pair) is saved
+        k.decl<value_out_type>("result") << " = value;\n" <<
+        k.decl<uint_>("other_key") << ";\n" <<
+        k.decl<value_out_type>("other_value") << ";\n" <<
+
+        "for(" << k.decl<uint_>("offset") << " = 1; " <<
+        "offset < wg_size; offset *= 2){\n"
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " if(lid >= offset){\n"
+        " other_key = lkeys[lid - offset];\n" <<
+        " if(other_key == key){\n" <<
+        " other_value = lvals[lid - offset];\n" <<
+        " result = " << function(k.var<value_out_type>("result"),
+                                 k.var<value_out_type>("other_value")) << ";\n" <<
+        " }\n" <<
+        " }\n" <<
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " lvals[lid] = result;\n" <<
+        "}\n" <<
+
+        // save carry out
+        "if(lid == (wg_size - 1)){\n" <<
+        carry_out_keys_first[k.var<const uint_>("group_id")] << " = key;\n" <<
+        carry_out_values_first[k.var<const uint_>("group_id")] << " = result;\n" <<
+        "}\n";
+
+    // Integer ceiling division: the former std::ceil(float(count) / size)
+    // lost precision above 2^24 elements and could launch too few work groups.
+    const size_t work_groups_no =
+        (count + work_group_size - 1) / work_group_size;
+
+    const context &context = queue.get_context();
+    kernel kernel = k.compile(context);
+    kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size));
+    kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size));
+
+    queue.enqueue_1d_range_kernel(kernel,
+                                  0,
+                                  work_groups_no * work_group_size,
+                                  work_group_size);
+}
+
+/// \internal_
+/// Calculate carry-in by performing inclusive scan by key on carry-outs vector.
+///
+/// \param carry_out_keys_first iterator to the first carry-out key
+/// \param carry_out_values_first iterator to the first carry-out value
+/// \param carry_in_values_first output: iterator to the first carry-in value
+/// \param carry_out_size number of carry-outs
+/// \param function binary function used to combine values
+/// \param work_group_size size of the single work group that is launched;
+///        also the element count of the two local memory buffers
+/// \param queue command queue to perform the operation
+///
+/// The kernel runs as one work group; each work item handles
+/// ceil(carry_out_size / work_group_size) consecutive carry-outs.
+template<class OutputValueIterator, class BinaryFunction>
+inline void carry_ins(vector<uint_>::iterator carry_out_keys_first,
+                      OutputValueIterator carry_out_values_first,
+                      OutputValueIterator carry_in_values_first,
+                      size_t carry_out_size,
+                      BinaryFunction function,
+                      size_t work_group_size,
+                      command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<OutputValueIterator>::value_type value_out_type;
+
+    // Integer ceiling division: the former std::ceil(float(n) / size) lost
+    // precision for sizes above 2^24 and could leave trailing carry-outs
+    // unprocessed.
+    const uint_ values_per_work_item = static_cast<uint_>(
+        (carry_out_size + work_group_size - 1) / work_group_size
+    );
+
+    detail::meta_kernel k("reduce_by_key_with_scan_carry_ins");
+    k.add_set_arg<const uint_>("carry_out_size", uint_(carry_out_size));
+    k.add_set_arg<const uint_>("values_per_work_item", values_per_work_item);
+    size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys");
+    size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals");
+
+    k <<
+        k.decl<uint_>("id") << " = get_global_id(0) * values_per_work_item;\n" <<
+        k.decl<uint_>("idx") << " = id;\n" <<
+        k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" <<
+        k.decl<const uint_>("lid") << " = get_local_id(0);\n" <<
+        k.decl<const uint_>("group_id") << " = get_group_id(0);\n" <<
+
+        k.decl<uint_>("key") << ";\n" <<
+        k.decl<value_out_type>("value") << ";\n" <<
+        k.decl<uint_>("previous_key") << ";\n" <<
+        k.decl<value_out_type>("result") << ";\n" <<
+
+        // first element of this work item's chunk is copied as is
+        "if(id < carry_out_size){\n" <<
+            k.var<uint_>("previous_key") << " = " <<
+                carry_out_keys_first[k.var<const uint_>("id")] << ";\n" <<
+            k.var<value_out_type>("result") << " = " <<
+                carry_out_values_first[k.var<const uint_>("id")] << ";\n" <<
+            carry_in_values_first[k.var<const uint_>("id")] << " = result;\n" <<
+        "}\n" <<
+
+        k.decl<const uint_>("end") << " = (id + values_per_work_item) <= carry_out_size" <<
+        " ? (values_per_work_item + id) : carry_out_size;\n" <<
+
+        // serial inclusive scan by key over the rest of the chunk
+        "for(idx = idx + 1; idx < end; idx += 1){\n" <<
+        " key = " << carry_out_keys_first[k.var<const uint_>("idx")] << ";\n" <<
+        " value = " << carry_out_values_first[k.var<const uint_>("idx")] << ";\n" <<
+        " if(previous_key == key){\n" <<
+        " result = " << function(k.var<value_out_type>("result"),
+                                 k.var<value_out_type>("value")) << ";\n" <<
+        " }\n else { \n" <<
+        " result = value;\n"
+        " }\n" <<
+        " " << carry_in_values_first[k.var<const uint_>("idx")] << " = result;\n" <<
+        " previous_key = key;\n"
+        "}\n" <<
+
+        // save the last key and result to local memory
+        "lkeys[lid] = previous_key;\n" <<
+        "lvals[lid] = result;\n" <<
+
+        // Hillis/Steele scan
+        "for(" << k.decl<uint_>("offset") << " = 1; " <<
+        "offset < wg_size; offset *= 2){\n"
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " if(lid >= offset){\n"
+        " key = lkeys[lid - offset];\n" <<
+        " if(previous_key == key){\n" <<
+        " value = lvals[lid - offset];\n" <<
+        " result = " << function(k.var<value_out_type>("result"),
+                                 k.var<value_out_type>("value")) << ";\n" <<
+        " }\n" <<
+        " }\n" <<
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " lvals[lid] = result;\n" <<
+        "}\n" <<
+        "barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+
+        "if(lid > 0){\n" <<
+        // load key-value reduced by previous work item
+        " previous_key = lkeys[lid - 1];\n" <<
+        " result = lvals[lid - 1];\n" <<
+        "}\n" <<
+
+        // add key-value reduced by previous work item
+        "for(idx = id; idx < id + values_per_work_item; idx += 1){\n" <<
+        // make sure all carry-ins are saved in global memory
+        " barrier( CLK_GLOBAL_MEM_FENCE );\n" <<
+        " if(lid > 0 && idx < carry_out_size) {\n"
+        " key = " << carry_out_keys_first[k.var<const uint_>("idx")] << ";\n" <<
+        " value = " << carry_in_values_first[k.var<const uint_>("idx")] << ";\n" <<
+        " if(previous_key == key){\n" <<
+        " value = " << function(k.var<value_out_type>("result"),
+                                k.var<value_out_type>("value")) << ";\n" <<
+        " }\n" <<
+        " " << carry_in_values_first[k.var<const uint_>("idx")] << " = value;\n" <<
+        " }\n" <<
+        "}\n";
+
+
+    const context &context = queue.get_context();
+    kernel kernel = k.compile(context);
+    kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size));
+    kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size));
+
+    // a single work group processes the whole carry-out vector
+    queue.enqueue_1d_range_kernel(kernel,
+                                  0,
+                                  work_group_size,
+                                  work_group_size);
+}
+
+/// \internal_
+///
+/// Perform final reduction by key. Each work item:
+/// 1. Perform local work-group reduction (Hillis/Steele scan)
+/// 2. Add carry-in (if keys are right)
+/// 3. Save reduced value if next key is different than processed one
+///
+/// \param keys_first iterator to the first original key
+/// \param values_first iterator to the first value
+/// \param keys_result output: reduced keys, indexed by the new uint key
+/// \param values_result output: reduced values, indexed by the new uint key
+/// \param count number of key-value pairs
+/// \param function binary function used to combine values
+/// \param new_keys_first iterator to the first generated uint key
+/// \param carry_in_keys_first iterator to the first carry-in key
+/// \param carry_in_values_first iterator to the first carry-in value
+/// \param carry_in_size not used by this function
+/// \param work_group_size work group size; also the element count of the
+///        two local memory buffers
+/// \param queue command queue to perform the operation
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator,
+         class BinaryFunction>
+inline void final_reduction(InputKeyIterator keys_first,
+                            InputValueIterator values_first,
+                            OutputKeyIterator keys_result,
+                            OutputValueIterator values_result,
+                            size_t count,
+                            BinaryFunction function,
+                            vector<uint_>::iterator new_keys_first,
+                            vector<uint_>::iterator carry_in_keys_first,
+                            OutputValueIterator carry_in_values_first,
+                            size_t carry_in_size,
+                            size_t work_group_size,
+                            command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<OutputValueIterator>::value_type value_out_type;
+
+    detail::meta_kernel k("reduce_by_key_with_scan_final_reduction");
+    k.add_set_arg<const uint_>("count", uint_(count));
+    size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys");
+    size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals");
+
+    k <<
+        k.decl<const uint_>("gid") << " = get_global_id(0);\n" <<
+        k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" <<
+        k.decl<const uint_>("lid") << " = get_local_id(0);\n" <<
+        k.decl<const uint_>("group_id") << " = get_group_id(0);\n" <<
+
+        // load this work item's (new key, value) pair into local memory
+        k.decl<uint_>("key") << ";\n" <<
+        k.decl<value_out_type>("value") << ";\n"
+
+        "if(gid < count){\n" <<
+            k.var<uint_>("key") << " = " <<
+                new_keys_first[k.var<const uint_>("gid")] << ";\n" <<
+            k.var<value_out_type>("value") << " = " <<
+                values_first[k.var<const uint_>("gid")] << ";\n" <<
+            "lkeys[lid] = key;\n" <<
+            "lvals[lid] = value;\n" <<
+        "}\n" <<
+
+        // Hillis/Steele scan
+        k.decl<value_out_type>("result") << " = value;\n" <<
+        k.decl<uint_>("other_key") << ";\n" <<
+        k.decl<value_out_type>("other_value") << ";\n" <<
+
+        "for(" << k.decl<uint_>("offset") << " = 1; " <<
+        "offset < wg_size ; offset *= 2){\n"
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " if(lid >= offset) {\n" <<
+        " other_key = lkeys[lid - offset];\n" <<
+        " if(other_key == key){\n" <<
+        " other_value = lvals[lid - offset];\n" <<
+        " result = " << function(k.var<value_out_type>("result"),
+                                 k.var<value_out_type>("other_value")) << ";\n" <<
+        " }\n" <<
+        " }\n" <<
+        " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+        " lvals[lid] = result;\n" <<
+        "}\n" <<
+
+        // out-of-range work items leave only after the last barrier
+        "if(gid >= count) {\n return;\n};\n" <<
+
+        k.decl<const bool>("save") << " = (gid < (count - 1)) ?"
+            << new_keys_first[k.var<const uint_>("gid + 1")] << " != key" <<
+        ": true;\n" <<
+
+        // Add carry in
+        k.decl<uint_>("carry_in_key") << ";\n" <<
+        "if(group_id > 0 && save) {\n" <<
+        " carry_in_key = " << carry_in_keys_first[k.var<const uint_>("group_id - 1")] << ";\n" <<
+        " if(key == carry_in_key){\n" <<
+        " other_value = " << carry_in_values_first[k.var<const uint_>("group_id - 1")] << ";\n" <<
+        " result = " << function(k.var<value_out_type>("result"),
+                                 k.var<value_out_type>("other_value")) << ";\n" <<
+        " }\n" <<
+        "}\n" <<
+
+        // Save result only if the next key is different or it's the last element.
+        "if(save){\n" <<
+        keys_result[k.var<uint_>("key")] << " = " << keys_first[k.var<const uint_>("gid")] << ";\n" <<
+        values_result[k.var<uint_>("key")] << " = result;\n" <<
+        "}\n"
+        ;
+
+    // Integer ceiling division: the former std::ceil(float(count) / size)
+    // lost precision above 2^24 elements and could launch too few work groups.
+    const size_t work_groups_no =
+        (count + work_group_size - 1) / work_group_size;
+
+    const context &context = queue.get_context();
+    kernel kernel = k.compile(context);
+    kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size));
+    kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size));
+
+    queue.enqueue_1d_range_kernel(kernel,
+                                  0,
+                                  work_groups_no * work_group_size,
+                                  work_group_size);
+}
+
+/// \internal_
+/// Returns the work group size to use for the reduce by key with scan
+/// algorithm on \p device.
+///
+/// The preferred size is read from the device's parameter cache (entry
+/// "wgsize", 256 when the cache holds no such entry) under a key built from
+/// the type names of \c KeyType and \c ValueType. The preferred size is then
+/// limited to CL_DEVICE_MAX_WORK_GROUP_SIZE, so the returned value is never
+/// larger than the maximum the device reports.
+template<class KeyType, class ValueType>
+inline size_t get_work_group_size(const device& device)
+{
+    std::string cache_key = std::string("__boost_reduce_by_key_with_scan")
+        + "k_" + type_name<KeyType>() + "_v_" + type_name<ValueType>();
+
+    // load parameters
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    const size_t preferred_size =
+        static_cast<size_t>(parameters->get(cache_key, "wgsize", 256));
+    const size_t device_limit =
+        static_cast<size_t>(device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
+
+    // Take the smaller of the two: the device limit is an upper bound for the
+    // preferred size, not a lower one.
+    return (std::min)(preferred_size, device_limit);
+}
+
+/// \internal_
+///
+/// Reduce by key implemented with scans. Returns the number of reduced
+/// elements, computed as the last generated unsigned integer key plus one
+/// (0 when the input key range is empty).
+///
+/// 1. For each work group carry-out value is calculated (it's done by key-oriented
+/// Hillis/Steele scan). Carry-out is a pair of the last key processed by work
+/// group and sum of all values under this key in work group.
+/// 2. From every carry-out carry-in is calculated by performing inclusive scan
+/// by key.
+/// 3. Final reduction by key is performed (key-oriented Hillis/Steele scan),
+/// carry-in values are added where needed.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator,
+         class BinaryFunction, class BinaryPredicate>
+inline size_t reduce_by_key_with_scan(InputKeyIterator keys_first,
+                                      InputKeyIterator keys_last,
+                                      InputValueIterator values_first,
+                                      OutputKeyIterator keys_result,
+                                      OutputValueIterator values_result,
+                                      BinaryFunction function,
+                                      BinaryPredicate predicate,
+                                      command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputValueIterator>::value_type value_type;
+    typedef typename
+        std::iterator_traits<InputKeyIterator>::value_type key_type;
+    typedef typename
+        std::iterator_traits<OutputValueIterator>::value_type value_out_type;
+
+    const context &context = queue.get_context();
+    size_t count = detail::iterator_range_size(keys_first, keys_last);
+
+    if(count == 0){
+        return size_t(0);
+    }
+
+    const device &device = queue.get_device();
+    // get_work_group_size is declared as <KeyType, ValueType>; passing the
+    // types in that order makes this function read the same parameter cache
+    // entry as reduce_by_key_with_scan_requirements_met().
+    size_t work_group_size = get_work_group_size<key_type, value_type>(device);
+
+    // Replace original key with unsigned integer keys generated based on given
+    // predicate. New key is also an index for keys_result and values_result vectors,
+    // which points to place where reduced value should be saved.
+    vector<uint_> new_keys(count, context);
+    vector<uint_>::iterator new_keys_first = new_keys.begin();
+    generate_uint_keys(keys_first, count, predicate, new_keys_first,
+                       work_group_size, queue);
+
+    // Calculate carry-out and carry-in vectors size
+    const size_t carry_out_size = static_cast<size_t>(
+        std::ceil(float(count) / work_group_size)
+    );
+    vector<uint_> carry_out_keys(carry_out_size, context);
+    vector<value_out_type> carry_out_values(carry_out_size, context);
+    carry_outs(new_keys_first, values_first, count, carry_out_keys.begin(),
+               carry_out_values.begin(), function, work_group_size, queue);
+
+    vector<value_out_type> carry_in_values(carry_out_size, context);
+    carry_ins(carry_out_keys.begin(), carry_out_values.begin(),
+              carry_in_values.begin(), carry_out_size, function, work_group_size,
+              queue);
+
+    final_reduction(keys_first, values_first, keys_result, values_result,
+                    count, function, new_keys_first, carry_out_keys.begin(),
+                    carry_in_values.begin(), carry_out_size, work_group_size,
+                    queue);
+
+    // The last generated key is the index of the last reduced element, so the
+    // number of reduced elements is that key plus one.
+    const size_t result = read_single_value<uint_>(new_keys.get_buffer(),
+                                                   count - 1, queue);
+    return result + 1;
+}
+
+/// \internal_
+/// Tells whether reduce by key with scan can be used on the device of
+/// \p queue: the device must report dedicated local memory (CL_LOCAL) and
+/// that memory must be large enough to hold one \c uint_ key and one reduced
+/// value per work-item of a work group of the preferred size.
+///
+/// The iterator arguments are used only for their types.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator>
+bool reduce_by_key_with_scan_requirements_met(InputKeyIterator keys_first,
+                                              InputValueIterator values_first,
+                                              OutputKeyIterator keys_result,
+                                              OutputValueIterator values_result,
+                                              const size_t count,
+                                              command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputKeyIterator>::value_type key_type;
+    typedef typename
+        std::iterator_traits<InputValueIterator>::value_type value_type;
+    typedef typename
+        std::iterator_traits<OutputValueIterator>::value_type value_out_type;
+
+    // only the iterator types matter here
+    (void) keys_first;
+    (void) values_first;
+    (void) keys_result;
+    (void) values_result;
+
+    const device &target = queue.get_device();
+
+    // the algorithm is only used with dedicated local memory storage
+    const bool has_dedicated_local_memory =
+        (target.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() == CL_LOCAL);
+    if(!has_dedicated_local_memory){
+        return false;
+    }
+
+    // local memory available, in bytes
+    const size_t available_bytes =
+        target.get_info<CL_DEVICE_LOCAL_MEM_SIZE>();
+
+    // preferred work group size
+    const size_t group_size = get_work_group_size<key_type, value_type>(target);
+
+    // every work-item stores one key and one reduced value in local memory
+    const size_t required_bytes =
+        (sizeof(uint_) + sizeof(value_out_type)) * group_size;
+
+    return !(available_bytes < required_bytes);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP
diff --git a/boost/compute/algorithm/detail/reduce_on_gpu.hpp b/boost/compute/algorithm/detail/reduce_on_gpu.hpp
new file mode 100644
index 0000000000..335fba8724
--- /dev/null
+++ b/boost/compute/algorithm/detail/reduce_on_gpu.hpp
@@ -0,0 +1,286 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP
+
+#include <iterator>
+
+#include <boost/compute/utility/source.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/vendor.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+#include <boost/compute/detail/work_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal
+/// Generic reduction body: returns OpenCL source for a loop that combines
+/// the entries of \c scratch with \c += and leaves the total in scratch[0].
+/// The emitted text refers to \c TPB, \c lid and \c scratch, which the
+/// surrounding kernel source has to provide.
+template<typename T,bool isNvidiaDevice>
+struct ReduceBody
+{
+    static std::string body()
+    {
+        // local reduction, one statement of generated source per line
+        std::string source;
+        source += "for(int i = 1; i < TPB; i <<= 1){\n";
+        source += " barrier(CLK_LOCAL_MEM_FENCE);\n";
+        source += " uint mask = (i << 1) - 1;\n";
+        source += " if((lid & mask) == 0){\n";
+        source += " scratch[lid] += scratch[lid+i];\n";
+        source += " }\n";
+        source += "}\n";
+        return source;
+    }
+};
+
+/// \internal
+/// Reduction body used for nvidia devices: the "unsafe" memory optimisation.
+/// Stages for 1024/512/256/128 work-items are guarded by \c TPB and separated
+/// by barriers; the remaining steps run for \c lid < 32 through a
+/// \c volatile local pointer and use no barrier.
+/// The emitted text refers to \c TPB, \c lid, \c sum and \c scratch, which
+/// the surrounding kernel source has to provide.
+template<typename T>
+struct ReduceBody<T,true>
+{
+    static std::string body()
+    {
+        std::string source;
+
+        // local reduction when size is greater than warp size; the TPB
+        // conditions let the compiler keep only the useful instructions
+        source += "barrier(CLK_LOCAL_MEM_FENCE);\n";
+        source += "if(TPB >= 1024){\n";
+        source += "if(lid < 512) { sum += scratch[lid + 512]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n";
+        source += "if(TPB >= 512){\n";
+        source += "if(lid < 256) { sum += scratch[lid + 256]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n";
+        source += "if(TPB >= 256){\n";
+        source += "if(lid < 128) { sum += scratch[lid + 128]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n";
+        source += "if(TPB >= 128){\n";
+        source += "if(lid < 64) { sum += scratch[lid + 64]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);} \n";
+
+        // warp reduction
+        source += "if(lid < 32){\n";
+        // volatile this way we don't need any barrier
+        source += "volatile __local ";
+        source += type_name<T>();
+        source += " *lmem = scratch;\n";
+        source += "if(TPB >= 64) { lmem[lid] = sum = sum + lmem[lid+32];} \n";
+        source += "if(TPB >= 32) { lmem[lid] = sum = sum + lmem[lid+16];} \n";
+        source += "if(TPB >= 16) { lmem[lid] = sum = sum + lmem[lid+ 8];} \n";
+        source += "if(TPB >= 8) { lmem[lid] = sum = sum + lmem[lid+ 4];} \n";
+        source += "if(TPB >= 4) { lmem[lid] = sum = sum + lmem[lid+ 2];} \n";
+        source += "if(TPB >= 2) { lmem[lid] = sum = sum + lmem[lid+ 1];} \n";
+        source += "}\n";
+
+        return source;
+    }
+};
+
+/// First reduction pass for arbitrary input iterators.
+///
+/// Builds and runs an "initial_reduce" kernel in which every work-item adds
+/// up to \p vpt input values, the work group combines them with the generic
+/// ReduceBody and one partial sum per work group is stored in \p result.
+/// \p function and \p reduce_kernel are not used by this overload; the
+/// generated source always combines values with `+`.
+template<class InputIterator, class Function>
+inline void initial_reduce(InputIterator first,
+                           InputIterator last,
+                           buffer result,
+                           const Function &function,
+                           kernel &reduce_kernel,
+                           const uint_ vpt,
+                           const uint_ tpb,
+                           command_queue &queue)
+{
+    (void) function;
+    (void) reduce_kernel;
+
+    typedef typename std::iterator_traits<InputIterator>::value_type Arg;
+    typedef typename boost::tr1_result_of<Function(Arg, Arg)>::type T;
+
+    const size_t input_size = std::distance(first, last);
+
+    detail::meta_kernel k("initial_reduce");
+    k.add_set_arg<const uint_>("count", uint_(input_size));
+    const size_t output_arg =
+        k.add_arg<T *>(memory_object::global_memory, "output");
+
+    // work-item parameters and scratch storage
+    k << k.decl<const uint_>("offset") << " = get_group_id(0) * VPT * TPB;\n";
+    k << k.decl<const uint_>("lid") << " = get_local_id(0);\n";
+    k << "__local " << type_name<T>() << " scratch[TPB];\n";
+
+    // private reduction
+    k << k.decl<T>("sum") << " = 0;\n";
+    k << "for(uint i = 0; i < VPT; i++){\n"
+         " if(offset + lid + i*TPB < count){\n"
+         " sum = sum + " << first[k.var<uint_>("offset+lid+i*TPB")] << ";\n";
+    k << " }\n"
+         "}\n";
+    k << "scratch[lid] = sum;\n";
+
+    // local reduction
+    k << ReduceBody<T,false>::body();
+
+    // write sum to output
+    k << "if(lid == 0){\n"
+         " output[get_group_id(0)] = scratch[0];\n"
+         "}\n";
+
+    // VPT and TPB reach the kernel source as compiler definitions
+    std::stringstream build_options;
+    build_options << "-DVPT=" << vpt << " -DTPB=" << tpb;
+
+    const context &context = queue.get_context();
+    kernel generic_reduce_kernel = k.compile(context, build_options.str());
+    generic_reduce_kernel.set_arg(output_arg, result);
+
+    const size_t global_size = calculate_work_size(input_size, vpt, tpb);
+    queue.enqueue_1d_range_kernel(generic_reduce_kernel, 0, global_size, tpb);
+}
+
+/// First reduction pass for buffer_iterator input combined with plus<T>.
+///
+/// Reuses the already built \p reduce_kernel: binds the input buffer and its
+/// start index, the element count and \p result (written from offset 0),
+/// then enqueues the kernel with \p tpb work-items per work group.
+template<class T>
+inline void initial_reduce(const buffer_iterator<T> &first,
+                           const buffer_iterator<T> &last,
+                           const buffer &result,
+                           const plus<T> &function,
+                           kernel &reduce_kernel,
+                           const uint_ vpt,
+                           const uint_ tpb,
+                           command_queue &queue)
+{
+    (void) function;
+
+    const size_t element_count = std::distance(first, last);
+    const size_t global_size = calculate_work_size(element_count, vpt, tpb);
+
+    // slots 0-2 describe the input, slots 3-4 the output
+    reduce_kernel.set_arg(0, first.get_buffer());
+    reduce_kernel.set_arg(1, uint_(first.get_index()));
+    reduce_kernel.set_arg(2, uint_(element_count));
+    reduce_kernel.set_arg(3, result);
+    reduce_kernel.set_arg(4, uint_(0));
+
+    queue.enqueue_1d_range_kernel(reduce_kernel, 0, global_size, tpb);
+}
+
+/// Reduces the values in [\p first, \p last) on the device and stores the
+/// single result at \p result.
+///
+/// The generated "reduce" kernel combines values with `+`; \p function is
+/// only forwarded to initial_reduce(). The values-per-thread (vpt) and
+/// threads-per-block (tpb) settings come from the device's parameter cache
+/// (defaults 8 and 128) and the built program is kept in the program cache.
+///
+/// The reduction runs in passes: a first pass from the input into a
+/// temporary buffer, middle passes between two temporary buffers while more
+/// than vpt * tpb partial sums remain, and a final pass with a single work
+/// group that writes to \p result.
+template<class InputIterator, class T, class Function>
+inline void reduce_on_gpu(InputIterator first,
+                          InputIterator last,
+                          buffer_iterator<T> result,
+                          Function function,
+                          command_queue &queue)
+{
+    const device &device = queue.get_device();
+    const context &context = queue.get_context();
+
+    detail::meta_kernel k("reduce");
+    k.add_arg<const T*>(memory_object::global_memory, "input");
+    k.add_arg<const uint_>("offset");
+    k.add_arg<const uint_>("count");
+    k.add_arg<T*>(memory_object::global_memory, "output");
+    k.add_arg<const uint_>("output_offset");
+
+    k <<
+        k.decl<const uint_>("block_offset") << " = get_group_id(0) * VPT * TPB;\n" <<
+        "__global const " << type_name<T>() << " *block = input + offset + block_offset;\n" <<
+        k.decl<const uint_>("lid") << " = get_local_id(0);\n" <<
+
+        "__local " << type_name<T>() << " scratch[TPB];\n" <<
+        // private reduction
+        k.decl<T>("sum") << " = 0;\n" <<
+        "for(uint i = 0; i < VPT; i++){\n" <<
+        " if(block_offset + lid + i*TPB < count){\n" <<
+        " sum = sum + block[lid+i*TPB]; \n" <<
+        " }\n" <<
+        "}\n" <<
+
+        "scratch[lid] = sum;\n";
+
+    // discrimination on vendor name
+    if(is_nvidia_device(device))
+        k << ReduceBody<T,true>::body();
+    else
+        k << ReduceBody<T,false>::body();
+
+    k <<
+        // write sum to output
+        "if(lid == 0){\n" <<
+        " output[output_offset + get_group_id(0)] = scratch[0];\n" <<
+        "}\n";
+
+    std::string cache_key = std::string("__boost_reduce_on_gpu_") + type_name<T>();
+
+    // load parameters
+    boost::shared_ptr<parameter_cache> parameters =
+        detail::parameter_cache::get_global_cache(device);
+
+    uint_ vpt = parameters->get(cache_key, "vpt", 8);
+    uint_ tpb = parameters->get(cache_key, "tpb", 128);
+
+    // reduce program compiler flags
+    std::stringstream options;
+    options << "-DT=" << type_name<T>()
+            << " -DVPT=" << vpt
+            << " -DTPB=" << tpb;
+
+    // load program
+    boost::shared_ptr<program_cache> cache =
+        program_cache::get_global_cache(context);
+
+    program reduce_program = cache->get_or_build(
+        cache_key, options.str(), k.source(), context
+    );
+
+    // create reduce kernel
+    kernel reduce_kernel(reduce_program, "reduce");
+
+    size_t count = std::distance(first, last);
+
+    // first pass, reduce from input to ping
+    buffer ping(context, std::ceil(float(count) / vpt / tpb) * sizeof(T));
+    initial_reduce(first, last, ping, function, reduce_kernel, vpt, tpb, queue);
+
+    // update count after initial reduce
+    count = static_cast<size_t>(std::ceil(float(count) / vpt / tpb));
+
+    // middle pass(es), reduce between ping and pong
+    const buffer *input_buffer = &ping;
+    // A middle pass writes one partial sum per work group, which is
+    // ceil(count / vpt / tpb) values. pong therefore uses the same rounded-up
+    // size expression as ping; a truncating division would leave it one
+    // element short whenever count is not a multiple of vpt * tpb.
+    buffer pong(
+        context,
+        static_cast<size_t>(std::ceil(float(count) / vpt / tpb)) * sizeof(T)
+    );
+    const buffer *output_buffer = &pong;
+    while(count > vpt * tpb){
+        reduce_kernel.set_arg(0, *input_buffer);
+        reduce_kernel.set_arg(1, uint_(0));
+        reduce_kernel.set_arg(2, uint_(count));
+        reduce_kernel.set_arg(3, *output_buffer);
+        reduce_kernel.set_arg(4, uint_(0));
+
+        // round the global size up to a whole number of work groups
+        size_t work_size = static_cast<size_t>(std::ceil(float(count) / vpt));
+        if(work_size % tpb != 0){
+            work_size += tpb - work_size % tpb;
+        }
+        queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb);
+
+        // the output of this pass is the input of the next one
+        std::swap(input_buffer, output_buffer);
+        count = static_cast<size_t>(std::ceil(float(count) / vpt / tpb));
+    }
+
+    // final pass, reduce from ping/pong to result
+    reduce_kernel.set_arg(0, *input_buffer);
+    reduce_kernel.set_arg(1, uint_(0));
+    reduce_kernel.set_arg(2, uint_(count));
+    reduce_kernel.set_arg(3, result.get_buffer());
+    reduce_kernel.set_arg(4, uint_(result.get_index()));
+
+    // a single work group is enough for the remaining values
+    queue.enqueue_1d_range_kernel(reduce_kernel, 0, tpb, tpb);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP
diff --git a/boost/compute/algorithm/detail/scan.hpp b/boost/compute/algorithm/detail/scan.hpp
new file mode 100644
index 0000000000..154b6001be
--- /dev/null
+++ b/boost/compute/algorithm/detail/scan.hpp
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP
+
+#include <boost/compute/device.hpp>
+#include <boost/compute/algorithm/detail/scan_on_cpu.hpp>
+#include <boost/compute/algorithm/detail/scan_on_gpu.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Runs a scan on the device of \p queue, using scan_on_cpu() when the
+/// device type has the cpu bit set and scan_on_gpu() otherwise. All
+/// arguments are forwarded unchanged and the selected implementation's
+/// return value is returned.
+template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
+inline OutputIterator scan(InputIterator first,
+                           InputIterator last,
+                           OutputIterator result,
+                           bool exclusive,
+                           T init,
+                           BinaryOperator op,
+                           command_queue &queue)
+{
+    const device &target = queue.get_device();
+    const bool runs_on_cpu = (target.type() & device::cpu) != 0;
+
+    if(!runs_on_cpu){
+        return scan_on_gpu(first, last, result, exclusive, init, op, queue);
+    }
+
+    return scan_on_cpu(first, last, result, exclusive, init, op, queue);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP
diff --git a/boost/compute/algorithm/detail/scan_on_cpu.hpp b/boost/compute/algorithm/detail/scan_on_cpu.hpp
new file mode 100644
index 0000000000..6611c0ba3e
--- /dev/null
+++ b/boost/compute/algorithm/detail/scan_on_cpu.hpp
@@ -0,0 +1,103 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP
+
+#include <iterator>
+
+#include <boost/compute/device.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Serial scan: builds a "scan_on_cpu" kernel that walks the input once and
+/// enqueues it with a single work-item.
+///
+/// For an inclusive scan the running sum starts with the first input value;
+/// for an exclusive scan it starts with \p init and each output receives the
+/// sum before the current input is added. Returns \p result unchanged for an
+/// empty range, otherwise \p result advanced by the number of elements.
+template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
+inline OutputIterator scan_on_cpu(InputIterator first,
+                                  InputIterator last,
+                                  OutputIterator result,
+                                  bool exclusive,
+                                  T init,
+                                  BinaryOperator op,
+                                  command_queue &queue)
+{
+    if(first == last){
+        return result;
+    }
+
+    typedef typename
+        std::iterator_traits<InputIterator>::value_type input_type;
+    typedef typename
+        std::iterator_traits<OutputIterator>::value_type output_type;
+
+    const context &context = queue.get_context();
+    const size_t n = detail::iterator_range_size(first, last);
+
+    // create scan kernel
+    meta_kernel k("scan_on_cpu");
+
+    // Arguments
+    const size_t n_arg = k.add_arg<ulong_>("n");
+    const size_t init_arg = k.add_arg<output_type>("initial_value");
+
+    // prologue: where the loop starts and what the running sum starts with
+    if(exclusive){
+        k << k.decl<const ulong_>("start_idx") << " = 0;\n";
+        k << k.decl<output_type>("sum") << " = initial_value;\n";
+    }
+    else {
+        k << k.decl<const ulong_>("start_idx") << " = 1;\n";
+        k << k.decl<output_type>("sum") << " = " << first[0] << ";\n";
+        k << result[0] << " = sum;\n";
+    }
+
+    // loop header and load of the current input value
+    k << "for(ulong i = start_idx; i < n; i++){\n";
+    k << k.decl<const input_type>("x") << " = "
+      << first[k.var<ulong_>("i")] << ";\n";
+
+    // exclusive: store the sum before the current value is added
+    if(exclusive){
+        k << result[k.var<ulong_>("i")] << " = sum;\n";
+    }
+
+    k << " sum = "
+      << op(k.var<output_type>("sum"), k.var<output_type>("x"))
+      << ";\n";
+
+    // inclusive: store the sum after the current value is added
+    if(!exclusive){
+        k << result[k.var<ulong_>("i")] << " = sum;\n";
+    }
+
+    k << "}\n";
+
+    // compile scan kernel and bind its arguments
+    kernel scan_kernel = k.compile(context);
+    scan_kernel.set_arg<ulong_>(n_arg, n);
+    scan_kernel.set_arg<output_type>(init_arg, static_cast<output_type>(init));
+
+    // execute the kernel with one work-item
+    queue.enqueue_1d_range_kernel(scan_kernel, 0, 1, 1);
+
+    // iterator pointing to the end of the result range
+    return result + n;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP
diff --git a/boost/compute/algorithm/detail/scan_on_gpu.hpp b/boost/compute/algorithm/detail/scan_on_gpu.hpp
new file mode 100644
index 0000000000..07c6d6d3c0
--- /dev/null
+++ b/boost/compute/algorithm/detail/scan_on_gpu.hpp
@@ -0,0 +1,331 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP
+
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/scan_on_cpu.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/memory/local_buffer.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Meta-kernel ("local_scan") that scans one block of the input per work
+/// group in local memory. Each work-item loads one value into \c scratch,
+/// the block is scanned with \c op, the scanned values are written to the
+/// output and the last work-item of the block stores a per-block value in
+/// \c block_sums. The indices of the kernel arguments are kept in the
+/// public m_*_arg members so that the caller can bind them.
+template<class InputIterator, class OutputIterator, class BinaryOperator>
+class local_scan_kernel : public meta_kernel
+{
+public:
+ /// Generates the kernel source for scanning from \p first into \p result.
+ /// \p last is unused; the element count is a kernel argument instead.
+ /// \p exclusive selects the exclusive variant of the generated source.
+ local_scan_kernel(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ bool exclusive,
+ BinaryOperator op)
+ : meta_kernel("local_scan")
+ {
+ typedef typename std::iterator_traits<InputIterator>::value_type T;
+
+ (void) last;
+
+ // always true here: the "gid < count" guards below are always emitted
+ bool checked = true;
+
+ m_block_sums_arg = add_arg<T *>(memory_object::global_memory, "block_sums");
+ m_scratch_arg = add_arg<T *>(memory_object::local_memory, "scratch");
+ m_block_size_arg = add_arg<const cl_uint>("block_size");
+ m_count_arg = add_arg<const cl_uint>("count");
+ m_init_value_arg = add_arg<const T>("init");
+
+ // work-item parameters
+ *this <<
+ "const uint gid = get_global_id(0);\n" <<
+ "const uint lid = get_local_id(0);\n";
+
+ // check against data size
+ if(checked){
+ *this <<
+ "if(gid < count){\n";
+ }
+
+ // copy values from input to local memory
+ // (exclusive: values are shifted by one; the first slot of a block gets
+ // init for the very first work-item and 0 for the other blocks)
+ if(exclusive){
+ *this <<
+ decl<const T>("local_init") << "= (gid == 0) ? init : 0;\n" <<
+ "if(lid == 0){ scratch[lid] = local_init; }\n" <<
+ "else { scratch[lid] = " << first[expr<cl_uint>("gid-1")] << "; }\n";
+ }
+ else{
+ *this <<
+ "scratch[lid] = " << first[expr<cl_uint>("gid")] << ";\n";
+ }
+
+ // work-items past the end of the data store 0 in their scratch slot
+ if(checked){
+ *this <<
+ "}\n"
+ "else {\n" <<
+ " scratch[lid] = 0;\n" <<
+ "}\n";
+ }
+
+ // wait for all threads to read from input
+ *this <<
+ "barrier(CLK_LOCAL_MEM_FENCE);\n";
+
+ // perform scan
+ // (the stride doubles every step; x is read before the barrier and
+ // combined into scratch[lid] after it)
+ *this <<
+ "for(uint i = 1; i < block_size; i <<= 1){\n" <<
+ " " << decl<const T>("x") << " = lid >= i ? scratch[lid-i] : 0;\n" <<
+ " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+ " if(lid >= i){\n" <<
+ " scratch[lid] = " << op(var<T>("scratch[lid]"), var<T>("x")) << ";\n" <<
+ " }\n" <<
+ " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+ "}\n";
+
+ // copy results to output
+ if(checked){
+ *this <<
+ "if(gid < count){\n";
+ }
+
+ *this <<
+ result[expr<cl_uint>("gid")] << " = scratch[lid];\n";
+
+ if(checked){
+ *this << "}\n";
+ }
+
+ // store sum for the block
+ // (exclusive: the block's last input value is combined with its scanned
+ // value first, because the shifted scan does not contain it yet)
+ if(exclusive){
+ *this <<
+ "if(lid == block_size - 1){\n" <<
+ " block_sums[get_group_id(0)] = " <<
+ op(first[expr<cl_uint>("gid")], var<T>("scratch[lid]")) <<
+ ";\n" <<
+ "}\n";
+ }
+ else {
+ *this <<
+ "if(lid == block_size - 1){\n" <<
+ " block_sums[get_group_id(0)] = scratch[lid];\n" <<
+ "}\n";
+ }
+ }
+
+ // kernel argument indices returned by add_arg() in the constructor
+ size_t m_block_sums_arg;
+ size_t m_scratch_arg;
+ size_t m_block_size_arg;
+ size_t m_count_arg;
+ size_t m_init_value_arg;
+};
+
+/// Meta-kernel ("write_scanned_output") that combines every output element
+/// with the \c block_sums entry of its work group using \c op. The indices
+/// of the kernel arguments are kept in the public m_*_arg members so that
+/// the caller can bind them.
+template<class T, class BinaryOperator>
+class write_scanned_output_kernel : public meta_kernel
+{
+public:
+    write_scanned_output_kernel(BinaryOperator op)
+        : meta_kernel("write_scanned_output")
+    {
+        // the bounds check against the data size is always emitted
+        const bool checked = true;
+
+        m_output_arg = add_arg<T *>(memory_object::global_memory, "output");
+        m_block_sums_arg = add_arg<const T *>(memory_object::global_memory, "block_sums");
+        m_count_arg = add_arg<const cl_uint>("count");
+
+        // work-item parameters
+        *this << "const uint gid = get_global_id(0);\n";
+        *this << "const uint block_id = get_group_id(0);\n";
+
+        // open the check against data size
+        if(checked){
+            *this << "if(gid < count){\n";
+        }
+
+        // write output
+        *this << "output[gid] = ";
+        *this << op(var<T>("block_sums[block_id]"), var<T>("output[gid] "));
+        *this << ";\n";
+
+        // close the check against data size
+        if(checked){
+            *this << "}\n";
+        }
+    }
+
+    // kernel argument indices returned by add_arg() in the constructor
+    size_t m_output_arg;
+    size_t m_block_sums_arg;
+    size_t m_count_arg;
+};
+
+/// Picks the scan block size for the range [\p first, \p last): 0 for an
+/// empty range, otherwise the smallest power of two that is not smaller than
+/// the element count, limited to 256.
+template<class InputIterator>
+inline size_t pick_scan_block_size(InputIterator first, InputIterator last)
+{
+    const size_t count = iterator_range_size(first, last);
+
+    if(count == 0){
+        return 0;
+    }
+
+    // double the block size until it covers count or reaches the 256 limit
+    size_t block_size = 1;
+    while(block_size < count && block_size < 256){
+        block_size *= 2;
+    }
+
+    return block_size;
+}
+
+/// Block-wise scan of [\p first, \p last) into \p result.
+///
+/// The input is split into blocks of pick_scan_block_size() elements. Each
+/// block is scanned by a local_scan_kernel, which also stores one value per
+/// block in a temporary \c block_sums vector. When there is more than one
+/// block, \c block_sums is scanned inclusively in place by a recursive call
+/// and then combined into every block after the first by a
+/// write_scanned_output_kernel. Returns \p result advanced by the number of
+/// elements.
+template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
+inline OutputIterator scan_impl(InputIterator first,
+                                InputIterator last,
+                                OutputIterator result,
+                                bool exclusive,
+                                T init,
+                                BinaryOperator op,
+                                command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputIterator>::value_type input_type;
+    typedef typename
+        std::iterator_traits<InputIterator>::difference_type difference_type;
+    typedef typename
+        std::iterator_traits<OutputIterator>::value_type output_type;
+
+    const context &context = queue.get_context();
+    const size_t count = detail::iterator_range_size(first, last);
+
+    // number of blocks, counting a partially filled last block
+    const size_t block_size = pick_scan_block_size(first, last);
+    const size_t block_count =
+        count / block_size + ((count % block_size != 0) ? 1 : 0);
+    const size_t padded_size = block_count * block_size;
+
+    // one sum per block, zero-initialised
+    ::boost::compute::vector<input_type> block_sums(block_count, context);
+    input_type zero;
+    std::memset(&zero, 0, sizeof(input_type));
+    ::boost::compute::fill(block_sums.begin(), block_sums.end(), zero, queue);
+
+    // local scan of every block
+    local_scan_kernel<InputIterator, OutputIterator, BinaryOperator>
+        block_scan(first, last, result, exclusive, op);
+
+    ::boost::compute::kernel compiled = block_scan.compile(context);
+    compiled.set_arg(block_scan.m_scratch_arg, local_buffer<input_type>(block_size));
+    compiled.set_arg(block_scan.m_block_sums_arg, block_sums);
+    compiled.set_arg(block_scan.m_block_size_arg, static_cast<cl_uint>(block_size));
+    compiled.set_arg(block_scan.m_count_arg, static_cast<cl_uint>(count));
+    compiled.set_arg(block_scan.m_init_value_arg, static_cast<output_type>(init));
+
+    queue.enqueue_1d_range_kernel(compiled, 0, padded_size, block_size);
+
+    if(block_count > 1){
+        // inclusive scan block sums
+        scan_impl(block_sums.begin(), block_sums.end(), block_sums.begin(),
+                  false, init, op, queue);
+
+        // add block sums to each block
+        write_scanned_output_kernel<input_type, BinaryOperator>
+            add_block_sums(op);
+        compiled = add_block_sums.compile(context);
+        compiled.set_arg(add_block_sums.m_output_arg, result.get_buffer());
+        compiled.set_arg(add_block_sums.m_block_sums_arg, block_sums);
+        compiled.set_arg(add_block_sums.m_count_arg, static_cast<cl_uint>(count));
+
+        // the global offset of block_size skips the first block
+        queue.enqueue_1d_range_kernel(compiled, block_size, padded_size, block_size);
+    }
+
+    return result + static_cast<difference_type>(count);
+}
+
+/// Overload with separate input and output iterator template parameters:
+/// forwards all arguments unchanged to scan_impl() and returns its result.
+template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
+inline OutputIterator dispatch_scan(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ bool exclusive,
+ T init,
+ BinaryOperator op,
+ command_queue &queue)
+{
+ return scan_impl(first, last, result, exclusive, init, op, queue);
+}
+
+/// Overload for input and output iterators of the same type. When \p first
+/// equals \p result the scan is in-place: the input is first copied into a
+/// temporary vector and the scan reads from that copy. Otherwise the call is
+/// forwarded directly to scan_impl().
+template<class InputIterator, class T, class BinaryOperator>
+inline InputIterator dispatch_scan(InputIterator first,
+                                   InputIterator last,
+                                   InputIterator result,
+                                   bool exclusive,
+                                   T init,
+                                   BinaryOperator op,
+                                   command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    if(!(first == result)){
+        // scan input to output
+        return scan_impl(first, last, result, exclusive, init, op, queue);
+    }
+
+    // scan input in-place: make a temporary copy of the input
+    const context &context = queue.get_context();
+    const size_t element_count = iterator_range_size(first, last);
+    vector<value_type> input_copy(element_count, context);
+    copy(first, last, input_copy.begin(), queue);
+
+    // scan from temporary values
+    return scan_impl(input_copy.begin(), input_copy.end(), first,
+                     exclusive, init, op, queue);
+}
+
+/// Entry point of the block-wise scan: returns \p result unchanged for an
+/// empty input range, otherwise hands all arguments to dispatch_scan().
+template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
+inline OutputIterator scan_on_gpu(InputIterator first,
+                                  InputIterator last,
+                                  OutputIterator result,
+                                  bool exclusive,
+                                  T init,
+                                  BinaryOperator op,
+                                  command_queue &queue)
+{
+    const bool has_input = !(first == last);
+    if(has_input){
+        return dispatch_scan(first, last, result, exclusive, init, op, queue);
+    }
+
+    // nothing to scan
+    return result;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP
diff --git a/boost/compute/algorithm/detail/search_all.hpp b/boost/compute/algorithm/detail/search_all.hpp
new file mode 100644
index 0000000000..a874bcdebe
--- /dev/null
+++ b/boost/compute/algorithm/detail/search_all.hpp
@@ -0,0 +1,86 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP
+
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Search kernel class
+///
+/// Subclass of meta_kernel which is capable of performing pattern matching
+///
+/// For every candidate start position in the text the generated kernel
+/// compares the pattern element by element and writes 1 to the result at
+/// that position when the whole pattern matches and 0 otherwise.
+///
+template<class PatternIterator, class TextIterator, class OutputIterator>
+class search_kernel : public meta_kernel
+{
+public:
+    /// Creates the kernel with zeroed counters, so that exec() does nothing
+    /// until set_range() has been called.
+    search_kernel()
+        : meta_kernel("search"),
+          m_p_count(0),
+          m_p_count_arg(0),
+          m_count(0)
+    {}
+
+    /// Generates the kernel source for searching the pattern
+    /// [\p p_first, \p p_last) in the text [\p t_first, \p t_last) and
+    /// records how many start positions have to be checked.
+    void set_range(PatternIterator p_first,
+                   PatternIterator p_last,
+                   TextIterator t_first,
+                   TextIterator t_last,
+                   OutputIterator result)
+    {
+        m_p_count = iterator_range_size(p_first, p_last);
+        m_p_count_arg = add_arg<uint_>("p_count");
+
+        // Number of start positions at which the whole pattern still fits.
+        // A pattern longer than the text fits nowhere; without this guard the
+        // unsigned subtraction would wrap around to a huge work size.
+        const size_t text_count = iterator_range_size(t_first, t_last);
+        m_count = (m_p_count > text_count) ? 0 : text_count + 1 - m_p_count;
+
+        *this <<
+            "uint i = get_global_id(0);\n" <<
+            "uint i1 = i;\n" <<
+            "uint j;\n" <<
+            "for(j = 0; j<p_count; j++,i++)\n" <<
+            "{\n" <<
+            " if(" << p_first[expr<uint_>("j")] << " != " <<
+            t_first[expr<uint_>("i")] << ")\n" <<
+            // on a mismatch j is pushed past p_count, which ends the loop
+            // and makes the "j == p_count" test below fail
+            " j = p_count + 1;\n" <<
+            "}\n" <<
+            "if(j == p_count)\n" <<
+            result[expr<uint_>("i1")] << " = 1;\n" <<
+            "else\n" <<
+            result[expr<uint_>("i1")] << " = 0;\n";
+    }
+
+    /// Runs the kernel with one work-item per start position. Returns a
+    /// default-constructed event without enqueuing anything when there is no
+    /// position to check.
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        set_arg(m_p_count_arg, uint_(m_p_count));
+
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    size_t m_p_count;      // number of pattern elements
+    size_t m_p_count_arg;  // index of the "p_count" kernel argument
+    size_t m_count;        // number of start positions (global work size)
+};
+
+} //end detail namespace
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP
diff --git a/boost/compute/algorithm/detail/serial_accumulate.hpp b/boost/compute/algorithm/detail/serial_accumulate.hpp
new file mode 100644
index 0000000000..84f9910122
--- /dev/null
+++ b/boost/compute/algorithm/detail/serial_accumulate.hpp
@@ -0,0 +1,56 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Accumulates the range [first, last) with a kernel enqueued as a single
+// task: the generated code starts from init, folds every element in order
+// using function(result, element) and stores the final value in result[0].
+//
+// The kernel is only enqueued on queue; no explicit wait is performed here.
+template<class InputIterator, class OutputIterator, class T, class BinaryFunction>
+inline void serial_accumulate(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ T init,
+ BinaryFunction function,
+ command_queue &queue)
+{
+ const context &context = queue.get_context();
+ size_t count = detail::iterator_range_size(first, last);
+
+ meta_kernel k("serial_accumulate");
+ // both values are passed as kernel arguments and bound after compilation
+ size_t init_arg = k.add_arg<T>("init");
+ size_t count_arg = k.add_arg<cl_uint>("count");
+
+ // kernel body: result = init; result = function(result, first[i]) for
+ // every i in [0, count); result[0] = result
+ k <<
+ k.decl<T>("result") << " = init;\n" <<
+ "for(uint i = 0; i < count; i++)\n" <<
+ " result = " << function(k.var<T>("result"),
+ first[k.var<cl_uint>("i")]) << ";\n" <<
+ result[0] << " = result;\n";
+
+ kernel kernel = k.compile(context);
+
+ // bind the initial value and the element count
+ kernel.set_arg(init_arg, init);
+ kernel.set_arg(count_arg, static_cast<cl_uint>(count));
+
+ // run the kernel as a single task
+ queue.enqueue_task(kernel);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP
diff --git a/boost/compute/algorithm/detail/serial_count_if.hpp b/boost/compute/algorithm/detail/serial_count_if.hpp
new file mode 100644
index 0000000000..be6794c426
--- /dev/null
+++ b/boost/compute/algorithm/detail/serial_count_if.hpp
@@ -0,0 +1,68 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP
+
+#include <iterator>
+
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Counts the values in the range [first, last) for which predicate holds,
+// using a kernel enqueued as a single task that visits the elements one
+// after another.
+//
+// Returns the count, read back from a one-element device buffer.
+template<class InputIterator, class Predicate>
+inline size_t serial_count_if(InputIterator first,
+ InputIterator last,
+ Predicate predicate,
+ command_queue &queue)
+{
+ typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+ const context &context = queue.get_context();
+ size_t size = iterator_range_size(first, last);
+
+ meta_kernel k("serial_count_if");
+ // "size" is bound right away; "result" is bound once its buffer exists
+ k.add_set_arg("size", static_cast<uint_>(size));
+ size_t result_arg = k.add_arg<uint_ *>(memory_object::global_memory, "result");
+
+ // kernel body: load each element into "value", increment "count" when the
+ // predicate is true for it and finally store the total through "result"
+ k <<
+ "uint count = 0;\n" <<
+ "for(uint i = 0; i < size; i++){\n" <<
+ k.decl<const value_type>("value") << "="
+ << first[k.var<uint_>("i")] << ";\n" <<
+ "if(" << predicate(k.var<const value_type>("value")) << "){\n" <<
+ "count++;\n" <<
+ "}\n"
+ "}\n"
+ "*result = count;\n";
+
+ kernel kernel = k.compile(context);
+
+ // setup result buffer
+ scalar<uint_> result(context);
+ kernel.set_arg(result_arg, result.get_buffer());
+
+ // run kernel
+ queue.enqueue_task(kernel);
+
+ // read the count back from the device and return it
+ return result.read(queue);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP
diff --git a/boost/compute/algorithm/detail/serial_find_extrema.hpp b/boost/compute/algorithm/detail/serial_find_extrema.hpp
new file mode 100644
index 0000000000..8407c88129
--- /dev/null
+++ b/boost/compute/algorithm/detail/serial_find_extrema.hpp
@@ -0,0 +1,87 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Finds the minimum or maximum element of the range [first, last) with a
+// kernel enqueued as a single task.
+//
+// compare is used as the ordering. When find_minimum is true the candidate
+// replaces the current value if compare(candidate, value) holds; otherwise
+// the kernel is compiled with -DBOOST_COMPUTE_FIND_MAXIMUM and the candidate
+// replaces the current value if compare(value, candidate) holds. In both
+// cases the first extreme element encountered is kept.
+//
+// Returns an iterator to the extreme element, or last for an empty range.
+template<class InputIterator, class Compare>
+inline InputIterator serial_find_extrema(InputIterator first,
+                                         InputIterator last,
+                                         Compare compare,
+                                         const bool find_minimum,
+                                         command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+
+    // the kernel unconditionally loads element 0, which would be an
+    // out-of-bounds read for an empty range, so handle that case on the host
+    size_t count = iterator_range_size(first, last);
+    if(count == 0){
+        return last;
+    }
+
+    const context &context = queue.get_context();
+
+    meta_kernel k("serial_find_extrema");
+
+    // kernel body: start with element 0 as the current extreme and replace
+    // it whenever a later element compares "better"; the direction of the
+    // comparison is selected at kernel compile time by the preprocessor
+    k <<
+        k.decl<value_type>("value") << " = " << first[k.expr<uint_>("0")] << ";\n" <<
+        k.decl<uint_>("value_index") << " = 0;\n" <<
+        "for(uint i = 1; i < size; i++){\n" <<
+        " " << k.decl<value_type>("candidate") << "="
+            << first[k.expr<uint_>("i")] << ";\n" <<
+
+        "#ifndef BOOST_COMPUTE_FIND_MAXIMUM\n" <<
+        " if(" << compare(k.var<value_type>("candidate"),
+                          k.var<value_type>("value")) << "){\n" <<
+        "#else\n" <<
+        " if(" << compare(k.var<value_type>("value"),
+                          k.var<value_type>("candidate")) << "){\n" <<
+        "#endif\n" <<
+
+        " value = candidate;\n" <<
+        " value_index = i;\n" <<
+        " }\n" <<
+        "}\n" <<
+        "*index = value_index;\n";
+
+    size_t index_arg_index = k.add_arg<uint_ *>(memory_object::global_memory, "index");
+    size_t size_arg_index = k.add_arg<uint_>("size");
+
+    // select the maximum variant of the kernel through a compile definition
+    std::string options;
+    if(!find_minimum){
+        options = "-DBOOST_COMPUTE_FIND_MAXIMUM";
+    }
+    kernel kernel = k.compile(context, options);
+
+    // setup index buffer
+    scalar<uint_> index(context);
+    kernel.set_arg(index_arg_index, index.get_buffer());
+
+    // setup count
+    kernel.set_arg(size_arg_index, static_cast<uint_>(count));
+
+    // run kernel
+    queue.enqueue_task(kernel);
+
+    // read index and return iterator
+    return first + static_cast<difference_type>(index.read(queue));
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP
diff --git a/boost/compute/algorithm/detail/serial_merge.hpp b/boost/compute/algorithm/detail/serial_merge.hpp
new file mode 100644
index 0000000000..85e38f704c
--- /dev/null
+++ b/boost/compute/algorithm/detail/serial_merge.hpp
@@ -0,0 +1,97 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP
+#define BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP
+
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Merges the sorted ranges [first1, last1) and [first2, last2) into the
+// range beginning at result using a single sequential kernel.
+//
+// comp(a, b) is evaluated with a taken from the first range and b from the
+// second one; when it holds the element of the first range is emitted,
+// otherwise the element of the second range is emitted.
+// NOTE(review): when comp is a strict ordering, equivalent elements are
+// therefore taken from the second range first, unlike std::merge - confirm
+// this is intended.
+//
+// Returns an iterator one past the last element written, i.e.
+// result + (size1 + size2).
+template<class InputIterator1,
+         class InputIterator2,
+         class OutputIterator,
+         class Compare>
+inline OutputIterator serial_merge(InputIterator1 first1,
+                                   InputIterator1 last1,
+                                   InputIterator2 first2,
+                                   InputIterator2 last2,
+                                   OutputIterator result,
+                                   Compare comp,
+                                   command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputIterator1>::value_type
+        input_type1;
+    typedef typename
+        std::iterator_traits<InputIterator2>::value_type
+        input_type2;
+    typedef typename
+        std::iterator_traits<OutputIterator>::difference_type
+        result_difference_type;
+
+    std::ptrdiff_t size1 = std::distance(first1, last1);
+    std::ptrdiff_t size2 = std::distance(first2, last2);
+
+    meta_kernel k("serial_merge");
+    k.add_set_arg<uint_>("size1", static_cast<uint_>(size1));
+    k.add_set_arg<uint_>("size2", static_cast<uint_>(size2));
+
+    k <<
+        "uint i = 0;\n" << // index in result range
+        "uint j = 0;\n" << // index in first input range
+        "uint k = 0;\n" << // index in second input range
+
+        // current head of each range; only loaded inside the loop, where
+        // both indices are known to be in bounds. loading them up front (or
+        // prefetching the next element after each step) would read from an
+        // empty range or one element past the end of a range.
+        k.decl<input_type1>("j_value") << ";\n" <<
+        k.decl<input_type2>("k_value") << ";\n" <<
+
+        // merge values from both input ranges to the result range
+        "while(j < size1 && k < size2){\n" <<
+        "    j_value = " << first1[k.var<uint_>("j")] << ";\n" <<
+        "    k_value = " << first2[k.var<uint_>("k")] << ";\n" <<
+        "    if(" << comp(k.var<input_type1>("j_value"),
+                          k.var<input_type2>("k_value")) << "){\n" <<
+        "        " << result[k.var<uint_>("i++")] << " = j_value;\n" <<
+        "        j++;\n" <<
+        "    }\n" <<
+        "    else{\n" <<
+        "        " << result[k.var<uint_>("i++")] << " = k_value;\n" <<
+        "        k++;\n" <<
+        "    }\n" <<
+        "}\n" <<
+
+        // copy any remaining values from first range
+        "while(j < size1){\n" <<
+        "    " << result[k.var<uint_>("i++")] << " = " <<
+                  first1[k.var<uint_>("j++")] << ";\n" <<
+        "}\n" <<
+
+        // copy any remaining values from second range
+        "while(k < size2){\n" <<
+        "    " << result[k.var<uint_>("i++")] << " = " <<
+                  first2[k.var<uint_>("k++")] << ";\n" <<
+        "}\n";
+
+    // run kernel
+    k.exec(queue);
+
+    return result + static_cast<result_difference_type>(size1 + size2);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP
diff --git a/boost/compute/algorithm/detail/serial_reduce.hpp b/boost/compute/algorithm/detail/serial_reduce.hpp
new file mode 100644
index 0000000000..53aaf140fe
--- /dev/null
+++ b/boost/compute/algorithm/detail/serial_reduce.hpp
@@ -0,0 +1,62 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Reduces the range [first, last) to a single value with a kernel enqueued
+// as a single task: the generated code starts from the first element, folds
+// the remaining elements in order using function(result, element) and
+// stores the final value in result[0].
+//
+// For an empty range nothing is enqueued and result is left untouched.
+template<class InputIterator, class OutputIterator, class BinaryFunction>
+inline void serial_reduce(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ BinaryFunction function,
+ command_queue &queue)
+{
+ // T is the element type of the input; result_type is what function
+ // yields when applied to two values of type T
+ typedef typename
+ std::iterator_traits<InputIterator>::value_type T;
+ typedef typename
+ ::boost::compute::result_of<BinaryFunction(T, T)>::type result_type;
+
+ const context &context = queue.get_context();
+ size_t count = detail::iterator_range_size(first, last);
+ // the kernel reads first[0] unconditionally, so an empty range must not
+ // reach it
+ if(count == 0){
+ return;
+ }
+
+ meta_kernel k("serial_reduce");
+ size_t count_arg = k.add_arg<cl_uint>("count");
+
+ // kernel body: result = first[0]; result = function(result, first[i])
+ // for every i in [1, count); result[0] = result
+ k <<
+ k.decl<result_type>("result") << " = " << first[0] << ";\n" <<
+ "for(uint i = 1; i < count; i++)\n" <<
+ " result = " << function(k.var<T>("result"),
+ first[k.var<uint_>("i")]) << ";\n" <<
+ result[0] << " = result;\n";
+
+ kernel kernel = k.compile(context);
+
+ // bind the element count
+ kernel.set_arg(count_arg, static_cast<uint_>(count));
+
+ // run the kernel as a single task
+ queue.enqueue_task(kernel);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP
diff --git a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
new file mode 100644
index 0000000000..f9bda8e476
--- /dev/null
+++ b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
@@ -0,0 +1,108 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP
+
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Reduces runs of consecutive values whose keys are considered equal by
+// predicate, using a kernel enqueued as a single task.
+//
+// The keys are taken from [keys_first, keys_last) and the values from the
+// range of the same length beginning at values_first. For every run one key
+// is written to keys_result and one reduced value (values combined with
+// function) is written to values_result.
+//
+// Returns the number of runs written, read back from the device. For an
+// empty key range nothing is enqueued and 0 is returned.
+template<class InputKeyIterator, class InputValueIterator,
+ class OutputKeyIterator, class OutputValueIterator,
+ class BinaryFunction, class BinaryPredicate>
+inline size_t serial_reduce_by_key(InputKeyIterator keys_first,
+ InputKeyIterator keys_last,
+ InputValueIterator values_first,
+ OutputKeyIterator keys_result,
+ OutputValueIterator values_result,
+ BinaryFunction function,
+ BinaryPredicate predicate,
+ command_queue &queue)
+{
+ typedef typename
+ std::iterator_traits<InputValueIterator>::value_type value_type;
+ typedef typename
+ std::iterator_traits<InputKeyIterator>::value_type key_type;
+ typedef typename
+ ::boost::compute::result_of<BinaryFunction(value_type, value_type)>::type result_type;
+
+ const context &context = queue.get_context();
+ size_t count = detail::iterator_range_size(keys_first, keys_last);
+ // the kernel reads element 0 unconditionally, so an empty range must not
+ // reach it
+ if(count < 1){
+ return count;
+ }
+
+ meta_kernel k("serial_reduce_by_key");
+ size_t count_arg = k.add_arg<uint_>("count");
+ size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory,
+ "result_size");
+
+ // input values are converted to result_type before they are combined
+ convert<result_type> to_result_type;
+
+ // kernel body: "result" accumulates the current run and "size" counts the
+ // runs seen so far. whenever the predicate fails for (previous_key, key)
+ // the finished run is stored at index size - 1 and a new run is started
+ // with the current value; the last run is stored after the loop.
+ k <<
+ k.decl<result_type>("result") <<
+ " = " << to_result_type(values_first[0]) << ";\n" <<
+ k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" <<
+ k.decl<result_type>("value") << ";\n" <<
+ k.decl<key_type>("key") << ";\n" <<
+
+ k.decl<uint_>("size") << " = 1;\n" <<
+
+ keys_result[0] << " = previous_key;\n" <<
+ values_result[0] << " = result;\n" <<
+
+ "for(ulong i = 1; i < count; i++) {\n" <<
+ " value = " << to_result_type(values_first[k.var<uint_>("i")]) << ";\n" <<
+ " key = " << keys_first[k.var<uint_>("i")] << ";\n" <<
+ " if (" << predicate(k.var<key_type>("previous_key"),
+ k.var<key_type>("key")) << ") {\n" <<
+
+ " result = " << function(k.var<result_type>("result"),
+ k.var<result_type>("value")) << ";\n" <<
+ " }\n " <<
+ " else { \n" <<
+ keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" <<
+ values_result[k.var<uint_>("size - 1")] << " = result;\n" <<
+ " result = value;\n" <<
+ " size++;\n" <<
+ " } \n" <<
+ " previous_key = key;\n" <<
+ "}\n" <<
+ keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" <<
+ values_result[k.var<uint_>("size - 1")] << " = result;\n" <<
+ "*result_size = size;";
+
+ kernel kernel = k.compile(context);
+
+ // one-element device buffer that receives the number of runs
+ scalar<uint_> result_size(context);
+ kernel.set_arg(result_size_arg, result_size.get_buffer());
+ kernel.set_arg(count_arg, static_cast<uint_>(count));
+
+ // run the kernel as a single task
+ queue.enqueue_task(kernel);
+
+ // read the number of runs back from the device
+ return static_cast<size_t>(result_size.read(queue));
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP
diff --git a/boost/compute/algorithm/equal.hpp b/boost/compute/algorithm/equal.hpp
new file mode 100644
index 0000000000..35d0c5f0ea
--- /dev/null
+++ b/boost/compute/algorithm/equal.hpp
@@ -0,0 +1,53 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_HPP
+#define BOOST_COMPUTE_ALGORITHM_EQUAL_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/mismatch.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Checks whether the range [\p first1, \p last1) matches, element for
+/// element, the range of the same length beginning at \p first2.
+///
+/// \return \c true if mismatch() reports no differing position before
+///         \p last1, \c false otherwise
+template<class InputIterator1, class InputIterator2>
+inline bool equal(InputIterator1 first1,
+                  InputIterator1 last1,
+                  InputIterator2 first2,
+                  command_queue &queue = system::default_queue())
+{
+    // position in the first range at which the two sequences first differ
+    const InputIterator1 first_difference =
+        ::boost::compute::mismatch(first1, last1, first2, queue).first;
+
+    return first_difference == last1;
+}
+
+/// \overload
+///
+/// Ranges of different lengths are never equal; in that case no element
+/// comparison is performed.
+template<class InputIterator1, class InputIterator2>
+inline bool equal(InputIterator1 first1,
+                  InputIterator1 last1,
+                  InputIterator2 first2,
+                  InputIterator2 last2,
+                  command_queue &queue = system::default_queue())
+{
+    const bool same_length =
+        std::distance(first1, last1) == std::distance(first2, last2);
+
+    // the element-wise comparison only runs when the lengths agree
+    return same_length &&
+           ::boost::compute::equal(first1, last1, first2, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_EQUAL_HPP
diff --git a/boost/compute/algorithm/equal_range.hpp b/boost/compute/algorithm/equal_range.hpp
new file mode 100644
index 0000000000..fd82177324
--- /dev/null
+++ b/boost/compute/algorithm/equal_range.hpp
@@ -0,0 +1,42 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP
+#define BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP
+
+#include <utility>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/lower_bound.hpp>
+#include <boost/compute/algorithm/upper_bound.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Finds the sub-range of the sorted range [\p first, \p last) whose
+/// elements are equal to \p value.
+///
+/// \return a pair whose first member is the result of lower_bound() and
+///         whose second member is the result of upper_bound() for \p value
+template<class InputIterator, class T>
+inline std::pair<InputIterator, InputIterator>
+equal_range(InputIterator first,
+            InputIterator last,
+            const T &value,
+            command_queue &queue = system::default_queue())
+{
+    const InputIterator range_begin =
+        ::boost::compute::lower_bound(first, last, value, queue);
+    const InputIterator range_end =
+        ::boost::compute::upper_bound(first, last, value, queue);
+
+    return std::pair<InputIterator, InputIterator>(range_begin, range_end);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP
diff --git a/boost/compute/algorithm/exclusive_scan.hpp b/boost/compute/algorithm/exclusive_scan.hpp
new file mode 100644
index 0000000000..205d3de658
--- /dev/null
+++ b/boost/compute/algorithm/exclusive_scan.hpp
@@ -0,0 +1,96 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP
+#define BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/scan.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Performs an exclusive scan of the elements in the range [\p first, \p last)
+/// and stores the results in the range beginning at \p result.
+///
+/// Each element in the output is assigned the combination (by default the
+/// sum) of all the previous values in the input.
+///
+/// \param first first element in the range to scan
+/// \param last last element in the range to scan
+/// \param result first element in the result range
+/// \param init value used to initialize the scan sequence
+/// \param binary_op associative binary operator
+/// \param queue command queue to perform the operation
+///
+/// \return \c OutputIterator to the end of the result range
+///
+/// The default operation is to add the elements up.
+///
+/// \snippet test/test_scan.cpp exclusive_scan_int
+///
+/// A different associative operation can be specified as \p binary_op
+/// instead (e.g., multiplication, maximum, minimum). The value used to
+/// initialize the scan sequence can also be specified.
+///
+/// \snippet test/test_scan.cpp exclusive_scan_int_multiplies
+///
+/// \see inclusive_scan()
+template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
+inline OutputIterator
+exclusive_scan(InputIterator first,
+               InputIterator last,
+               OutputIterator result,
+               T init,
+               BinaryOperator binary_op,
+               command_queue &queue = system::default_queue())
+{
+    // selects the exclusive flavour of the shared scan implementation
+    const bool exclusive = true;
+
+    return detail::scan(first, last, result, exclusive, init, binary_op, queue);
+}
+
+/// \overload
+///
+/// Combines the elements with plus<> over the value type of \p result.
+template<class InputIterator, class OutputIterator, class T>
+inline OutputIterator
+exclusive_scan(InputIterator first,
+               InputIterator last,
+               OutputIterator result,
+               T init,
+               command_queue &queue = system::default_queue())
+{
+    typedef typename
+        std::iterator_traits<OutputIterator>::value_type output_type;
+
+    // selects the exclusive flavour of the shared scan implementation
+    const bool exclusive = true;
+
+    return detail::scan(first,
+                        last,
+                        result,
+                        exclusive,
+                        init,
+                        boost::compute::plus<output_type>(),
+                        queue);
+}
+
+/// \overload
+///
+/// Starts the sequence at zero and combines the elements with plus<>, both
+/// over the value type of \p result.
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+exclusive_scan(InputIterator first,
+               InputIterator last,
+               OutputIterator result,
+               command_queue &queue = system::default_queue())
+{
+    typedef typename
+        std::iterator_traits<OutputIterator>::value_type output_type;
+
+    // selects the exclusive flavour of the shared scan implementation
+    const bool exclusive = true;
+
+    return detail::scan(first,
+                        last,
+                        result,
+                        exclusive,
+                        output_type(0),
+                        boost::compute::plus<output_type>(),
+                        queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP
diff --git a/boost/compute/algorithm/fill.hpp b/boost/compute/algorithm/fill.hpp
new file mode 100644
index 0000000000..c711f46b94
--- /dev/null
+++ b/boost/compute/algorithm/fill.hpp
@@ -0,0 +1,306 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
+#define BOOST_COMPUTE_ALGORITHM_FILL_HPP
+
+#include <iterator>
+
+#include <boost/mpl/int.hpp>
+#include <boost/mpl/vector.hpp>
+#include <boost/mpl/contains.hpp>
+#include <boost/utility/enable_if.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/iterator/constant_iterator.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+namespace mpl = boost::mpl;
+
+// Writes count copies of value to the range beginning at first by copying
+// from a constant_iterator range with copy().
+template<class BufferIterator, class T>
+inline void fill_with_copy(BufferIterator first,
+                           size_t count,
+                           const T &value,
+                           command_queue &queue)
+{
+    // [source_begin, source_end) is a range of count positions that all
+    // yield value
+    const constant_iterator<T> source_begin =
+        ::boost::compute::make_constant_iterator(value, 0);
+    const constant_iterator<T> source_end =
+        ::boost::compute::make_constant_iterator(value, count);
+
+    ::boost::compute::copy(source_begin, source_end, first, queue);
+}
+
+// Writes count copies of value to the range beginning at first by copying
+// from a constant_iterator range with copy_async(), and returns the future
+// produced by that call.
+template<class BufferIterator, class T>
+inline future<void> fill_async_with_copy(BufferIterator first,
+                                         size_t count,
+                                         const T &value,
+                                         command_queue &queue)
+{
+    // [source_begin, source_end) is a range of count positions that all
+    // yield value
+    const constant_iterator<T> source_begin =
+        ::boost::compute::make_constant_iterator(value, 0);
+    const constant_iterator<T> source_end =
+        ::boost::compute::make_constant_iterator(value, count);
+
+    return ::boost::compute::copy_async(source_begin, source_end, first, queue);
+}
+
+#if defined(CL_VERSION_1_2)
+
+// meta-function returning true if Iterator points to a range of values
+// that can be filled using clEnqueueFillBuffer(). to meet this criterion
+// it must have a buffer accessible through iter.get_buffer() and the
+// size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}.
+template<class Iterator>
+struct is_valid_fill_buffer_iterator :
+ public mpl::and_<
+ is_buffer_iterator<Iterator>,
+ mpl::contains<
+ mpl::vector<
+ mpl::int_<1>,
+ mpl::int_<2>,
+ mpl::int_<4>,
+ mpl::int_<8>,
+ mpl::int_<16>,
+ mpl::int_<32>,
+ mpl::int_<64>,
+ mpl::int_<128>
+ >,
+ mpl::int_<
+ sizeof(typename std::iterator_traits<Iterator>::value_type)
+ >
+ >
+ >::type { };
+
+// discard_iterator is explicitly excluded, so fills through it take the
+// copy-based path
+template<>
+struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
+
+// Overload selected for iterators accepted by is_valid_fill_buffer_iterator:
+// fills [first, first + count) with value through the command queue's
+// fill-buffer operation, falling back to fill_with_copy() on devices that do
+// not report OpenCL 1.2.
+template<class BufferIterator, class T>
+inline void
+dispatch_fill(BufferIterator first,
+              size_t count,
+              const T &value,
+              command_queue &queue,
+              typename boost::enable_if<
+                  is_valid_fill_buffer_iterator<BufferIterator>
+              >::type* = 0)
+{
+    typedef typename std::iterator_traits<BufferIterator>::value_type element_type;
+
+    // an empty range requires no work
+    if(count == 0){
+        return;
+    }
+
+    // enqueue_fill_buffer() needs a device that supports OpenCL 1.2
+    if(!queue.check_device_version(1, 2)){
+        fill_with_copy(first, count, value, queue);
+        return;
+    }
+
+    // the value converted to the element type of the destination, and the
+    // position of the first destination element expressed in bytes
+    element_type fill_value = static_cast<element_type>(value);
+    const size_t byte_offset =
+        static_cast<size_t>(first.get_index()) * sizeof(element_type);
+
+    if(count != 1){
+        queue.enqueue_fill_buffer(
+            first.get_buffer(),
+            &fill_value,
+            sizeof(element_type),
+            byte_offset,
+            count * sizeof(element_type)
+        );
+        return;
+    }
+
+    // a single value is written with clEnqueueWriteBuffer() directly. this
+    // is potentially more efficient and also works around a bug in the
+    // intel opencl driver.
+    queue.enqueue_write_buffer(
+        first.get_buffer(),
+        byte_offset,
+        sizeof(element_type),
+        &fill_value
+    );
+}
+
+// Asynchronous overload selected for iterators accepted by
+// is_valid_fill_buffer_iterator: enqueues a fill of [first, first + count)
+// with value through the command queue's fill-buffer operation, falling back
+// to fill_async_with_copy() on devices that do not report OpenCL 1.2.
+//
+// Returns a future wrapping the event of the enqueued fill, or a
+// default-constructed future for an empty range.
+template<class BufferIterator, class T>
+inline future<void>
+dispatch_fill_async(BufferIterator first,
+                    size_t count,
+                    const T &value,
+                    command_queue &queue,
+                    typename boost::enable_if<
+                        is_valid_fill_buffer_iterator<BufferIterator>
+                    >::type* = 0)
+{
+    typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
+
+    // nothing to enqueue for an empty range. this mirrors the synchronous
+    // overload and the svm_ptr overloads, and avoids requesting a zero-sized
+    // fill from the driver.
+    if(count == 0){
+        return future<void>();
+    }
+
+    // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
+    if(!queue.check_device_version(1, 2)){
+        return fill_async_with_copy(first, count, value, queue);
+    }
+
+    // the value converted to the element type of the destination, and the
+    // index of the first destination element within its buffer
+    value_type pattern = static_cast<value_type>(value);
+    size_t offset = static_cast<size_t>(first.get_index());
+
+    // offset and size are passed in bytes
+    event event_ =
+        queue.enqueue_fill_buffer(first.get_buffer(),
+                                  &pattern,
+                                  sizeof(value_type),
+                                  offset * sizeof(value_type),
+                                  count * sizeof(value_type));
+
+    return future<void>(event_);
+}
+
+#ifdef CL_VERSION_2_0
+// Overload for svm_ptr<T>: fills count elements beginning at first with
+// value through the command queue's SVM fill operation. An empty range
+// enqueues nothing.
+template<class T>
+inline void dispatch_fill(svm_ptr<T> first,
+                          size_t count,
+                          const T &value,
+                          command_queue &queue)
+{
+    if(count != 0){
+        // pattern size and total size are passed in bytes
+        queue.enqueue_svm_fill(
+            first.get(), &value, sizeof(T), count * sizeof(T)
+        );
+    }
+}
+
+// Asynchronous overload for svm_ptr<T>: enqueues an SVM fill of count
+// elements beginning at first and returns a future wrapping its event. An
+// empty range enqueues nothing and yields a default-constructed future.
+template<class T>
+inline future<void> dispatch_fill_async(svm_ptr<T> first,
+                                        size_t count,
+                                        const T &value,
+                                        command_queue &queue)
+{
+    if(count != 0){
+        // pattern size and total size are passed in bytes
+        const event fill_event = queue.enqueue_svm_fill(
+            first.get(), &value, sizeof(T), count * sizeof(T)
+        );
+
+        return future<void>(fill_event);
+    }
+
+    return future<void>();
+}
+#endif // CL_VERSION_2_0
+
+// default implementations
+//
+// selected for every iterator that is_valid_fill_buffer_iterator rejects;
+// the fill is performed by copying from a constant_iterator range.
+template<class BufferIterator, class T>
+inline void
+dispatch_fill(BufferIterator first,
+ size_t count,
+ const T &value,
+ command_queue &queue,
+ typename boost::disable_if<
+ is_valid_fill_buffer_iterator<BufferIterator>
+ >::type* = 0)
+{
+ fill_with_copy(first, count, value, queue);
+}
+
+// asynchronous default implementation: selected for every iterator that
+// is_valid_fill_buffer_iterator rejects; returns the future produced by the
+// copy-based asynchronous fill.
+template<class BufferIterator, class T>
+inline future<void>
+dispatch_fill_async(BufferIterator first,
+ size_t count,
+ const T &value,
+ command_queue &queue,
+ typename boost::disable_if<
+ is_valid_fill_buffer_iterator<BufferIterator>
+ >::type* = 0)
+{
+ return fill_async_with_copy(first, count, value, queue);
+}
+#else
+// used when CL_VERSION_1_2 is not defined at compile time: every fill is
+// performed by copying from a constant_iterator range.
+template<class BufferIterator, class T>
+inline void dispatch_fill(BufferIterator first,
+ size_t count,
+ const T &value,
+ command_queue &queue)
+{
+ fill_with_copy(first, count, value, queue);
+}
+
+// used when CL_VERSION_1_2 is not defined at compile time: returns the
+// future produced by the copy-based asynchronous fill.
+template<class BufferIterator, class T>
+inline future<void> dispatch_fill_async(BufferIterator first,
+ size_t count,
+ const T &value,
+ command_queue &queue)
+{
+ return fill_async_with_copy(first, count, value, queue);
+}
+#endif // !defined(CL_VERSION_1_2)
+
+} // end detail namespace
+
+/// Assigns \p value to every element in the range [\p first, \p last).
+/// An empty range is left untouched and nothing is enqueued.
+///
+/// \param first first element in the range to fill
+/// \param last last element in the range to fill
+/// \param value value to copy to each element
+/// \param queue command queue to perform the operation
+///
+/// For example, to fill a vector on the device with sevens:
+/// \code
+/// // vector on the device
+/// boost::compute::vector<int> vec(10, context);
+///
+/// // fill vector with sevens
+/// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
+/// \endcode
+///
+/// \see boost::compute::fill_n()
+template<class BufferIterator, class T>
+inline void fill(BufferIterator first,
+                 BufferIterator last,
+                 const T &value,
+                 command_queue &queue = system::default_queue())
+{
+    const size_t element_count = detail::iterator_range_size(first, last);
+
+    // only dispatch when there is at least one element to write
+    if(element_count != 0){
+        detail::dispatch_fill(first, element_count, value, queue);
+    }
+}
+
+/// Asynchronous counterpart of fill(): enqueues the assignment of \p value
+/// to every element in the range [\p first, \p last).
+///
+/// \return the future produced by the dispatched fill, or a
+///         default-constructed future when the range is empty
+///
+/// \see boost::compute::fill()
+template<class BufferIterator, class T>
+inline future<void> fill_async(BufferIterator first,
+                               BufferIterator last,
+                               const T &value,
+                               command_queue &queue = system::default_queue())
+{
+    const size_t element_count = detail::iterator_range_size(first, last);
+
+    // only dispatch when there is at least one element to write
+    if(element_count != 0){
+        return detail::dispatch_fill_async(first, element_count, value, queue);
+    }
+
+    return future<void>();
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP
diff --git a/boost/compute/algorithm/fill_n.hpp b/boost/compute/algorithm/fill_n.hpp
new file mode 100644
index 0000000000..18a8f706a5
--- /dev/null
+++ b/boost/compute/algorithm/fill_n.hpp
@@ -0,0 +1,36 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FILL_N_HPP
+#define BOOST_COMPUTE_ALGORITHM_FILL_N_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Assigns \p value to the \p count elements starting at \p first, i.e. to
+/// the range [\p first, \p first + \p count).
+///
+/// \see fill()
+template<class BufferIterator, class Size, class T>
+inline void fill_n(BufferIterator first,
+                   Size count,
+                   const T &value,
+                   command_queue &queue = system::default_queue())
+{
+    // turn the element count into an end iterator and defer to fill()
+    const BufferIterator last = first + count;
+
+    ::boost::compute::fill(first, last, value, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FILL_N_HPP
diff --git a/boost/compute/algorithm/find.hpp b/boost/compute/algorithm/find.hpp
new file mode 100644
index 0000000000..ef3ebf0c47
--- /dev/null
+++ b/boost/compute/algorithm/find.hpp
@@ -0,0 +1,57 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FIND_HPP
+#define BOOST_COMPUTE_ALGORITHM_FIND_HPP
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Searches the range [\p first, \p last) for \p value and returns an
+/// iterator to the first element that compares equal to it.
+///
+/// For element types whose vector_size is not 1 the comparison is wrapped
+/// in \c all(), so every component has to match.
+template<class InputIterator, class T>
+inline InputIterator find(InputIterator first,
+                          InputIterator last,
+                          const T &value,
+                          command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    using ::boost::compute::_1;
+    using ::boost::compute::lambda::all;
+
+    const bool is_vector_element = vector_size<element_type>::value != 1;
+
+    if(is_vector_element){
+        // component-wise equality must hold for all components
+        return ::boost::compute::find_if(first, last, all(_1 == value), queue);
+    }
+
+    // scalar elements: plain equality predicate
+    return ::boost::compute::find_if(first, last, _1 == value, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FIND_HPP
diff --git a/boost/compute/algorithm/find_end.hpp b/boost/compute/algorithm/find_end.hpp
new file mode 100644
index 0000000000..5c40055113
--- /dev/null
+++ b/boost/compute/algorithm/find_end.hpp
@@ -0,0 +1,119 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FIND_END_HPP
+#define BOOST_COMPUTE_ALGORITHM_FIND_END_HPP
+
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/detail/search_all.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Helper function for find_end
+///
+/// Basically a copy of find_if which returns the last occurrence
+/// instead of the first occurrence; returns \p last when nothing matches
+///
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator find_end_helper(InputIterator first,
+ InputIterator last,
+ UnaryPredicate predicate,
+ command_queue &queue)
+{
+ typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+ size_t count = detail::iterator_range_size(first, last);
+ if(count == 0){
+ return last; // empty range: no kernel is launched
+ }
+
+ const context &context = queue.get_context();
+
+ detail::meta_kernel k("find_end");
+ size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
+ atomic_max<int_> atomic_max_int;
+
+ k << k.decl<const int_>("i") << " = get_global_id(0);\n"
+ << k.decl<const value_type>("value") << "="
+ << first[k.var<const int_>("i")] << ";\n"
+ << "if(" << predicate(k.var<const value_type>("value")) << "){\n"
+ << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n" // keep the largest matching index
+ << "}\n";
+
+ kernel kernel = k.compile(context);
+
+ scalar<int_> index(context);
+ kernel.set_arg(index_arg, index.get_buffer());
+
+ index.write(static_cast<int_>(-1), queue); // -1 means "no element matched"
+
+ queue.enqueue_1d_range_kernel(kernel, 0, count, 0); // one work-item per element
+
+ int result = static_cast<int>(index.read(queue));
+ if(result == -1) return last; // sentinel untouched: predicate never held
+ else return first + result; // result is an index relative to first
+}
+
+} // end detail namespace
+
+///
+/// \brief Substring matching algorithm
+///
+/// Locates the final position at which the pattern [p_first, p_last)
+/// matches inside the text [t_first, t_last).
+/// \return Iterator pointing to the beginning of the last occurrence
+///
+/// \param t_first Iterator pointing to start of text
+/// \param t_last Iterator pointing to end of text
+/// \param p_first Iterator pointing to start of pattern
+/// \param p_last Iterator pointing to end of pattern
+/// \param queue Queue on which to execute
+///
+template<class TextIterator, class PatternIterator>
+inline TextIterator find_end(TextIterator t_first,
+                             TextIterator t_last,
+                             PatternIterator p_first,
+                             PatternIterator p_last,
+                             command_queue &queue = system::default_queue())
+{
+    typedef vector<uint_>::iterator flag_iterator;
+
+    const context &context = queue.get_context();
+
+    // output buffer for the search kernel: one slot per text position
+    const size_t text_length = detail::iterator_range_size(t_first, t_last);
+    vector<uint_> match_flags(text_length, context);
+
+    detail::search_kernel<PatternIterator,
+                          TextIterator,
+                          flag_iterator> matcher;
+
+    matcher.set_range(p_first, p_last, t_first, t_last, match_flags.begin());
+    matcher.exec(queue);
+
+    using boost::compute::_1;
+
+    // highest slot holding 1, or match_flags.end() when there is none
+    const flag_iterator last_match =
+        detail::find_end_helper(match_flags.begin(),
+                                match_flags.end(),
+                                _1 == 1,
+                                queue);
+
+    // convert the slot position back into an iterator into the text
+    return t_first + detail::iterator_range_size(match_flags.begin(), last_match);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FIND_END_HPP
diff --git a/boost/compute/algorithm/find_if.hpp b/boost/compute/algorithm/find_if.hpp
new file mode 100644
index 0000000000..db99cc0396
--- /dev/null
+++ b/boost/compute/algorithm/find_if.hpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/find_if_with_atomics.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Searches the range [\p first, \p last) and returns an iterator to the
+/// first element for which \p predicate returns \c true.
+///
+/// The search itself is delegated to detail::find_if_with_atomics().
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator find_if(InputIterator first,
+                             InputIterator last,
+                             UnaryPredicate predicate,
+                             command_queue &queue = system::default_queue())
+{
+    InputIterator match =
+        detail::find_if_with_atomics(first, last, predicate, queue);
+
+    return match;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP
diff --git a/boost/compute/algorithm/find_if_not.hpp b/boost/compute/algorithm/find_if_not.hpp
new file mode 100644
index 0000000000..61de050d31
--- /dev/null
+++ b/boost/compute/algorithm/find_if_not.hpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP
+#define BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Searches the range [\p first, \p last) and returns an iterator to the
+/// first element for which \p predicate returns \c false.
+///
+/// Implemented as find_if() with the negated predicate.
+///
+/// \see find_if()
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator find_if_not(InputIterator first,
+                                 InputIterator last,
+                                 UnaryPredicate predicate,
+                                 command_queue &queue = system::default_queue())
+{
+    InputIterator first_rejected =
+        ::boost::compute::find_if(first, last, not1(predicate), queue);
+
+    return first_rejected;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP
diff --git a/boost/compute/algorithm/for_each.hpp b/boost/compute/algorithm/for_each.hpp
new file mode 100644
index 0000000000..3ed399e6e9
--- /dev/null
+++ b/boost/compute/algorithm/for_each.hpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP
+#define BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Meta-kernel that applies \p function to each element of the range
+/// [\p first, \p last), using one work-item per element.
+template<class InputIterator, class Function>
+struct for_each_kernel : public meta_kernel
+{
+    /// Records the size of [\p first, \p last) and generates the kernel
+    /// source, which invokes \p function on the element selected by the
+    /// work-item's global id.
+    for_each_kernel(InputIterator first, InputIterator last, Function function)
+        : meta_kernel("for_each")
+    {
+        // store range size
+        m_count = detail::iterator_range_size(first, last);
+
+        // setup kernel source
+        *this << function(first[get_global_id(0)]) << ";\n";
+    }
+
+    /// Runs the kernel on \p queue with one work-item per element.
+    /// Does nothing when the range is empty.
+    void exec(command_queue &queue)
+    {
+        // OpenCL rejects a zero-sized ND-range, so an empty range must not
+        // be enqueued (same guard as the other kernels in this library)
+        if(m_count == 0){
+            return;
+        }
+
+        exec_1d(queue, 0, m_count);
+    }
+
+    /// Number of elements in the range, i.e. the global work size.
+    size_t m_count;
+};
+
+} // end detail namespace
+
+/// Invokes \p function on every element of the range [\p first, \p last)
+/// and returns \p function.
+///
+/// \see transform()
+template<class InputIterator, class UnaryFunction>
+inline UnaryFunction for_each(InputIterator first,
+                              InputIterator last,
+                              UnaryFunction function,
+                              command_queue &queue = system::default_queue())
+{
+    typedef detail::for_each_kernel<InputIterator, UnaryFunction> kernel_type;
+
+    // build the kernel for this range and run it on the queue
+    kernel_type launcher(first, last, function);
+    launcher.exec(queue);
+
+    return function;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP
diff --git a/boost/compute/algorithm/for_each_n.hpp b/boost/compute/algorithm/for_each_n.hpp
new file mode 100644
index 0000000000..d0be784bf7
--- /dev/null
+++ b/boost/compute/algorithm/for_each_n.hpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP
+#define BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP
+
+#include <boost/compute/algorithm/for_each.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Invokes \p function on each of the \p count elements starting at
+/// \p first, i.e. on the range [\p first, \p first \c + \p count), and
+/// returns whatever for_each() returns.
+///
+/// \see for_each()
+template<class InputIterator, class Size, class UnaryFunction>
+inline UnaryFunction for_each_n(InputIterator first,
+                                Size count,
+                                UnaryFunction function,
+                                command_queue &queue = system::default_queue())
+{
+    // turn the element count into an end iterator and defer to for_each()
+    const InputIterator last = first + count;
+
+    return ::boost::compute::for_each(first, last, function, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP
diff --git a/boost/compute/algorithm/gather.hpp b/boost/compute/algorithm/gather.hpp
new file mode 100644
index 0000000000..b2f725d54e
--- /dev/null
+++ b/boost/compute/algorithm/gather.hpp
@@ -0,0 +1,84 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_GATHER_HPP
+#define BOOST_COMPUTE_ALGORITHM_GATHER_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Meta-kernel implementing gather(): work-item \c i stores
+/// \c input[map[i]] into \c result[i], where \c map is the range passed
+/// to set_range().
+template<class InputIterator, class MapIterator, class OutputIterator>
+class gather_kernel : public meta_kernel
+{
+public:
+    /// Creates a kernel with an empty range; exec() is a no-op until
+    /// set_range() has been called.
+    gather_kernel() : meta_kernel("gather"), m_count(0)
+    {}
+
+    /// Records the size of the map range [\p first, \p last) and emits the
+    /// kernel source that reads through the map into \p input and writes
+    /// to \p result.
+    void set_range(MapIterator first,
+                   MapIterator last,
+                   InputIterator input,
+                   OutputIterator result)
+    {
+        m_count = iterator_range_size(first, last);
+
+        *this <<
+            "const uint i = get_global_id(0);\n" <<
+            result[expr<uint_>("i")] << "=" <<
+                input[first[expr<uint_>("i")]] << ";\n";
+    }
+
+    /// Runs the kernel with one work-item per map element. Returns a
+    /// default-constructed event when the map range is empty.
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        // Launch from global id 0: \c first[i] and \c result[i] are indexed
+        // relative to the iterators themselves, so starting the ids at the
+        // map iterator's buffer index would apply that offset a second time.
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    /// Number of elements in the map range, i.e. the global work size.
+    size_t m_count;
+};
+
+} // end detail namespace
+
+/// Gathers values from the range beginning at \p input: the indices in
+/// [\p first, \p last) select which input values are copied, in order, to
+/// the range beginning at \p result.
+///
+/// \see scatter()
+template<class InputIterator, class MapIterator, class OutputIterator>
+inline void gather(MapIterator first,
+                   MapIterator last,
+                   InputIterator input,
+                   OutputIterator result,
+                   command_queue &queue = system::default_queue())
+{
+    typedef detail::gather_kernel<
+        InputIterator, MapIterator, OutputIterator
+    > kernel_type;
+
+    // configure the kernel for this map range, then run it
+    kernel_type gatherer;
+    gatherer.set_range(first, last, input, result);
+    gatherer.exec(queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_GATHER_HPP
diff --git a/boost/compute/algorithm/generate.hpp b/boost/compute/algorithm/generate.hpp
new file mode 100644
index 0000000000..c70a542683
--- /dev/null
+++ b/boost/compute/algorithm/generate.hpp
@@ -0,0 +1,49 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_HPP
+#define BOOST_COMPUTE_ALGORITHM_GENERATE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/iterator/function_input_iterator.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Writes the result of \p generator to each element of the range
+/// [\p first, \p last). An empty range is left untouched.
+template<class OutputIterator, class Generator>
+inline void generate(OutputIterator first,
+                     OutputIterator last,
+                     Generator generator,
+                     command_queue &queue = system::default_queue())
+{
+    const size_t element_count = detail::iterator_range_size(first, last);
+
+    if(element_count != 0){
+        // copy from a pair of function input iterators positioned at the
+        // same indices as the destination range
+        ::boost::compute::copy(
+            ::boost::compute::make_function_input_iterator(
+                generator, first.get_index()
+            ),
+            ::boost::compute::make_function_input_iterator(
+                generator, last.get_index()
+            ),
+            first,
+            queue
+        );
+    }
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_GENERATE_HPP
diff --git a/boost/compute/algorithm/generate_n.hpp b/boost/compute/algorithm/generate_n.hpp
new file mode 100644
index 0000000000..6d8e607b64
--- /dev/null
+++ b/boost/compute/algorithm/generate_n.hpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP
+#define BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/generate.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Writes the result of \p generator to each of the \p count elements
+/// starting at \p first, i.e. to the range [\p first, \p first + \p count).
+template<class OutputIterator, class Size, class Generator>
+inline void generate_n(OutputIterator first,
+                       Size count,
+                       Generator generator,
+                       command_queue &queue = system::default_queue())
+{
+    // turn the element count into an end iterator and defer to generate()
+    const OutputIterator last = first + count;
+
+    ::boost::compute::generate(first, last, generator, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP
diff --git a/boost/compute/algorithm/includes.hpp b/boost/compute/algorithm/includes.hpp
new file mode 100644
index 0000000000..c4e7c793e7
--- /dev/null
+++ b/boost/compute/algorithm/includes.hpp
@@ -0,0 +1,155 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP
+#define BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/detail/balanced_path.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Serial includes kernel class
+///
+/// Subclass of meta_kernel that runs the includes test on each tile, one work-item per tile
+///
+class serial_includes_kernel : meta_kernel
+{
+public:
+
+ serial_includes_kernel() : meta_kernel("includes")
+ {
+
+ }
+
+ template<class InputIterator1, class InputIterator2,
+ class InputIterator3, class InputIterator4,
+ class OutputIterator>
+ void set_range(InputIterator1 first1,
+ InputIterator2 first2,
+ InputIterator3 tile_first1,
+ InputIterator3 tile_last1,
+ InputIterator4 tile_first2,
+ OutputIterator result)
+ {
+ m_count = iterator_range_size(tile_first1, tile_last1) - 1; // N boundary entries describe N-1 tiles
+
+ *this <<
+ "uint i = get_global_id(0);\n" <<
+ "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << // tile i of range 1 is [start1, end1)
+ "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
+ "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << // tile i of range 2 is [start2, end2)
+ "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
+ "uint includes = 1;\n" << // each tile starts out as "included"
+ "while(start1<end1 && start2<end2)\n" <<
+ "{\n" <<
+ " if(" << first1[expr<uint_>("start1")] << " == " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ " start1++; start2++;\n" << // matched: advance in both ranges
+ " }\n" <<
+ " else if(" << first1[expr<uint_>("start1")] << " < " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " start1++;\n" << // element of range 1 is smaller: skip it
+ " else\n" <<
+ " {\n" <<
+ " includes = 0;\n" << // element of range 2 has no match in range 1
+ " break;\n" <<
+ " }\n" <<
+ "}\n" <<
+ "if(start2<end2)\n" << // elements of range 2 are left over in this tile
+ " includes = 0;\n" <<
+ result[expr<uint_>("i")] << " = includes;\n"; // per-tile verdict: 1 = included, 0 = not
+ }
+
+ event exec(command_queue &queue)
+ {
+ if(m_count == 0) { // no tiles: nothing to launch
+ return event();
+ }
+
+ return exec_1d(queue, 0, m_count);
+ }
+
+private:
+ size_t m_count; // number of tiles; set by set_range()
+};
+
+} //end detail namespace
+
+///
+/// \brief Includes algorithm
+///
+/// Tests whether the sorted range [first1, last1) includes the sorted
+/// range [first2, last2), i.e. whether [first1, last1) is a superset of
+/// [first2, last2).
+///
+/// \return True, if [first1, last1) includes [first2, last2). False otherwise.
+///
+/// \param first1 Iterator pointing to start of first set
+/// \param last1 Iterator pointing to end of first set
+/// \param first2 Iterator pointing to start of second set
+/// \param last2 Iterator pointing to end of second set
+/// \param queue Queue on which to execute
+///
+template<class InputIterator1, class InputIterator2>
+inline bool includes(InputIterator1 first1,
+                     InputIterator1 last1,
+                     InputIterator2 first2,
+                     InputIterator2 last2,
+                     command_queue &queue = system::default_queue())
+{
+    const size_t tile_size = 1024;
+
+    const size_t count1 = detail::iterator_range_size(first1, last1);
+    const size_t count2 = detail::iterator_range_size(first2, last2);
+
+    // number of tiles covering both ranges, rounded up
+    const size_t tile_count = (count1 + count2 + tile_size - 1) / tile_size;
+
+    // tile boundaries: one more entry than there are tiles
+    vector<uint_> tile_a(tile_count + 1, queue.get_context());
+    vector<uint_> tile_b(tile_count + 1, queue.get_context());
+
+    // Tile the sets
+    detail::balanced_path_kernel tiler;
+    tiler.tile_size = static_cast<unsigned int>(tile_size);
+    tiler.set_range(first1, last1, first2, last2,
+                    tile_a.begin() + 1, tile_b.begin() + 1);
+
+    // first boundary of each range is position 0
+    fill_n(tile_a.begin(), 1, uint_(0), queue);
+    fill_n(tile_b.begin(), 1, uint_(0), queue);
+    tiler.exec(queue);
+
+    // last boundary of each range is its element count
+    fill_n(tile_a.end() - 1, 1, static_cast<uint_>(count1), queue);
+    fill_n(tile_b.end() - 1, 1, static_cast<uint_>(count2), queue);
+
+    // one verdict per tile
+    vector<uint_> tile_verdicts(tile_count, queue.get_context());
+
+    // Find individually
+    detail::serial_includes_kernel per_tile_check;
+    per_tile_check.set_range(first1, first2, tile_a.begin(), tile_a.end(),
+                             tile_b.begin(), tile_verdicts.begin());
+
+    per_tile_check.exec(queue);
+
+    // the ranges are included only if no tile reported 0
+    const bool no_tile_failed =
+        find(tile_verdicts.begin(), tile_verdicts.end(), 0, queue)
+            == tile_verdicts.end();
+
+    return no_tile_failed;
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP
diff --git a/boost/compute/algorithm/inclusive_scan.hpp b/boost/compute/algorithm/inclusive_scan.hpp
new file mode 100644
index 0000000000..9f98beaf7c
--- /dev/null
+++ b/boost/compute/algorithm/inclusive_scan.hpp
@@ -0,0 +1,81 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP
+#define BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/scan.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Computes an inclusive scan over the range [\p first, \p last) and writes
+/// the results to the range beginning at \p result.
+///
+/// Every output element is the combination of the corresponding input
+/// element with all input elements that precede it.
+///
+/// \param first first element in the range to scan
+/// \param last last element in the range to scan
+/// \param result first element in the result range
+/// \param binary_op associative binary operator
+/// \param queue command queue to perform the operation
+///
+/// \return \c OutputIterator to the end of the result range
+///
+/// The default operation is to add the elements up.
+///
+/// \snippet test/test_scan.cpp inclusive_scan_int
+///
+/// A different associative operation can be passed as \p binary_op
+/// instead (e.g., multiplication, maximum, minimum).
+///
+/// \snippet test/test_scan.cpp inclusive_scan_int_multiplies
+///
+/// \see exclusive_scan()
+template<class InputIterator, class OutputIterator, class BinaryOperator>
+inline OutputIterator
+inclusive_scan(InputIterator first,
+               InputIterator last,
+               OutputIterator result,
+               BinaryOperator binary_op,
+               command_queue &queue = system::default_queue())
+{
+    typedef typename
+        std::iterator_traits<OutputIterator>::value_type result_value_type;
+
+    OutputIterator result_end =
+        detail::scan(first, last, result, false,
+                     result_value_type(0), binary_op,
+                     queue);
+
+    return result_end;
+}
+
+/// \overload
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+inclusive_scan(InputIterator first,
+               InputIterator last,
+               OutputIterator result,
+               command_queue &queue = system::default_queue())
+{
+    typedef typename
+        std::iterator_traits<OutputIterator>::value_type result_value_type;
+
+    // no operator supplied: combine elements with plus<>
+    OutputIterator result_end =
+        detail::scan(first, last, result, false,
+                     result_value_type(0),
+                     boost::compute::plus<result_value_type>(),
+                     queue);
+
+    return result_end;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP
diff --git a/boost/compute/algorithm/inner_product.hpp b/boost/compute/algorithm/inner_product.hpp
new file mode 100644
index 0000000000..614611f91e
--- /dev/null
+++ b/boost/compute/algorithm/inner_product.hpp
@@ -0,0 +1,93 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP
+#define BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/accumulate.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/iterator/zip_iterator.hpp>
+#include <boost/compute/functional/detail/unpack.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Computes the inner product of the elements in [\p first1, \p last1)
+/// with the equally long range beginning at \p first2, accumulating onto
+/// \p init.
+template<class InputIterator1, class InputIterator2, class T>
+inline T inner_product(InputIterator1 first1,
+                       InputIterator1 last1,
+                       InputIterator2 first2,
+                       T init,
+                       command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type element_type;
+
+    // the second range spans as many elements as the first one
+    const ptrdiff_t length = std::distance(first1, last1);
+    const InputIterator2 last2 = first2 + length;
+
+    // zip both ranges, multiply each pair through a transform iterator and
+    // hand the resulting range to accumulate()
+    return ::boost::compute::accumulate(
+        ::boost::compute::make_transform_iterator(
+            ::boost::compute::make_zip_iterator(
+                boost::make_tuple(first1, first2)
+            ),
+            detail::unpack(multiplies<element_type>())
+        ),
+        ::boost::compute::make_transform_iterator(
+            ::boost::compute::make_zip_iterator(
+                boost::make_tuple(last1, last2)
+            ),
+            detail::unpack(multiplies<element_type>())
+        ),
+        init,
+        queue
+    );
+}
+
+/// \overload
+///
+/// Pairs of elements are combined with \p transform_function into a
+/// temporary device vector, which is then reduced onto \p init with
+/// \p accumulate_function.
+template<class InputIterator1,
+         class InputIterator2,
+         class T,
+         class BinaryAccumulateFunction,
+         class BinaryTransformFunction>
+inline T inner_product(InputIterator1 first1,
+                       InputIterator1 last1,
+                       InputIterator2 first2,
+                       T init,
+                       BinaryAccumulateFunction accumulate_function,
+                       BinaryTransformFunction transform_function,
+                       command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type element_type;
+
+    // temporary storage for the pairwise results
+    const size_t length = detail::iterator_range_size(first1, last1);
+    vector<element_type> pairwise(length, queue.get_context());
+
+    transform(first1, last1, first2, pairwise.begin(), transform_function, queue);
+
+    // reduce the pairwise results onto init
+    return ::boost::compute::accumulate(
+        pairwise.begin(), pairwise.end(), init, accumulate_function, queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP
diff --git a/boost/compute/algorithm/inplace_merge.hpp b/boost/compute/algorithm/inplace_merge.hpp
new file mode 100644
index 0000000000..3080950df5
--- /dev/null
+++ b/boost/compute/algorithm/inplace_merge.hpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP
+#define BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/merge.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Merges the sorted range [\p first, \p middle) with the sorted range
+/// [\p middle, \p last) and stores the merged sequence back starting at
+/// \p first.
+///
+/// Both halves are first copied into temporary device vectors; both are
+/// asserted to be non-empty.
+template<class Iterator>
+inline void inplace_merge(Iterator first,
+                          Iterator middle,
+                          Iterator last,
+                          command_queue &queue = system::default_queue())
+{
+    BOOST_ASSERT(first < middle && middle < last);
+
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+
+    const context &context = queue.get_context();
+
+    const ptrdiff_t lower_size = std::distance(first, middle);
+    const ptrdiff_t upper_size = std::distance(middle, last);
+
+    // scratch copies of both halves, so the merge can write over the input
+    vector<element_type> lower_half(lower_size, context);
+    vector<element_type> upper_half(upper_size, context);
+
+    copy(first, middle, lower_half.begin(), queue);
+    copy(middle, last, upper_half.begin(), queue);
+
+    // merge the scratch copies back into the original range
+    ::boost::compute::merge(lower_half.begin(), lower_half.end(),
+                            upper_half.begin(), upper_half.end(),
+                            first,
+                            queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP
diff --git a/boost/compute/algorithm/iota.hpp b/boost/compute/algorithm/iota.hpp
new file mode 100644
index 0000000000..084c3d8d97
--- /dev/null
+++ b/boost/compute/algorithm/iota.hpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_IOTA_HPP
+#define BOOST_COMPUTE_ALGORITHM_IOTA_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/iterator/counting_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Writes consecutive values into the range [\p first, \p last), beginning
+/// with \p value.
+///
+/// For example, the following code:
+/// \snippet test/test_iota.cpp iota
+///
+/// Will fill \c vec with the values (\c 0, \c 1, \c 2, \c ...).
+///
+/// \param first first element in the range to fill
+/// \param last last element in the range to fill
+/// \param value value written to the first element
+/// \param queue command queue to perform the operation
+template<class BufferIterator, class T>
+inline void iota(BufferIterator first,
+                 BufferIterator last,
+                 const T &value,
+                 command_queue &queue = system::default_queue())
+{
+    // number of elements to produce, expressed in the value type
+    const T length = static_cast<T>(detail::iterator_range_size(first, last));
+
+    // copy the counting sequence [value, value + length) into the output
+    copy(::boost::compute::make_counting_iterator(value),
+         ::boost::compute::make_counting_iterator(value + length),
+         first,
+         queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_IOTA_HPP
diff --git a/boost/compute/algorithm/is_partitioned.hpp b/boost/compute/algorithm/is_partitioned.hpp
new file mode 100644
index 0000000000..3916825057
--- /dev/null
+++ b/boost/compute/algorithm/is_partitioned.hpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP
+#define BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+#include <boost/compute/algorithm/find_if_not.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Checks whether the range [\p first, \p last) is partitioned by
+/// \p predicate, i.e. no element satisfying \p predicate appears after the
+/// first element that does not satisfy it.
+///
+/// \param first first element in the range to check
+/// \param last last element in the range to check
+/// \param predicate unary predicate defining the partition
+/// \param queue command queue to perform the operation
+///
+/// \return \c true if the range is partitioned according to \p predicate
+template<class InputIterator, class UnaryPredicate>
+inline bool is_partitioned(InputIterator first,
+                           InputIterator last,
+                           UnaryPredicate predicate,
+                           command_queue &queue = system::default_queue())
+{
+    // first element for which the predicate does not hold
+    InputIterator boundary =
+        ::boost::compute::find_if_not(first, last, predicate, queue);
+
+    // partitioned if nothing from the boundary onwards satisfies it again
+    InputIterator offender =
+        ::boost::compute::find_if(boundary, last, predicate, queue);
+
+    return offender == last;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP
diff --git a/boost/compute/algorithm/is_permutation.hpp b/boost/compute/algorithm/is_permutation.hpp
new file mode 100644
index 0000000000..1e502efb37
--- /dev/null
+++ b/boost/compute/algorithm/is_permutation.hpp
@@ -0,0 +1,67 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP
+#define BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/algorithm/equal.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// \brief Permutation checking algorithm
+///
+/// Determines whether the elements of [first1, last1) can be rearranged
+/// to form the range [first2, last2). Both ranges are copied into
+/// temporary vectors, sorted, and compared element by element; the
+/// inputs themselves are not modified.
+/// \return True, if it can be permuted. False, otherwise (including when
+///         the two ranges differ in length).
+///
+/// \param first1 Iterator pointing to start of first range
+/// \param last1 Iterator pointing to end of first range
+/// \param first2 Iterator pointing to start of second range
+/// \param last2 Iterator pointing to end of second range
+/// \param queue Queue on which to execute
+///
+template<class InputIterator1, class InputIterator2>
+inline bool is_permutation(InputIterator1 first1,
+                           InputIterator1 last1,
+                           InputIterator2 first2,
+                           InputIterator2 last2,
+                           command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type first_value_type;
+    typedef typename std::iterator_traits<InputIterator2>::value_type second_value_type;
+
+    // ranges of different length can never be permutations of each other
+    const size_t length1 = detail::iterator_range_size(first1, last1);
+    const size_t length2 = detail::iterator_range_size(first2, last2);
+    if(length1 != length2){
+        return false;
+    }
+
+    // work on sorted copies so the inputs stay untouched
+    vector<first_value_type> sorted1(first1, last1, queue);
+    vector<second_value_type> sorted2(first2, last2, queue);
+
+    sort(sorted1.begin(), sorted1.end(), queue);
+    sort(sorted2.begin(), sorted2.end(), queue);
+
+    return equal(sorted1.begin(), sorted1.end(), sorted2.begin(), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP
diff --git a/boost/compute/algorithm/is_sorted.hpp b/boost/compute/algorithm/is_sorted.hpp
new file mode 100644
index 0000000000..a605159ac3
--- /dev/null
+++ b/boost/compute/algorithm/is_sorted.hpp
@@ -0,0 +1,64 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP
+#define BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional/bind.hpp>
+#include <boost/compute/functional/operator.hpp>
+#include <boost/compute/algorithm/adjacent_find.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Checks whether the values in the range [\p first, \p last) are in
+/// sorted order with respect to \p compare.
+///
+/// \param first first element in the range to check
+/// \param last last element in the range to check
+/// \param compare comparison function (by default \c less)
+/// \param queue command queue to perform the operation
+///
+/// \return \c true if the range [\p first, \p last) is sorted
+///
+/// \see sort()
+template<class InputIterator, class Compare>
+inline bool is_sorted(InputIterator first,
+                      InputIterator last,
+                      Compare compare,
+                      command_queue &queue = system::default_queue())
+{
+    // look for an adjacent pair (a, b) with compare(b, a) true, i.e. a pair
+    // that is out of order; the placeholders are swapped for that reason
+    InputIterator out_of_order = ::boost::compute::adjacent_find(
+        first,
+        last,
+        ::boost::compute::bind(compare,
+                               ::boost::compute::placeholders::_2,
+                               ::boost::compute::placeholders::_1),
+        queue
+    );
+
+    return out_of_order == last;
+}
+
+/// \overload
+///
+/// Uses \c less on the iterator's value type as the comparison.
+template<class InputIterator>
+inline bool is_sorted(InputIterator first,
+                      InputIterator last,
+                      command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    ::boost::compute::less<element_type> ascending;
+
+    return ::boost::compute::is_sorted(first, last, ascending, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP
diff --git a/boost/compute/algorithm/lexicographical_compare.hpp b/boost/compute/algorithm/lexicographical_compare.hpp
new file mode 100644
index 0000000000..c4f7120807
--- /dev/null
+++ b/boost/compute/algorithm/lexicographical_compare.hpp
@@ -0,0 +1,117 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Mageswaran.D <mageswaran1989@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/any_of.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+
+namespace detail {
+
+const char lexicographical_compare_source[] = // kernel source text; T1 and T2 are supplied as -D build options by the dispatcher
+"__kernel void lexicographical_compare(const uint size1,\n"
+" const uint size2,\n"
+" __global const T1 *range1,\n"
+" __global const T2 *range2,\n"
+" __global bool *result_buf)\n"
+"{\n"
+" const uint i = get_global_id(0);\n"
+" if((i != size1) && (i != size2)){\n"
+ // Each work-item compares the element pair at its own index i and stores the outcome in result_buf[i].
+ // 0 encodes "true" (range1[i] < range2[i]); 1 encodes "false".
+" if(range1[i] < range2[i])\n"
+" result_buf[i] = 0;\n"
+" else\n"
+" result_buf[i] = 1;\n"
+" }\n"
+" else\n"
+" result_buf[i] = !((i == size1) && (i != size2));\n" // i equals size1 or size2: stores 0 only when i == size1 and i != size2
+"}\n"; // NOTE(review): an index past the shorter range that equals neither size still takes the element-compare branch; looks like an out-of-bounds read when the sizes differ by more than one - confirm
+
+/// Builds (or fetches from the global program cache) the
+/// lexicographical_compare kernel for the two value types, launches one
+/// work-item per index of the longer range, and returns \c true if any
+/// work-item stored \c 0 in the result buffer.
+///
+/// Returns \c false without launching a kernel when both ranges are empty.
+///
+/// NOTE(review): the result is "any position compared less", not "the first
+/// differing position compared less" - confirm this matches the intended
+/// lexicographical ordering.
+template<class InputIterator1, class InputIterator2>
+inline bool dispatch_lexicographical_compare(InputIterator1 first1,
+                                             InputIterator1 last1,
+                                             InputIterator2 first2,
+                                             InputIterator2 last2,
+                                             command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type lhs_type;
+    typedef typename std::iterator_traits<InputIterator2>::value_type rhs_type;
+
+    const boost::compute::context &ctx = queue.get_context();
+
+    boost::shared_ptr<program_cache> programs =
+        program_cache::get_global_cache(ctx);
+
+    const size_t lhs_count = iterator_range_size(first1, last1);
+    const size_t rhs_count = iterator_range_size(first2, last2);
+    const size_t work_items = (std::max)(lhs_count, rhs_count);
+
+    // two empty ranges: the first is not less than the second
+    if(work_items == 0){
+        return false;
+    }
+
+    // one flag per work-item, written by the kernel
+    boost::compute::vector<bool> flags(work_items, ctx);
+
+    // the cache key and the build options both depend on the value types
+    std::string cache_key("__boost_lexicographical_compare");
+    cache_key += type_name<lhs_type>();
+    cache_key += type_name<rhs_type>();
+
+    std::stringstream build_options;
+    build_options << " -DT1=" << type_name<lhs_type>();
+    build_options << " -DT2=" << type_name<rhs_type>();
+
+    program compare_program = programs->get_or_build(
+        cache_key, build_options.str(), lexicographical_compare_source, ctx
+    );
+
+    kernel compare_kernel(compare_program, "lexicographical_compare");
+
+    // NOTE(review): only the underlying buffers are bound; any offset of
+    // first1/first2 into its buffer is not passed to the kernel - confirm
+    // callers always start at the beginning of the buffer
+    compare_kernel.set_arg<uint_>(0, lhs_count);
+    compare_kernel.set_arg<uint_>(1, rhs_count);
+    compare_kernel.set_arg(2, first1.get_buffer());
+    compare_kernel.set_arg(3, first2.get_buffer());
+    compare_kernel.set_arg(4, flags.get_buffer());
+
+    queue.enqueue_1d_range_kernel(compare_kernel, 0, work_items, 0);
+
+    // a stored 0 means "less than" for that index
+    return boost::compute::any_of(flags.begin(), flags.end(), _1 == 0, queue);
+}
+
+} // end detail namespace
+
+/// Checks if the first range [first1, last1) is lexicographically
+/// less than the second range [first2, last2).
+template<class InputIterator1, class InputIterator2>
+inline bool lexicographical_compare(InputIterator1 first1,
+                                    InputIterator1 last1,
+                                    InputIterator2 first2,
+                                    InputIterator2 last2,
+                                    command_queue &queue = system::default_queue())
+{
+    // all of the work happens in the kernel dispatcher
+    const bool is_less = detail::dispatch_lexicographical_compare(
+        first1, last1, first2, last2, queue
+    );
+
+    return is_less;
+}
+
+} // end compute namespace
+} // end boost namespace
diff --git a/boost/compute/algorithm/lower_bound.hpp b/boost/compute/algorithm/lower_bound.hpp
new file mode 100644
index 0000000000..b2011c66ef
--- /dev/null
+++ b/boost/compute/algorithm/lower_bound.hpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP
+#define BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/binary_find.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Finds the first element in the sorted range [\p first, \p last) that
+/// is not less than \p value and returns an iterator pointing to it.
+///
+/// \param first first element in the sorted range
+/// \param last last element in the sorted range
+/// \param value value to compare the elements against
+/// \param queue command queue to perform the operation
+///
+/// \see upper_bound()
+template<class InputIterator, class T>
+inline InputIterator
+lower_bound(InputIterator first,
+            InputIterator last,
+            const T &value,
+            command_queue &queue = system::default_queue())
+{
+    // search for the first element satisfying "element >= value"
+    return detail::binary_find(
+        first, last, ::boost::compute::_1 >= value, queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP
diff --git a/boost/compute/algorithm/max_element.hpp b/boost/compute/algorithm/max_element.hpp
new file mode 100644
index 0000000000..55f2f7ffbf
--- /dev/null
+++ b/boost/compute/algorithm/max_element.hpp
@@ -0,0 +1,74 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP
+#define BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/algorithm/detail/find_extrema.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Finds the element with the maximum value in the range
+/// [\p first, \p last) and returns an iterator pointing to it.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param compare comparison function object which returns true if the first
+///        argument is less than (i.e. is ordered before) the second.
+/// \param queue command queue to perform the operation
+///
+/// For example, to find \c int2_ value with maximum first component in given vector:
+/// \code
+/// // comparison function object
+/// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const int2_ &a, const int2_ &b),
+/// {
+///     return a.x < b.x;
+/// });
+///
+/// // create vector
+/// boost::compute::vector<int2_> data = ...
+///
+/// boost::compute::vector<int2_>::iterator max =
+///     boost::compute::max_element(data.begin(), data.end(), compare_first, queue);
+/// \endcode
+///
+/// \see min_element()
+template<class InputIterator, class Compare>
+inline InputIterator
+max_element(InputIterator first,
+            InputIterator last,
+            Compare compare,
+            command_queue &queue = system::default_queue())
+{
+    // false selects the maximum; min_element() passes true here
+    const bool find_minimum = false;
+
+    return detail::find_extrema(first, last, compare, find_minimum, queue);
+}
+
+///\overload
+///
+/// Uses \c less on the iterator's value type as the comparison.
+template<class InputIterator>
+inline InputIterator
+max_element(InputIterator first,
+            InputIterator last,
+            command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    ::boost::compute::less<element_type> ascending;
+
+    return ::boost::compute::max_element(first, last, ascending, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP
diff --git a/boost/compute/algorithm/merge.hpp b/boost/compute/algorithm/merge.hpp
new file mode 100644
index 0000000000..875a283044
--- /dev/null
+++ b/boost/compute/algorithm/merge.hpp
@@ -0,0 +1,105 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_MERGE_HPP
+#define BOOST_COMPUTE_ALGORITHM_MERGE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/detail/merge_with_merge_path.hpp>
+#include <boost/compute/algorithm/detail/serial_merge.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Merges the sorted values in the range [\p first1, \p last1) with the sorted
+/// values in the range [\p first2, last2) and stores the result in the range
+/// beginning at \p result. Values are compared using the \p comp function. If
+/// no comparison function is given, \c less is used.
+///
+/// Inputs whose combined size does not exceed the "serial_merge_threshold"
+/// parameter are merged with the serial algorithm; larger inputs use the
+/// merge path algorithm.
+///
+/// \param first1 first element in the first range to merge
+/// \param last1 last element in the first range to merge
+/// \param first2 first element in the second range to merge
+/// \param last2 last element in the second range to merge
+/// \param result first element in the result range
+/// \param comp comparison function (by default \c less)
+/// \param queue command queue to perform the operation
+///
+/// \return \c OutputIterator to the end of the result range
+///
+/// \see inplace_merge()
+template<class InputIterator1,
+         class InputIterator2,
+         class OutputIterator,
+         class Compare>
+inline OutputIterator merge(InputIterator1 first1,
+                            InputIterator1 last1,
+                            InputIterator2 first2,
+                            InputIterator2 last2,
+                            OutputIterator result,
+                            Compare comp,
+                            command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type lhs_type;
+    typedef typename std::iterator_traits<InputIterator2>::value_type rhs_type;
+    typedef typename std::iterator_traits<OutputIterator>::value_type out_type;
+
+    const device &target = queue.get_device();
+
+    // tuning parameters are looked up per device and per type combination
+    std::string cache_key("__boost_merge_");
+    cache_key += type_name<lhs_type>();
+    cache_key += "_";
+    cache_key += type_name<rhs_type>();
+    cache_key += "_";
+    cache_key += type_name<out_type>();
+
+    boost::shared_ptr<detail::parameter_cache> tuning =
+        detail::parameter_cache::get_global_cache(target);
+
+    // the fallback threshold is lower on GPUs than on other device types
+    const size_t fallback_threshold =
+        (target.type() & device::gpu) ? 2048 : 32768;
+
+    const size_t serial_limit =
+        tuning->get(cache_key, "serial_merge_threshold",
+                    static_cast<uint_>(fallback_threshold));
+
+    const size_t combined_size =
+        detail::iterator_range_size(first1, last1)
+            + detail::iterator_range_size(first2, last2);
+
+    // only inputs above the threshold use the merge path algorithm;
+    // smaller ones are merged serially
+    if(combined_size > serial_limit){
+        return detail::merge_with_merge_path(
+            first1, last1, first2, last2, result, comp, queue
+        );
+    }
+
+    return detail::serial_merge(first1, last1, first2, last2, result, comp, queue);
+}
+
+/// \overload
+///
+/// Compares values with \c less on the value type of the first input range.
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+inline OutputIterator merge(InputIterator1 first1,
+                            InputIterator1 last1,
+                            InputIterator2 first2,
+                            InputIterator2 last2,
+                            OutputIterator result,
+                            command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type element_type;
+
+    return merge(
+        first1, last1, first2, last2, result, less<element_type>(), queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_MERGE_HPP
diff --git a/boost/compute/algorithm/min_element.hpp b/boost/compute/algorithm/min_element.hpp
new file mode 100644
index 0000000000..62744efb98
--- /dev/null
+++ b/boost/compute/algorithm/min_element.hpp
@@ -0,0 +1,74 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP
+#define BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/algorithm/detail/find_extrema.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Finds the element with the minimum value in the range
+/// [\p first, \p last) and returns an iterator pointing to it.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param compare comparison function object which returns true if the first
+///        argument is less than (i.e. is ordered before) the second.
+/// \param queue command queue to perform the operation
+///
+/// For example, to find \c int2_ value with minimum first component in given vector:
+/// \code
+/// // comparison function object
+/// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const int2_ &a, const int2_ &b),
+/// {
+///     return a.x < b.x;
+/// });
+///
+/// // create vector
+/// boost::compute::vector<int2_> data = ...
+///
+/// boost::compute::vector<int2_>::iterator min =
+///     boost::compute::min_element(data.begin(), data.end(), compare_first, queue);
+/// \endcode
+///
+/// \see max_element()
+template<class InputIterator, class Compare>
+inline InputIterator
+min_element(InputIterator first,
+            InputIterator last,
+            Compare compare,
+            command_queue &queue = system::default_queue())
+{
+    // true selects the minimum; max_element() passes false here
+    const bool find_minimum = true;
+
+    return detail::find_extrema(first, last, compare, find_minimum, queue);
+}
+
+///\overload
+///
+/// Uses \c less on the iterator's value type as the comparison.
+template<class InputIterator>
+inline InputIterator
+min_element(InputIterator first,
+            InputIterator last,
+            command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    ::boost::compute::less<element_type> ascending;
+
+    return ::boost::compute::min_element(first, last, ascending, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP
diff --git a/boost/compute/algorithm/minmax_element.hpp b/boost/compute/algorithm/minmax_element.hpp
new file mode 100644
index 0000000000..bf32c3c989
--- /dev/null
+++ b/boost/compute/algorithm/minmax_element.hpp
@@ -0,0 +1,70 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP
+#define BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP
+
+#include <utility>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/max_element.hpp>
+#include <boost/compute/algorithm/min_element.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Finds both the minimum and the maximum element in the range
+/// [\p first, \p last) and returns them as a pair of iterators: the first
+/// points to the minimum element, the second to the maximum element.
+/// For an empty range both iterators equal \p first.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param compare comparison function object which returns true if the first
+///        argument is less than (i.e. is ordered before) the second.
+/// \param queue command queue to perform the operation
+///
+/// \see max_element(), min_element()
+template<class InputIterator, class Compare>
+inline std::pair<InputIterator, InputIterator>
+minmax_element(InputIterator first,
+               InputIterator last,
+               Compare compare,
+               command_queue &queue = system::default_queue())
+{
+    if(first != last){
+        const InputIterator smallest = min_element(first, last, compare, queue);
+        const InputIterator largest = max_element(first, last, compare, queue);
+
+        return std::make_pair(smallest, largest);
+    }
+
+    // empty range
+    return std::make_pair(first, first);
+}
+
+///\overload
+///
+/// Uses the default ordering of min_element() and max_element(). For an
+/// empty range both returned iterators equal \p first.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param queue command queue to perform the operation
+// Only InputIterator is a template parameter: an extra parameter that does
+// not appear in the argument list cannot be deduced and would make this
+// overload uncallable without explicit template arguments.
+template<class InputIterator>
+inline std::pair<InputIterator, InputIterator>
+minmax_element(InputIterator first,
+               InputIterator last,
+               command_queue &queue = system::default_queue())
+{
+    if(first == last){
+        // empty range
+        return std::make_pair(first, first);
+    }
+
+    return std::make_pair(min_element(first, last, queue),
+                          max_element(first, last, queue));
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP
diff --git a/boost/compute/algorithm/mismatch.hpp b/boost/compute/algorithm/mismatch.hpp
new file mode 100644
index 0000000000..e7db883004
--- /dev/null
+++ b/boost/compute/algorithm/mismatch.hpp
@@ -0,0 +1,89 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP
+#define BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP
+
+#include <iterator>
+#include <utility>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/iterator/zip_iterator.hpp>
+#include <boost/compute/functional/detail/unpack.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Finds the first position at which the range [\p first1, \p last1) and
+/// the range starting at \p first2 differ, and returns a pair of iterators
+/// (one into each range) pointing to that position.
+///
+/// \param first1 first element in the first range
+/// \param last1 last element in the first range
+/// \param first2 first element in the second range
+/// \param queue command queue to perform the operation
+template<class InputIterator1, class InputIterator2>
+inline std::pair<InputIterator1, InputIterator2>
+mismatch(InputIterator1 first1,
+         InputIterator1 last1,
+         InputIterator2 first2,
+         command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type element_type;
+
+    ::boost::compute::equal_to<element_type> is_equal;
+
+    // the second range is taken to be as long as the first one
+    InputIterator2 end2 = first2 + std::distance(first1, last1);
+
+    // zip both ranges, map every pair to "elements are equal" and search
+    // for the first false; then recover the iterator into the first range
+    InputIterator1 first_difference =
+        boost::get<0>(
+            ::boost::compute::find(
+                ::boost::compute::make_transform_iterator(
+                    ::boost::compute::make_zip_iterator(
+                        boost::make_tuple(first1, first2)
+                    ),
+                    detail::unpack(is_equal)
+                ),
+                ::boost::compute::make_transform_iterator(
+                    ::boost::compute::make_zip_iterator(
+                        boost::make_tuple(last1, end2)
+                    ),
+                    detail::unpack(is_equal)
+                ),
+                false,
+                queue
+            ).base().get_iterator_tuple()
+        );
+
+    // the matching position in the second range has the same offset
+    return std::make_pair(
+        first_difference,
+        first2 + std::distance(first1, first_difference)
+    );
+}
+
+/// \overload
+///
+/// Takes an explicit end for the second range; only as many elements as
+/// the shorter of the two ranges holds are compared.
+template<class InputIterator1, class InputIterator2>
+inline std::pair<InputIterator1, InputIterator2>
+mismatch(InputIterator1 first1,
+         InputIterator1 last1,
+         InputIterator2 first2,
+         InputIterator2 last2,
+         command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::difference_type length1_type;
+    typedef typename std::iterator_traits<InputIterator2>::difference_type length2_type;
+
+    const length1_type length1 = std::distance(first1, last1);
+    const length2_type length2 = std::distance(first2, last2);
+
+    // clamp the first range to the length of the second one if necessary
+    InputIterator1 end1 = last1;
+    if(!(length1 < length2)){
+        end1 = first1 + length2;
+    }
+
+    return ::boost::compute::mismatch(first1, end1, first2, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP
diff --git a/boost/compute/algorithm/next_permutation.hpp b/boost/compute/algorithm/next_permutation.hpp
new file mode 100644
index 0000000000..e81fbd2ee8
--- /dev/null
+++ b/boost/compute/algorithm/next_permutation.hpp
@@ -0,0 +1,170 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP
+#define BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Helper function for next_permutation
+///
+/// Finds the rightmost element that is smaller than the element directly
+/// after it. Returns \p last if there is no such element or if the range
+/// holds fewer than two elements.
+///
+template<class InputIterator>
+inline InputIterator next_permutation_helper(InputIterator first,
+                                             InputIterator last,
+                                             command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    const size_t element_count = detail::iterator_range_size(first, last);
+    if(element_count < 2){
+        return last;
+    }
+
+    // one work-item per adjacent pair (i, i+1)
+    const size_t pair_count = element_count - 1;
+    const context &ctx = queue.get_context();
+
+    detail::meta_kernel k("next_permutation");
+    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
+    atomic_max<int_> atomic_max_int;
+
+    // every ascending pair proposes its position; atomic_max keeps the largest
+    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
+      << k.decl<const value_type>("cur_value") << "="
+      <<     first[k.var<const int_>("i")] << ";\n"
+      << k.decl<const value_type>("next_value") << "="
+      <<     first[k.expr<const int_>("i+1")] << ";\n"
+      << "if(cur_value < next_value){\n"
+      << "    " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
+      << "}\n";
+
+    kernel compiled = k.compile(ctx);
+
+    scalar<int_> index(ctx);
+    compiled.set_arg(index_arg, index.get_buffer());
+
+    // -1 marks "no ascending pair found"
+    index.write(static_cast<int_>(-1), queue);
+
+    queue.enqueue_1d_range_kernel(compiled, 0, pair_count, 0);
+
+    const int found = static_cast<int>(index.read(queue));
+    if(found != -1){
+        return first + found;
+    }
+
+    return last;
+}
+
+///
+/// \brief Helper function for next_permutation
+///
+/// Searches [\p first, \p last) for the element that should be swapped
+/// with the element found by next_permutation_helper: an element greater
+/// than \p value. The kernel raises a shared index (initially 0) with
+/// atomic_max for every element that is greater than \p value and not
+/// greater than the element currently referenced by the index.
+/// Returns \p last for an empty range.
+///
+/// NOTE(review): the kernel reads *index non-atomically while other
+/// work-items update it - presumably harmless because the caller passes a
+/// non-increasing suffix; confirm.
+///
+template<class InputIterator, class ValueType>
+inline InputIterator np_ceiling(InputIterator first,
+                                InputIterator last,
+                                ValueType value,
+                                command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    const size_t element_count = detail::iterator_range_size(first, last);
+    if(element_count == 0){
+        return last;
+    }
+
+    const context &ctx = queue.get_context();
+
+    detail::meta_kernel k("np_ceiling");
+    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
+    size_t value_arg = k.add_arg<value_type>(memory_object::private_memory, "value");
+    atomic_max<int_> atomic_max_int;
+
+    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
+      << k.decl<const value_type>("cur_value") << "="
+      <<     first[k.var<const int_>("i")] << ";\n"
+      << "if(cur_value <= " << first[k.expr<int_>("*index")]
+      <<      " && cur_value > value){\n"
+      << "    " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
+      << "}\n";
+
+    kernel compiled = k.compile(ctx);
+
+    scalar<int_> index(ctx);
+    compiled.set_arg(index_arg, index.get_buffer());
+
+    // the search starts from the first element of the range
+    index.write(static_cast<int_>(0), queue);
+
+    compiled.set_arg(value_arg, value);
+
+    queue.enqueue_1d_range_kernel(compiled, 0, element_count, 0);
+
+    const int found = static_cast<int>(index.read(queue));
+
+    return first + found;
+}
+
+} // end detail namespace
+
+///
+/// \brief Permutation generating algorithm
+///
+/// Transforms the range [first, last) into the next permutation from the
+/// set of all permutations arranged in lexicographic order
+/// \return \c true if the range was advanced to the next permutation;
+///         \c false if the range is empty, or if no next permutation
+///         exists, in which case the range is reversed.
+///
+/// \param first Iterator pointing to start of range
+/// \param last Iterator pointing to end of range
+/// \param queue Queue on which to execute
+///
+template<class InputIterator>
+inline bool next_permutation(InputIterator first,
+                             InputIterator last,
+                             command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    if(first == last){
+        return false;
+    }
+
+    // rightmost element that is smaller than its successor
+    InputIterator pivot =
+        detail::next_permutation_helper(first, last, queue);
+
+    if(pivot == last){
+        // no such element: reverse the whole range and report the wrap-around
+        reverse(first, last, queue);
+        return false;
+    }
+
+    const value_type pivot_value = pivot.read(queue);
+
+    // element to the right of the pivot that takes the pivot's place
+    InputIterator successor =
+        detail::np_ceiling(pivot + 1, last, pivot_value, queue);
+
+    const value_type successor_value = successor.read(queue);
+
+    // swap the two elements, then reverse everything after the pivot
+    pivot.write(successor_value, queue);
+    successor.write(pivot_value, queue);
+
+    reverse(pivot + 1, last, queue);
+
+    return true;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP
diff --git a/boost/compute/algorithm/none_of.hpp b/boost/compute/algorithm/none_of.hpp
new file mode 100644
index 0000000000..c25dd12a87
--- /dev/null
+++ b/boost/compute/algorithm/none_of.hpp
@@ -0,0 +1,36 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP
+#define BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/find_if.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns \c true if \p predicate does not return \c true for any element
+/// in the range [\p first, \p last), i.e. if find_if() reports \p last.
+///
+/// \param first first element in the range
+/// \param last one past the last element in the range
+/// \param predicate unary predicate applied to the elements
+/// \param queue command queue to perform the operation
+///
+/// \see all_of(), any_of()
+template<class InputIterator, class UnaryPredicate>
+inline bool none_of(InputIterator first,
+                    InputIterator last,
+                    UnaryPredicate predicate,
+                    command_queue &queue = system::default_queue())
+{
+    // position of the first element accepted by the predicate (or last)
+    InputIterator first_match =
+        ::boost::compute::find_if(first, last, predicate, queue);
+
+    return first_match == last;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP
diff --git a/boost/compute/algorithm/nth_element.hpp b/boost/compute/algorithm/nth_element.hpp
new file mode 100644
index 0000000000..68f7a3dbc0
--- /dev/null
+++ b/boost/compute/algorithm/nth_element.hpp
@@ -0,0 +1,87 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP
+#define BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/algorithm/partition.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/functional/bind.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Rearranges the elements in the range [\p first, \p last) such that
+/// the \p nth element would be in that position in a sequence sorted
+/// according to \p compare.
+///
+/// The implementation repeatedly partitions the current range around the
+/// value found at \p nth, moves that value to the partition point and then
+/// narrows the range to the side that contains \p nth. It stops as soon as
+/// the value at \p nth equals the pivot value of the current round.
+///
+/// \param first first element in the range
+/// \param nth position whose element is to be put in place
+/// \param last one past the last element in the range
+/// \param compare binary comparison function used for partitioning
+/// \param queue command queue to perform the operation
+template<class Iterator, class Compare>
+inline void nth_element(Iterator first,
+                        Iterator nth,
+                        Iterator last,
+                        Compare compare,
+                        command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type value_type;
+
+    using boost::compute::placeholders::_1;
+
+    if(nth == last){
+        return;
+    }
+
+    for(;;){
+        // the value currently stored at nth is the pivot of this round
+        value_type pivot = nth.read(queue);
+
+        // elements for which compare(element, pivot) holds come first
+        Iterator split = partition(
+            first, last, ::boost::compute::bind(compare, _1, pivot), queue
+        );
+
+        // where did the pivot end up behind the partition point?
+        Iterator pivot_pos = find(split, last, pivot, queue);
+
+        // exchange the pivot with the element at the partition point
+        value_type displaced = split.read(queue);
+        fill_n(split, 1, pivot, queue);
+        fill_n(pivot_pos, 1, displaced, queue);
+
+        // finished once nth holds the pivot value
+        value_type at_nth = nth.read(queue);
+        if(pivot == at_nth){
+            break;
+        }
+
+        // continue with the half that contains nth
+        if(std::distance(first, new_nth_is_after(first, nth, split) ? nth : split) ==
+           std::distance(first, nth) &&
+           std::distance(first, nth) < std::distance(first, split)){
+            last = split;
+        }
+        else {
+            first = split;
+        }
+    }
+}
+
+/// \overload
+///
+/// Uses \c less<value_type> as the comparison function.
+template<class Iterator>
+inline void nth_element(Iterator first,
+                        Iterator nth,
+                        Iterator last,
+                        command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type value_type;
+
+    // nothing to do when nth denotes the end of the range
+    if(nth != last){
+        less<value_type> ascending;
+        nth_element(first, nth, last, ascending, queue);
+    }
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP
diff --git a/boost/compute/algorithm/partial_sum.hpp b/boost/compute/algorithm/partial_sum.hpp
new file mode 100644
index 0000000000..d440369a5a
--- /dev/null
+++ b/boost/compute/algorithm/partial_sum.hpp
@@ -0,0 +1,37 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP
+#define BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/inclusive_scan.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Calculates the cumulative sum of the elements in the range [\p first,
+/// \p last) and writes the resulting values to the range beginning at
+/// \p result. The work is delegated to inclusive_scan().
+///
+/// \param first first element in the input range
+/// \param last one past the last element in the input range
+/// \param result iterator pointing to the start of the output
+/// \param queue command queue to perform the operation
+///
+/// \return the iterator returned by inclusive_scan()
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+partial_sum(InputIterator first,
+            InputIterator last,
+            OutputIterator result,
+            command_queue &queue = system::default_queue())
+{
+    OutputIterator output_end =
+        ::boost::compute::inclusive_scan(first, last, result, queue);
+
+    return output_end;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP
diff --git a/boost/compute/algorithm/partition.hpp b/boost/compute/algorithm/partition.hpp
new file mode 100644
index 0000000000..7860350e0d
--- /dev/null
+++ b/boost/compute/algorithm/partition.hpp
@@ -0,0 +1,39 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_HPP
+#define BOOST_COMPUTE_ALGORITHM_PARTITION_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/stable_partition.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// Partitions the elements in the range [\p first, \p last) according to
+/// \p predicate. Callers must not rely on the order of the elements being
+/// preserved, although the current implementation simply forwards to
+/// stable_partition().
+///
+/// \param first first element in the range
+/// \param last one past the last element in the range
+/// \param predicate unary predicate deciding the partition of an element
+/// \param queue command queue to perform the operation
+///
+/// \return the iterator returned by stable_partition()
+///
+/// \see is_partitioned() and stable_partition()
+///
+template<class Iterator, class UnaryPredicate>
+inline Iterator partition(Iterator first,
+                          Iterator last,
+                          UnaryPredicate predicate,
+                          command_queue &queue = system::default_queue())
+{
+    Iterator boundary = stable_partition(first, last, predicate, queue);
+
+    return boundary;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP
diff --git a/boost/compute/algorithm/partition_copy.hpp b/boost/compute/algorithm/partition_copy.hpp
new file mode 100644
index 0000000000..80a2c6475f
--- /dev/null
+++ b/boost/compute/algorithm/partition_copy.hpp
@@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP
+#define BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies all of the elements in the range [\p first, \p last) for which
+/// \p predicate returns \c true to the range beginning at \p first_true
+/// and all of the elements for which \p predicate returns \c false to
+/// the range beginning at \p first_false.
+///
+/// The input is traversed twice: once with \p predicate and once with its
+/// negation.
+///
+/// \return a pair holding the iterators returned by the two copy_if()
+///         calls (end of the true range, end of the false range)
+///
+/// \see partition()
+template<class InputIterator,
+         class OutputIterator1,
+         class OutputIterator2,
+         class UnaryPredicate>
+inline std::pair<OutputIterator1, OutputIterator2>
+partition_copy(InputIterator first,
+               InputIterator last,
+               OutputIterator1 first_true,
+               OutputIterator2 first_false,
+               UnaryPredicate predicate,
+               command_queue &queue = system::default_queue())
+{
+    // first pass: elements accepted by the predicate
+    OutputIterator1 true_end = ::boost::compute::copy_if(
+        first, last, first_true, predicate, queue
+    );
+
+    // second pass: elements rejected by the predicate
+    OutputIterator2 false_end = ::boost::compute::copy_if(
+        first, last, first_false, not1(predicate), queue
+    );
+
+    return std::make_pair(true_end, false_end);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP
diff --git a/boost/compute/algorithm/partition_point.hpp b/boost/compute/algorithm/partition_point.hpp
new file mode 100644
index 0000000000..3cc2bc0ca6
--- /dev/null
+++ b/boost/compute/algorithm/partition_point.hpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP
+#define BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/binary_find.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// \brief Partition point algorithm
+///
+/// Finds the end of the true values in the partitioned range
+/// [\p first, \p last) by searching, with detail::binary_find(), for the
+/// negated \p predicate.
+///
+/// \return Iterator pointing to end of true values
+///
+/// \param first Iterator pointing to start of range
+/// \param last Iterator pointing to end of range
+/// \param predicate Unary predicate to be applied on each element
+/// \param queue Queue on which to execute
+///
+/// \see partition() and stable_partition()
+///
+template<class InputIterator, class UnaryPredicate>
+inline InputIterator partition_point(InputIterator first,
+                                     InputIterator last,
+                                     UnaryPredicate predicate,
+                                     command_queue &queue = system::default_queue())
+{
+    InputIterator true_end =
+        detail::binary_find(first, last, not1(predicate), queue);
+
+    return true_end;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP
diff --git a/boost/compute/algorithm/prev_permutation.hpp b/boost/compute/algorithm/prev_permutation.hpp
new file mode 100644
index 0000000000..03c01bf8f4
--- /dev/null
+++ b/boost/compute/algorithm/prev_permutation.hpp
@@ -0,0 +1,170 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP
+#define BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Helper function for prev_permutation
+///
+/// Finds the rightmost element of [\p first, \p last) that is greater than
+/// the element following it. Every adjacent pair is examined by one
+/// work-item; matching positions are combined with an atomic max so that
+/// the largest matching index is kept.
+///
+/// \return Iterator to that element, or \p last if the range has fewer
+///         than two elements or no such element exists
+///
+template<class InputIterator>
+inline InputIterator prev_permutation_helper(InputIterator first,
+                                             InputIterator last,
+                                             command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    const size_t element_count = detail::iterator_range_size(first, last);
+    if(element_count < 2){
+        return last;
+    }
+
+    // one work-item per adjacent pair (i, i+1)
+    const size_t pair_count = element_count - 1;
+    const context &ctx = queue.get_context();
+
+    detail::meta_kernel source("prev_permutation");
+    size_t index_arg = source.add_arg<int *>(memory_object::global_memory, "index");
+    atomic_max<int_> store_max;
+
+    source << source.decl<const int_>("i") << " = get_global_id(0);\n"
+           << source.decl<const value_type>("cur_value") << "="
+           << first[source.var<const int_>("i")] << ";\n"
+           << source.decl<const value_type>("next_value") << "="
+           << first[source.expr<const int_>("i+1")] << ";\n"
+           << "if(cur_value > next_value){\n"
+           << " " << store_max(source.var<int_ *>("index"), source.var<int_>("i")) << ";\n"
+           << "}\n";
+
+    kernel pair_kernel = source.compile(ctx);
+
+    // -1 marks "no pair matched"
+    scalar<int_> found_index(ctx);
+    pair_kernel.set_arg(index_arg, found_index.get_buffer());
+    found_index.write(static_cast<int_>(-1), queue);
+
+    queue.enqueue_1d_range_kernel(pair_kernel, 0, pair_count, 0);
+
+    int position = static_cast<int>(found_index.read(queue));
+    if(position != -1){
+        return first + position;
+    }
+    return last;
+}
+
+///
+/// \brief Helper function for prev_permutation
+///
+/// Searches [\p first, \p last) for the largest element that is smaller
+/// than \p value. Each work-item whose element is smaller than \p value and
+/// not smaller than the element at the index recorded so far raises the
+/// recorded index (which starts at 0) to its own index with an atomic max.
+///
+/// \return \p first advanced by the recorded index, or \p last if the
+///         range is empty
+///
+template<class InputIterator, class ValueType>
+inline InputIterator pp_floor(InputIterator first,
+                              InputIterator last,
+                              ValueType value,
+                              command_queue &queue)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    const size_t element_count = detail::iterator_range_size(first, last);
+    if(element_count == 0){
+        return last;
+    }
+
+    const context &ctx = queue.get_context();
+
+    detail::meta_kernel source("pp_floor");
+    size_t index_arg = source.add_arg<int *>(memory_object::global_memory, "index");
+    size_t value_arg = source.add_arg<value_type>(memory_object::private_memory, "value");
+    atomic_max<int_> store_max;
+
+    source << source.decl<const int_>("i") << " = get_global_id(0);\n"
+           << source.decl<const value_type>("cur_value") << "="
+           << first[source.var<const int_>("i")] << ";\n"
+           << "if(cur_value >= " << first[source.expr<int_>("*index")]
+           << " && cur_value < value){\n"
+           << " " << store_max(source.var<int_ *>("index"), source.var<int_>("i")) << ";\n"
+           << "}\n";
+
+    kernel floor_kernel = source.compile(ctx);
+
+    // the recorded index starts at the first element of the range
+    scalar<int_> found_index(ctx);
+    floor_kernel.set_arg(index_arg, found_index.get_buffer());
+    found_index.write(static_cast<int_>(0), queue);
+
+    floor_kernel.set_arg(value_arg, value);
+
+    queue.enqueue_1d_range_kernel(floor_kernel, 0, element_count, 0);
+
+    int position = static_cast<int>(found_index.read(queue));
+    return first + position;
+}
+
+} // end detail namespace
+
+///
+/// \brief Permutation generating algorithm
+///
+/// Rearranges the range [\p first, \p last) into the previous permutation
+/// from the set of all permutations arranged in lexicographic order.
+///
+/// \return \c true if the range was moved to its previous permutation.
+///         \c false if the range is empty, or if no previous permutation
+///         was found; in the latter case the whole range is reversed
+///         before returning.
+///
+/// \param first Iterator pointing to start of range
+/// \param last Iterator pointing to end of range
+/// \param queue Queue on which to execute
+///
+template<class InputIterator>
+inline bool prev_permutation(InputIterator first,
+                             InputIterator last,
+                             command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    if(first == last){
+        return false;
+    }
+
+    // rightmost element that is greater than its successor, if any
+    InputIterator pivot = detail::prev_permutation_helper(first, last, queue);
+    if(pivot == last){
+        reverse(first, last, queue);
+        return false;
+    }
+
+    value_type pivot_value = pivot.read(queue);
+
+    // largest element behind the pivot that is smaller than the pivot
+    InputIterator floor_element =
+        detail::pp_floor(pivot + 1, last, pivot_value, queue);
+    value_type floor_value = floor_element.read(queue);
+
+    // exchange the two values (both were read back to the host above)
+    pivot.write(floor_value, queue);
+    floor_element.write(pivot_value, queue);
+
+    // finally reverse everything behind the pivot
+    reverse(pivot + 1, last, queue);
+    return true;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP
diff --git a/boost/compute/algorithm/random_shuffle.hpp b/boost/compute/algorithm/random_shuffle.hpp
new file mode 100644
index 0000000000..7d2d46a133
--- /dev/null
+++ b/boost/compute/algorithm/random_shuffle.hpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP
+#define BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP
+
+#include <vector>
+#include <algorithm>
+
+#include <boost/range/algorithm_ext/iota.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/algorithm/scatter.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Randomly shuffles the elements in the range [\p first, \p last).
+///
+/// A random permutation of the indices is generated on the host, copied to
+/// the device and applied to a temporary copy of the values with scatter().
+///
+/// \param first first element in the range
+/// \param last one past the last element in the range
+/// \param queue command queue to perform the operation
+///
+/// \see scatter()
+template<class Iterator>
+inline void random_shuffle(Iterator first,
+                           Iterator last,
+                           command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type value_type;
+
+    const size_t element_count = detail::iterator_range_size(first, last);
+    if(element_count == 0){
+        return;
+    }
+
+    // host side: the indices 0, 1, 2, ... in random order
+    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed
+    // in C++17; it is kept here because the header targets C++03.
+    std::vector<cl_uint> host_permutation(element_count);
+    boost::iota(host_permutation, 0);
+    std::random_shuffle(host_permutation.begin(), host_permutation.end());
+
+    // device side: upload the permutation
+    const context &ctx = queue.get_context();
+    vector<cl_uint> device_permutation(element_count, ctx);
+    ::boost::compute::copy(
+        host_permutation.begin(), host_permutation.end(),
+        device_permutation.begin(), queue
+    );
+
+    // the values are scattered from a temporary copy back into the input
+    vector<value_type> snapshot(element_count, ctx);
+    ::boost::compute::copy(first, last, snapshot.begin(), queue);
+
+    // element i of the copy goes to position device_permutation[i]
+    ::boost::compute::scatter(
+        snapshot.begin(), snapshot.end(),
+        device_permutation.begin(), first, queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP
diff --git a/boost/compute/algorithm/reduce.hpp b/boost/compute/algorithm/reduce.hpp
new file mode 100644
index 0000000000..79624a0e50
--- /dev/null
+++ b/boost/compute/algorithm/reduce.hpp
@@ -0,0 +1,301 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_REDUCE_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/array.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/algorithm/detail/inplace_reduce.hpp>
+#include <boost/compute/algorithm/detail/reduce_on_gpu.hpp>
+#include <boost/compute/algorithm/detail/serial_reduce.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/memory/local_buffer.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Performs a single reduction pass over the \p count values starting at
+// \p first.
+//
+// Each work-group of \p block_size work-items combines 2 * \p block_size
+// consecutive inputs with \p function and stores its partial result in
+// \p result at the index of the work-group. Inputs that do not fill a
+// complete block are folded serially by a second, single-task kernel whose
+// result is stored directly behind the partial results of the full blocks.
+//
+// Returns the number of partial results written to \p result, i.e. the
+// number of full blocks plus one if there were leftover inputs.
+//
+// NOTE(review): the in-block loop reads block[lid+i] for i doubling up to
+// block_size, which looks like it requires block_size to be a power of two
+// -- confirm with callers.
+template<class InputIterator, class OutputIterator, class BinaryFunction>
+size_t reduce(InputIterator first,
+              size_t count,
+              OutputIterator result,
+              size_t block_size,
+              BinaryFunction function,
+              command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputIterator>::value_type
+        input_type;
+    typedef typename
+        boost::compute::result_of<BinaryFunction(input_type, input_type)>::type
+        result_type;
+
+    const context &context = queue.get_context();
+
+    // number of blocks that are completely filled with 2 * block_size inputs
+    size_t block_count = count / 2 / block_size;
+
+    // inputs consumed by the full blocks; everything behind is a leftover
+    const size_t reduced_count = block_count * block_size * 2;
+    const bool has_leftovers = reduced_count < count;
+
+    // Computed with integers on purpose: the former float based std::ceil()
+    // lost precision for counts above 2^24 and could report one block too
+    // few, while the leftover kernel below still wrote output[block_count].
+    const size_t total_block_count = block_count + (has_leftovers ? 1 : 0);
+
+    if(block_count != 0){
+        meta_kernel k("block_reduce");
+        size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output");
+        size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block");
+
+        k <<
+            "const uint gid = get_global_id(0);\n" <<
+            "const uint lid = get_local_id(0);\n" <<
+
+            // copy values to local memory
+            "block[lid] = " <<
+                function(first[k.make_var<uint_>("gid*2+0")],
+                         first[k.make_var<uint_>("gid*2+1")]) << ";\n" <<
+
+            // perform reduction
+            "for(uint i = 1; i < " << uint_(block_size) << "; i <<= 1){\n" <<
+            " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
+            " uint mask = (i << 1) - 1;\n" <<
+            " if((lid & mask) == 0){\n" <<
+            " block[lid] = " <<
+                function(k.expr<input_type>("block[lid]"),
+                         k.expr<input_type>("block[lid+i]")) << ";\n" <<
+            " }\n" <<
+            "}\n" <<
+
+            // write block result to global output
+            "if(lid == 0)\n" <<
+            " output[get_group_id(0)] = block[0];\n";
+
+        kernel kernel = k.compile(context);
+        kernel.set_arg(output_arg, result.get_buffer());
+        kernel.set_arg(block_arg, local_buffer<input_type>(block_size));
+
+        queue.enqueue_1d_range_kernel(kernel,
+                                      0,
+                                      block_count * block_size,
+                                      block_size);
+    }
+
+    // serially reduce any leftovers
+    if(has_leftovers){
+        size_t last_block_start = reduced_count;
+
+        meta_kernel k("extra_serial_reduce");
+        size_t count_arg = k.add_arg<uint_>("count");
+        size_t offset_arg = k.add_arg<uint_>("offset");
+        size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output");
+        size_t output_offset_arg = k.add_arg<uint_>("output_offset");
+
+        k <<
+            k.decl<result_type>("result") << " = \n" <<
+                first[k.expr<uint_>("offset")] << ";\n" <<
+            "for(uint i = offset + 1; i < count; i++)\n" <<
+            " result = " <<
+                function(k.var<result_type>("result"),
+                         first[k.var<uint_>("i")]) << ";\n" <<
+            "output[output_offset] = result;\n";
+
+        kernel kernel = k.compile(context);
+        kernel.set_arg(count_arg, static_cast<uint_>(count));
+        kernel.set_arg(offset_arg, static_cast<uint_>(last_block_start));
+        kernel.set_arg(output_arg, result.get_buffer());
+        // the leftover result goes right behind the full-block results
+        kernel.set_arg(output_offset_arg, static_cast<uint_>(block_count));
+
+        queue.enqueue_task(kernel);
+    }
+
+    return total_block_count;
+}
+
+// Runs one reduce() pass over the \p count values starting at \p first and
+// returns the partial results in a newly allocated device vector.
+//
+// The vector holds one element per started block of 2 * \p block_size
+// inputs, which is the number of values reduce() writes.
+template<class InputIterator, class BinaryFunction>
+inline vector<
+    typename boost::compute::result_of<
+        BinaryFunction(
+            typename std::iterator_traits<InputIterator>::value_type,
+            typename std::iterator_traits<InputIterator>::value_type
+        )
+    >::type
+>
+block_reduce(InputIterator first,
+             size_t count,
+             size_t block_size,
+             BinaryFunction function,
+             command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputIterator>::value_type
+        input_type;
+    typedef typename
+        boost::compute::result_of<BinaryFunction(input_type, input_type)>::type
+        result_type;
+
+    const context &context = queue.get_context();
+
+    // Integer ceiling division. The former float based std::ceil() lost
+    // precision for counts above 2^24 and could size the vector one element
+    // too small for the leftover result written by reduce().
+    const size_t inputs_per_block = block_size * 2;
+    size_t total_block_count = count / inputs_per_block;
+    if(count % inputs_per_block != 0){
+        ++total_block_count;
+    }
+
+    vector<result_type> result_vector(total_block_count, context);
+
+    reduce(first, count, result_vector.begin(), block_size, function, queue);
+
+    return result_vector;
+}
+
+// Reduces [\p first, \p last) with an arbitrary binary \p function and
+// copies the single resulting value to \p result.
+//
+// On CPU devices the range is folded with serial_reduce(). On all other
+// devices a first block_reduce() pass (block size 256) produces partial
+// results which, if there is more than one, are combined with
+// inplace_reduce().
+template<class InputIterator, class OutputIterator, class BinaryFunction>
+inline void generic_reduce(InputIterator first,
+                           InputIterator last,
+                           OutputIterator result,
+                           BinaryFunction function,
+                           command_queue &queue)
+{
+    typedef typename
+        std::iterator_traits<InputIterator>::value_type
+        input_type;
+    typedef typename
+        boost::compute::result_of<BinaryFunction(input_type, input_type)>::type
+        result_type;
+
+    const device &device = queue.get_device();
+    const context &context = queue.get_context();
+
+    size_t count = detail::iterator_range_size(first, last);
+
+    // CPU: a single serial pass into a one-element device vector
+    if(device.type() & device::cpu){
+        boost::compute::vector<result_type> single(1, context);
+        detail::serial_reduce(first, last, single.begin(), function, queue);
+        boost::compute::copy_n(single.begin(), 1, result, queue);
+        return;
+    }
+
+    // everything else: block-wise first pass ...
+    const size_t block_size = 256;
+    vector<result_type> partials =
+        detail::block_reduce(first, count, block_size, function, queue);
+
+    // ... followed by an in-place reduction of the partial results
+    if(partials.size() > 1){
+        detail::inplace_reduce(partials.begin(), partials.end(), function, queue);
+    }
+
+    boost::compute::copy_n(partials.begin(), 1, result, queue);
+}
+
+// Overload selected for plus<T>: reduces [\p first, \p last) into a
+// one-element temporary on the device -- with serial_reduce() on CPU
+// devices and reduce_on_gpu() otherwise -- and copies that element to
+// \p result.
+template<class InputIterator, class OutputIterator, class T>
+inline void dispatch_reduce(InputIterator first,
+                            InputIterator last,
+                            OutputIterator result,
+                            const plus<T> &function,
+                            command_queue &queue)
+{
+    const context &context = queue.get_context();
+    const device &device = queue.get_device();
+
+    // device-side storage for the single reduced value
+    array<T, 1> reduced(context);
+
+    const bool runs_on_cpu = (device.type() & device::cpu) != 0;
+    if(!runs_on_cpu){
+        reduce_on_gpu(first, last, reduced.begin(), function, queue);
+    }
+    else {
+        detail::serial_reduce(first, last, reduced.begin(), function, queue);
+    }
+
+    // hand the value over to the caller's iterator
+    copy_n(reduced.begin(), 1, result, queue);
+}
+
+// Generic overload of dispatch_reduce(): accepts any binary reduction
+// function and simply forwards all arguments to generic_reduce().
+template<class InputIterator, class OutputIterator, class BinaryFunction>
+inline void dispatch_reduce(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ BinaryFunction function,
+ command_queue &queue)
+{
+ generic_reduce(first, last, result, function, queue);
+}
+
+} // end detail namespace
+
+/// Applies \p function to the elements in the range [\p first, \p last)
+/// and stores the reduced value at \p result. Nothing is written if the
+/// range is empty.
+///
+/// If no function is specified, \c plus will be used.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param result iterator pointing to the output
+/// \param function binary reduction function
+/// \param queue command queue to perform the operation
+///
+/// The \c reduce() algorithm assumes that the binary reduction function is
+/// associative. When used with non-associative functions the result may
+/// be non-deterministic and vary in precision. Notably this affects the
+/// \c plus<float>() function as floating-point addition is not associative
+/// and may produce slightly different results than a serial algorithm.
+///
+/// This algorithm supports both host and device iterators for the
+/// result argument. This allows for values to be reduced and copied
+/// to the host all with a single function call.
+///
+/// For example, to calculate the sum of the values in a device vector and
+/// copy the result to a value on the host:
+///
+/// \snippet test/test_reduce.cpp sum_int
+///
+/// Note that while the \c reduce() algorithm is conceptually identical to
+/// the \c accumulate() algorithm, its implementation is substantially more
+/// efficient on parallel hardware. For more information, see the documentation
+/// on the \c accumulate() algorithm.
+///
+/// \see accumulate()
+template<class InputIterator, class OutputIterator, class BinaryFunction>
+inline void reduce(InputIterator first,
+                   InputIterator last,
+                   OutputIterator result,
+                   BinaryFunction function,
+                   command_queue &queue = system::default_queue())
+{
+    // an empty range leaves the output untouched
+    if(first != last){
+        detail::dispatch_reduce(first, last, result, function, queue);
+    }
+}
+
+/// \overload
+///
+/// Reduces with \c plus over the value type of \p InputIterator.
+template<class InputIterator, class OutputIterator>
+inline void reduce(InputIterator first,
+                   InputIterator last,
+                   OutputIterator result,
+                   command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type T;
+
+    // an empty range leaves the output untouched
+    if(first != last){
+        detail::dispatch_reduce(first, last, result, plus<T>(), queue);
+    }
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REDUCE_HPP
diff --git a/boost/compute/algorithm/reduce_by_key.hpp b/boost/compute/algorithm/reduce_by_key.hpp
new file mode 100644
index 0000000000..87c73e887f
--- /dev/null
+++ b/boost/compute/algorithm/reduce_by_key.hpp
@@ -0,0 +1,118 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP
+#define BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP
+
+#include <iterator>
+#include <utility>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/detail/reduce_by_key.hpp>
+
+namespace boost {
+namespace compute {
+
+/// The \c reduce_by_key() algorithm performs a reduction for each contiguous
+/// subsequence of values determined by equivalent keys.
+///
+/// Returns a pair of iterators at the end of the ranges [\p keys_result, keys_result_last)
+/// and [\p values_result, \p values_result_last).
+///
+/// If no function is specified, \c plus will be used.
+/// If no predicate is specified, \c equal_to will be used.
+///
+/// \param keys_first the first key
+/// \param keys_last the last key
+/// \param values_first the first input value
+/// \param keys_result iterator pointing to the key output
+/// \param values_result iterator pointing to the reduced value output
+/// \param function binary reduction function
+/// \param predicate binary predicate which returns true only if two keys are equal
+/// \param queue command queue to perform the operation
+///
+/// The \c reduce_by_key() algorithm assumes that the binary reduction function
+/// is associative. When used with non-associative functions the result may
+/// be non-deterministic and vary in precision. Notably this affects the
+/// \c plus<float>() function as floating-point addition is not associative
+/// and may produce slightly different results than a serial algorithm.
+///
+/// For example, to calculate the sum of the values for each key:
+///
+/// \snippet test/test_reduce_by_key.cpp reduce_by_key_int
+///
+/// \see reduce()
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator,
+         class BinaryFunction, class BinaryPredicate>
+inline std::pair<OutputKeyIterator, OutputValueIterator>
+reduce_by_key(InputKeyIterator keys_first,
+              InputKeyIterator keys_last,
+              InputValueIterator values_first,
+              OutputKeyIterator keys_result,
+              OutputValueIterator values_result,
+              BinaryFunction function,
+              BinaryPredicate predicate,
+              command_queue &queue = system::default_queue())
+{
+    // the implementation is chosen by the detail dispatcher
+    std::pair<OutputKeyIterator, OutputValueIterator> result_ends =
+        detail::dispatch_reduce_by_key(
+            keys_first, keys_last, values_first,
+            keys_result, values_result,
+            function, predicate,
+            queue
+        );
+
+    return result_ends;
+}
+
+/// \overload
+///
+/// Compares keys with \c equal_to over the key type.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator,
+         class BinaryFunction>
+inline std::pair<OutputKeyIterator, OutputValueIterator>
+reduce_by_key(InputKeyIterator keys_first,
+              InputKeyIterator keys_last,
+              InputValueIterator values_first,
+              OutputKeyIterator keys_result,
+              OutputValueIterator values_result,
+              BinaryFunction function,
+              command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type;
+
+    // default key comparison
+    equal_to<key_type> keys_equal;
+
+    return reduce_by_key(
+        keys_first, keys_last, values_first,
+        keys_result, values_result,
+        function, keys_equal,
+        queue
+    );
+}
+
+/// \overload
+///
+/// Reduces with \c plus over the value type and compares keys with
+/// \c equal_to over the key type.
+template<class InputKeyIterator, class InputValueIterator,
+         class OutputKeyIterator, class OutputValueIterator>
+inline std::pair<OutputKeyIterator, OutputValueIterator>
+reduce_by_key(InputKeyIterator keys_first,
+              InputKeyIterator keys_last,
+              InputValueIterator values_first,
+              OutputKeyIterator keys_result,
+              OutputValueIterator values_result,
+              command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type;
+    typedef typename std::iterator_traits<InputValueIterator>::value_type value_type;
+
+    // default reduction and default key comparison
+    plus<value_type> add_values;
+    equal_to<key_type> keys_equal;
+
+    return reduce_by_key(
+        keys_first, keys_last, values_first,
+        keys_result, values_result,
+        add_values, keys_equal,
+        queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP
diff --git a/boost/compute/algorithm/remove.hpp b/boost/compute/algorithm/remove.hpp
new file mode 100644
index 0000000000..98feb1f9d8
--- /dev/null
+++ b/boost/compute/algorithm/remove.hpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_HPP
+#define BOOST_COMPUTE_ALGORITHM_REMOVE_HPP
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/remove_if.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Removes every element of the range [\p first, \p last) that compares
+/// equal to \p value by delegating to remove_if() with a lambda predicate.
+/// When the element type has a vector size other than one, the comparison
+/// is wrapped in \c lambda::all().
+///
+/// \return the iterator returned by remove_if()
+///
+/// \see remove_if()
+template<class Iterator, class T>
+inline Iterator remove(Iterator first,
+                       Iterator last,
+                       const T &value,
+                       command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+
+    using ::boost::compute::_1;
+    using ::boost::compute::lambda::all;
+
+    // vector element types: wrap the comparison in all()
+    if(vector_size<element_type>::value != 1){
+        return ::boost::compute::remove_if(first, last, all(_1 == value), queue);
+    }
+
+    // scalar element types: the plain equality expression is the predicate
+    return ::boost::compute::remove_if(first, last, _1 == value, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REMOVE_HPP
diff --git a/boost/compute/algorithm/remove_if.hpp b/boost/compute/algorithm/remove_if.hpp
new file mode 100644
index 0000000000..5e416bef88
--- /dev/null
+++ b/boost/compute/algorithm/remove_if.hpp
@@ -0,0 +1,47 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/functional/logical.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Removes each element of the range [\p first, \p last) for which
+/// \p predicate returns \c true. The range is first copied into a temporary
+/// device vector; the elements that do not satisfy \p predicate are then
+/// copied back starting at \p first.
+///
+/// \return the iterator returned by copy_if() for the rewritten range
+///
+/// \see remove()
+template<class Iterator, class Predicate>
+inline Iterator remove_if(Iterator first,
+                          Iterator last,
+                          Predicate predicate,
+                          command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+
+    // snapshot of the input, so the original range can be overwritten
+    ::boost::compute::vector<element_type> snapshot(first, last, queue);
+
+    // write back only the elements rejected by the predicate
+    Iterator new_end = ::boost::compute::copy_if(snapshot.begin(),
+                                                 snapshot.end(),
+                                                 first,
+                                                 not1(predicate),
+                                                 queue);
+    return new_end;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP
diff --git a/boost/compute/algorithm/replace.hpp b/boost/compute/algorithm/replace.hpp
new file mode 100644
index 0000000000..fd649a2fad
--- /dev/null
+++ b/boost/compute/algorithm/replace.hpp
@@ -0,0 +1,90 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_HPP
+#define BOOST_COMPUTE_ALGORITHM_REPLACE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Meta-kernel used by replace(): each work-item compares one element of the
+/// range with the \c old_value argument and, on equality, assigns the
+/// \c new_value argument to it.
+template<class Iterator, class T>
+class replace_kernel : public meta_kernel
+{
+public:
+    replace_kernel()
+        : meta_kernel("replace"),
+          m_count(0)
+    {
+    }
+
+    /// Records the size of [\p first, \p last) and emits the kernel source
+    /// that indexes \p first with the global id.
+    void set_range(Iterator first, Iterator last)
+    {
+        m_count = detail::iterator_range_size(first, last);
+
+        *this << "const uint i = get_global_id(0);\n";
+        *this << "if(" << first[var<cl_uint>("i")] << " == " << var<T>("old_value") << ")\n";
+        *this << " " << first[var<cl_uint>("i")] << '=' << var<T>("new_value") << ";\n";
+    }
+
+    /// Registers \p old_value as the kernel argument named "old_value".
+    void set_old_value(const T &old_value)
+    {
+        add_set_arg<T>("old_value", old_value);
+    }
+
+    /// Registers \p new_value as the kernel argument named "new_value".
+    void set_new_value(const T &new_value)
+    {
+        add_set_arg<T>("new_value", new_value);
+    }
+
+    /// Runs the kernel on \p queue; does nothing when the recorded range
+    /// is empty.
+    void exec(command_queue &queue)
+    {
+        if(m_count != 0){
+            exec_1d(queue, 0, m_count);
+        }
+    }
+
+private:
+    size_t m_count; // size of the range given to set_range(), 0 before that
+};
+
+} // end detail namespace
+
+/// Replaces, in place, each instance of \p old_value in the range
+/// [\p first, \p last) with \p new_value.
+///
+/// \param first iterator to the start of the range
+/// \param last iterator to the end of the range
+/// \param old_value value to look for
+/// \param new_value value written over each match
+/// \param queue command queue on which the kernel is executed
+template<class Iterator, class T>
+inline void replace(Iterator first,
+                    Iterator last,
+                    const T &old_value,
+                    const T &new_value,
+                    command_queue &queue = system::default_queue())
+{
+    typedef detail::replace_kernel<Iterator, T> kernel_type;
+
+    // configure the kernel: range first, then the two values
+    kernel_type replacer;
+    replacer.set_range(first, last);
+    replacer.set_old_value(old_value);
+    replacer.set_new_value(new_value);
+
+    replacer.exec(queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REPLACE_HPP
diff --git a/boost/compute/algorithm/replace_copy.hpp b/boost/compute/algorithm/replace_copy.hpp
new file mode 100644
index 0000000000..7224bd3ae6
--- /dev/null
+++ b/boost/compute/algorithm/replace_copy.hpp
@@ -0,0 +1,62 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP
+#define BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/replace.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies the values in the range [\p first, \p last) to the range
+/// beginning at \p result while replacing each instance of \p old_value
+/// with \p new_value. The input is copied first and the replacement is then
+/// applied to the copy.
+///
+/// \return iterator to the end of the output range (\p result itself when
+///         the input range is empty)
+///
+/// \see replace()
+template<class InputIterator, class OutputIterator, class T>
+inline OutputIterator
+replace_copy(InputIterator first,
+             InputIterator last,
+             OutputIterator result,
+             const T &old_value,
+             const T &new_value,
+             command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type;
+
+    const difference_type length = std::distance(first, last);
+
+    OutputIterator result_end = result;
+
+    if(length != 0){
+        // duplicate the input into the output range
+        ::boost::compute::copy(first, last, result, queue);
+
+        // patch the duplicate in place
+        result_end = result + length;
+        ::boost::compute::replace(result, result + length, old_value, new_value, queue);
+    }
+
+    return result_end;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP
diff --git a/boost/compute/algorithm/reverse.hpp b/boost/compute/algorithm/reverse.hpp
new file mode 100644
index 0000000000..b6a9e8098c
--- /dev/null
+++ b/boost/compute/algorithm/reverse.hpp
@@ -0,0 +1,74 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_HPP
+#define BOOST_COMPUTE_ALGORITHM_REVERSE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+/// Meta-kernel for reverse(): work-item i exchanges element i of the range with element size - i - 1 through a temporary.
+template<class Iterator>
+struct reverse_kernel : public meta_kernel
+{
+ reverse_kernel(Iterator first, Iterator last)
+ : meta_kernel("reverse")
+ {
+ typedef typename std::iterator_traits<Iterator>::value_type value_type;
+
+ // store the size of the range and hand it to the kernel as the "size" argument (narrowed to cl_uint)
+ m_size = detail::iterator_range_size(first, last);
+ add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size));
+ // kernel source: i is the global id, j = size - i - 1 is its mirror index; the two elements are swapped
+ *this <<
+ decl<cl_uint>("i") << " = get_global_id(0);\n" <<
+ decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" <<
+ decl<value_type>("tmp") << "=" << first[var<cl_uint>("i")] << ";\n" <<
+ first[var<cl_uint>("i")] << "=" << first[var<cl_uint>("j")] << ";\n" <<
+ first[var<cl_uint>("j")] << "= tmp;\n";
+ }
+ // Runs the kernel on queue, passing 0 and m_size / 2 to exec_1d (presumably offset and work-item count; meta_kernel is not shown here).
+ void exec(command_queue &queue)
+ {
+ exec_1d(queue, 0, m_size / 2); // m_size / 2 is 0 for ranges shorter than two elements; there is no guard at this level
+ }
+
+ size_t m_size; // number of elements in the range passed to the constructor
+};
+
+} // end detail namespace
+
+/// Reverses, in place, the elements in the range [\p first, \p last).
+/// Ranges with fewer than two elements are left untouched and no kernel
+/// is created for them.
+///
+/// \see reverse_copy()
+template<class Iterator>
+inline void reverse(Iterator first,
+                    Iterator last,
+                    command_queue &queue = system::default_queue())
+{
+    const size_t length = detail::iterator_range_size(first, last);
+
+    // only ranges of two or more elements need any work
+    if(length >= 2){
+        detail::reverse_kernel<Iterator> swapper(first, last);
+        swapper.exec(queue);
+    }
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REVERSE_HPP
diff --git a/boost/compute/algorithm/reverse_copy.hpp b/boost/compute/algorithm/reverse_copy.hpp
new file mode 100644
index 0000000000..c839f44651
--- /dev/null
+++ b/boost/compute/algorithm/reverse_copy.hpp
@@ -0,0 +1,79 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP
+#define BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+/// Meta-kernel for reverse_copy(): work-item i assigns the input element at index i to the output index size - i - 1.
+template<class Iterator, class OutputIterator>
+struct reverse_copy_kernel : public meta_kernel
+{
+ reverse_copy_kernel(Iterator first, Iterator last, OutputIterator result)
+ : meta_kernel("reverse_copy")
+ {
+ // store the size of the range and hand it to the kernel as the "size" argument (narrowed to cl_uint)
+ m_size = detail::iterator_range_size(first, last);
+ add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size));
+ // kernel source: i is the global id, j = size - i - 1 is the mirrored output index
+ *this <<
+ decl<cl_uint>("i") << " = get_global_id(0);\n" <<
+ decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" <<
+ result[var<cl_uint>("j")] << "=" << first[var<cl_uint>("i")] << ";\n";
+ }
+ // Runs the kernel on queue, passing 0 and m_size to exec_1d (presumably offset and work-item count; meta_kernel is not shown here).
+ void exec(command_queue &queue)
+ {
+ exec_1d(queue, 0, m_size); // NOTE(review): called unconditionally, even when m_size is 0
+ }
+
+ size_t m_size; // number of elements in the range passed to the constructor
+};
+
+} // end detail namespace
+
+/// Copies the elements in the range [\p first, \p last) in reversed
+/// order to the range beginning at \p result.
+///
+/// \param first iterator to the start of the input range
+/// \param last iterator to the end of the input range
+/// \param result iterator to the start of the output range
+/// \param queue command queue on which the kernel is executed
+///
+/// \return iterator to the end of the output range; \p result itself when
+///         the input range is empty
+///
+/// \see reverse()
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+reverse_copy(InputIterator first,
+             InputIterator last,
+             OutputIterator result,
+             command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type;
+
+    difference_type count = std::distance(first, last);
+
+    // An empty input needs no kernel at all. Without this guard the kernel
+    // would be built and launched over zero work-items; replace_copy() and
+    // reverse() guard their empty/trivial cases in the same way.
+    if(count == 0){
+        return result;
+    }
+
+    detail::reverse_copy_kernel<InputIterator, OutputIterator>
+        kernel(first, last, result);
+
+    // run kernel
+    kernel.exec(queue);
+
+    // return iterator to the end of result
+    return result + count;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP
diff --git a/boost/compute/algorithm/rotate.hpp b/boost/compute/algorithm/rotate.hpp
new file mode 100644
index 0000000000..54cb073cc2
--- /dev/null
+++ b/boost/compute/algorithm/rotate.hpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_HPP
+#define BOOST_COMPUTE_ALGORITHM_ROTATE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Performs a left rotation of the range [\p first, \p last) such that the
+/// element at \p n_first comes to the beginning. The whole range is copied
+/// into a temporary device vector and written back in two pieces.
+///
+/// Nothing is done when \p n_first equals \p first or \p last.
+///
+/// \see rotate_copy()
+template<class InputIterator>
+inline void rotate(InputIterator first,
+                   InputIterator n_first,
+                   InputIterator last,
+                   command_queue &queue = system::default_queue())
+{
+    // rotating by zero or by the full length leaves the range as it is
+    if(n_first == first || n_first == last){
+        return;
+    }
+
+    typedef typename std::iterator_traits<InputIterator>::value_type T;
+
+    const size_t head_size = detail::iterator_range_size(first, n_first);
+    const size_t total_size = detail::iterator_range_size(first, last);
+
+    // scratch copy of the complete range
+    vector<T> scratch(total_size, queue.get_context());
+    ::boost::compute::copy(first, last, scratch.begin(), queue);
+
+    // [n_first, last) moves to the front ...
+    ::boost::compute::copy(scratch.begin() + head_size, scratch.end(), first, queue);
+
+    // ... and [first, n_first) fills the remaining tail
+    ::boost::compute::copy(scratch.begin(), scratch.begin() + head_size, last - head_size, queue);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ROTATE_HPP
diff --git a/boost/compute/algorithm/rotate_copy.hpp b/boost/compute/algorithm/rotate_copy.hpp
new file mode 100644
index 0000000000..fa1b44c5e5
--- /dev/null
+++ b/boost/compute/algorithm/rotate_copy.hpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP
+#define BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Performs a left rotation such that the element at \p n_first comes to
+/// the beginning, storing the output in the range starting at \p result.
+/// The elements from \p n_first to \p last are copied first, followed by
+/// the elements from \p first up to \p n_first.
+///
+/// \see rotate()
+template<class InputIterator, class OutputIterator>
+inline void rotate_copy(InputIterator first,
+                        InputIterator n_first,
+                        InputIterator last,
+                        OutputIterator result,
+                        command_queue &queue = system::default_queue())
+{
+    // sizes of the two pieces: [first, n_first) and [n_first, last)
+    const size_t head_size = detail::iterator_range_size(first, n_first);
+    const size_t tail_size = detail::iterator_range_size(n_first, last);
+
+    // the tail piece goes to the start of the output
+    ::boost::compute::copy(first + head_size, last, result, queue);
+
+    // the head piece follows directly after it
+    ::boost::compute::copy(first, first + head_size, result + tail_size, queue);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP
diff --git a/boost/compute/algorithm/scatter.hpp b/boost/compute/algorithm/scatter.hpp
new file mode 100644
index 0000000000..bea4201628
--- /dev/null
+++ b/boost/compute/algorithm/scatter.hpp
@@ -0,0 +1,99 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_HPP
+#define BOOST_COMPUTE_ALGORITHM_SCATTER_HPP
+
+#include <boost/algorithm/string/replace.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Meta-kernel used by scatter(). For global id i the generated source
+/// computes i1 = map[i] + output_offset and i2 = i + input_offset, then
+/// assigns first[i2] to result[i1].
+///
+/// All members are zero-initialised so that exec() called before
+/// set_range() is a harmless no-op instead of reading indeterminate values.
+template<class InputIterator, class MapIterator, class OutputIterator>
+class scatter_kernel : meta_kernel
+{
+public:
+    scatter_kernel()
+        : meta_kernel("scatter"),
+          m_count(0),
+          m_input_offset(0),
+          m_input_offset_arg(0),
+          m_output_offset(0),
+          m_output_offset_arg(0)
+    {}
+
+    /// Records the size of [\p first, \p last) and the indices of \p first
+    /// and \p result, declares the two offset arguments and emits the
+    /// kernel source.
+    void set_range(InputIterator first,
+                   InputIterator last,
+                   MapIterator map,
+                   OutputIterator result)
+    {
+        m_count = iterator_range_size(first, last);
+        m_input_offset = first.get_index();
+        m_output_offset = result.get_index();
+
+        m_input_offset_arg = add_arg<uint_>("input_offset");
+        m_output_offset_arg = add_arg<uint_>("output_offset");
+
+        *this <<
+            "const uint i = get_global_id(0);\n" <<
+            "uint i1 = " << map[expr<uint_>("i")] <<
+                " + output_offset;\n" <<
+            "uint i2 = i + input_offset;\n" <<
+            result[expr<uint_>("i1")] << "=" <<
+                first[expr<uint_>("i2")] << ";\n";
+    }
+
+    /// Binds the offset arguments and runs the kernel on \p queue.
+    ///
+    /// \return the event returned by exec_1d(), or a default-constructed
+    ///         event when the recorded range is empty
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        set_arg(m_input_offset_arg, uint_(m_input_offset));
+        set_arg(m_output_offset_arg, uint_(m_output_offset));
+
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    size_t m_count;             // size of the input range
+    size_t m_input_offset;      // first.get_index() at set_range() time
+    size_t m_input_offset_arg;  // value returned by add_arg for "input_offset"
+    size_t m_output_offset;     // result.get_index() at set_range() time
+    size_t m_output_offset_arg; // value returned by add_arg for "output_offset"
+};
+
+} // end detail namespace
+
+/// Copies the elements from the range [\p first, \p last) to the range
+/// beginning at \p result, taking the output index of each element from
+/// the range beginning at \p map.
+///
+/// \param first iterator to the start of the input range
+/// \param last iterator to the end of the input range
+/// \param map iterator to the start of the output indices
+/// \param result iterator to the start of the output range
+/// \param queue command queue on which the kernel is executed
+///
+/// \see gather()
+template<class InputIterator, class MapIterator, class OutputIterator>
+inline void scatter(InputIterator first,
+                    InputIterator last,
+                    MapIterator map,
+                    OutputIterator result,
+                    command_queue &queue = system::default_queue())
+{
+    typedef detail::scatter_kernel<InputIterator, MapIterator, OutputIterator> kernel_type;
+
+    kernel_type scatter_op;
+    scatter_op.set_range(first, last, map, result);
+
+    // the event returned by exec() is not needed here
+    scatter_op.exec(queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SCATTER_HPP
diff --git a/boost/compute/algorithm/scatter_if.hpp b/boost/compute/algorithm/scatter_if.hpp
new file mode 100644
index 0000000000..159edd8c86
--- /dev/null
+++ b/boost/compute/algorithm/scatter_if.hpp
@@ -0,0 +1,119 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Pola <jakub.pola@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP
+
+#include <boost/algorithm/string/replace.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Meta-kernel used by scatter_if(). For global id i the generated source
+/// computes i1 = map[i] + output_offset and i2 = i + input_offset, then
+/// assigns first[i2] to result[i1] under an if_() on
+/// predicate(stencil[i]).
+///
+/// All members are zero-initialised so that exec() called before
+/// set_range() is a harmless no-op instead of reading indeterminate values.
+template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator, class Predicate>
+class scatter_if_kernel : meta_kernel
+{
+public:
+    scatter_if_kernel()
+        : meta_kernel("scatter_if"),
+          m_count(0),
+          m_input_offset(0),
+          m_input_offset_arg(0),
+          m_output_offset(0),
+          m_output_offset_arg(0)
+    {}
+
+    /// Records the size of [\p first, \p last) and the indices of \p first
+    /// and \p result, declares the two offset arguments and emits the
+    /// kernel source.
+    void set_range(InputIterator first,
+                   InputIterator last,
+                   MapIterator map,
+                   StencilIterator stencil,
+                   OutputIterator result,
+                   Predicate predicate)
+    {
+        m_count = iterator_range_size(first, last);
+        m_input_offset = first.get_index();
+        m_output_offset = result.get_index();
+
+        m_input_offset_arg = add_arg<uint_>("input_offset");
+        m_output_offset_arg = add_arg<uint_>("output_offset");
+
+        *this <<
+            "const uint i = get_global_id(0);\n" <<
+            "uint i1 = " << map[expr<uint_>("i")] <<
+                " + output_offset;\n" <<
+            "uint i2 = i + input_offset;\n" <<
+            if_(predicate(stencil[expr<uint_>("i")])) << "\n" <<
+                result[expr<uint_>("i1")] << "=" <<
+                    first[expr<uint_>("i2")] << ";\n";
+    }
+
+    /// Binds the offset arguments and runs the kernel on \p queue.
+    ///
+    /// \return the event returned by exec_1d(), or a default-constructed
+    ///         event when the recorded range is empty
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        set_arg(m_input_offset_arg, uint_(m_input_offset));
+        set_arg(m_output_offset_arg, uint_(m_output_offset));
+
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    size_t m_count;             // size of the input range
+    size_t m_input_offset;      // first.get_index() at set_range() time
+    size_t m_input_offset_arg;  // value returned by add_arg for "input_offset"
+    size_t m_output_offset;     // result.get_index() at set_range() time
+    size_t m_output_offset_arg; // value returned by add_arg for "output_offset"
+};
+
+} // end detail namespace
+
+/// Copies the elements from the range [\p first, \p last) to the range
+/// beginning at \p result, taking the output index of each element from
+/// the range beginning at \p map. An element is copied only under the
+/// condition \p predicate applied to the corresponding element of the
+/// range beginning at \p stencil.
+///
+/// \see scatter()
+template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator,
+         class Predicate>
+inline void scatter_if(InputIterator first,
+                       InputIterator last,
+                       MapIterator map,
+                       StencilIterator stencil,
+                       OutputIterator result,
+                       Predicate predicate,
+                       command_queue &queue = system::default_queue())
+{
+    typedef detail::scatter_if_kernel<
+        InputIterator, MapIterator, StencilIterator, OutputIterator, Predicate
+    > kernel_type;
+
+    kernel_type scatter_op;
+    scatter_op.set_range(first, last, map, stencil, result, predicate);
+
+    // the event returned by exec() is not needed here
+    scatter_op.exec(queue);
+}
+
+/// \overload
+///
+/// Uses \c identity on the value type of \p StencilIterator as the
+/// predicate.
+template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator>
+inline void scatter_if(InputIterator first,
+                       InputIterator last,
+                       MapIterator map,
+                       StencilIterator stencil,
+                       OutputIterator result,
+                       command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<StencilIterator>::value_type stencil_value;
+
+    // the stencil value itself is the condition
+    identity<stencil_value> pass_through;
+
+    scatter_if(first, last, map, stencil, result, pass_through, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP
diff --git a/boost/compute/algorithm/search.hpp b/boost/compute/algorithm/search.hpp
new file mode 100644
index 0000000000..3d3d035b3c
--- /dev/null
+++ b/boost/compute/algorithm/search.hpp
@@ -0,0 +1,73 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SEARCH_HPP
+#define BOOST_COMPUTE_ALGORITHM_SEARCH_HPP
+
+#include <boost/compute/algorithm/detail/search_all.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// \brief Substring matching algorithm
+///
+/// Searches for the first match of the pattern [p_first, p_last)
+/// in text [t_first, t_last).
+/// \return Iterator pointing to beginning of first occurrence, or
+///         \p t_last when the pattern does not occur (including when the
+///         pattern is longer than the text)
+///
+/// \param t_first Iterator pointing to start of text
+/// \param t_last Iterator pointing to end of text
+/// \param p_first Iterator pointing to start of pattern
+/// \param p_last Iterator pointing to end of pattern
+/// \param queue Queue on which to execute
+///
+template<class TextIterator, class PatternIterator>
+inline TextIterator search(TextIterator t_first,
+                           TextIterator t_last,
+                           PatternIterator p_first,
+                           PatternIterator p_last,
+                           command_queue &queue = system::default_queue())
+{
+    const size_t text_size = detail::iterator_range_size(t_first, t_last);
+    const size_t pattern_size = detail::iterator_range_size(p_first, p_last);
+
+    // A pattern longer than the text cannot match. Bail out before the
+    // unsigned subtraction below wraps around and requests a huge buffer.
+    if(pattern_size > text_size){
+        return t_last;
+    }
+
+    // there is no need to check if pattern starts at last n - 1 indices
+    vector<uint_> matching_indices(
+        text_size - pattern_size + 1,
+        queue.get_context()
+    );
+
+    // search_kernel puts value 1 at every index in vector where pattern starts at
+    detail::search_kernel<PatternIterator,
+                          TextIterator,
+                          vector<uint_>::iterator> kernel;
+
+    kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin());
+    kernel.exec(queue);
+
+    // the first 1 marks the first position at which the pattern starts
+    vector<uint_>::iterator index = ::boost::compute::find(
+        matching_indices.begin(), matching_indices.end(), uint_(1), queue
+    );
+
+    // pattern was not found
+    if(index == matching_indices.end())
+        return t_last;
+
+    return t_first + detail::iterator_range_size(matching_indices.begin(), index);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SEARCH_HPP
diff --git a/boost/compute/algorithm/search_n.hpp b/boost/compute/algorithm/search_n.hpp
new file mode 100644
index 0000000000..9e03111bb0
--- /dev/null
+++ b/boost/compute/algorithm/search_n.hpp
@@ -0,0 +1,140 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Search kernel class
+///
+/// Subclass of meta_kernel which is capable of performing search_n.
+/// Work-item i compares the n text elements starting at index i with
+/// value and stores 1 in result[i] when all of them are equal to it,
+/// 0 otherwise.
+///
+/// All members are initialised by the constructor so that exec() called
+/// before set_range() is a harmless no-op.
+///
+template<class TextIterator, class OutputIterator>
+class search_n_kernel : public meta_kernel
+{
+public:
+    typedef typename std::iterator_traits<TextIterator>::value_type value_type;
+
+    search_n_kernel()
+        : meta_kernel("search_n"),
+          m_n(0),
+          m_n_arg(0),
+          m_count(0),
+          m_value(),
+          m_value_arg(0)
+    {}
+
+    /// Records \p n and \p value, declares the matching kernel arguments,
+    /// computes the number of candidate start positions in
+    /// [\p t_first, \p t_last) and emits the kernel source.
+    void set_range(TextIterator t_first,
+                   TextIterator t_last,
+                   value_type value,
+                   size_t n,
+                   OutputIterator result)
+    {
+        m_n = n;
+        m_n_arg = add_arg<uint_>("n");
+
+        m_value = value;
+        m_value_arg = add_arg<value_type>("value");
+
+        // Number of start positions at which n elements still fit. When n
+        // exceeds the text length there are none; the plain unsigned
+        // expression size + 1 - n would wrap around in that case.
+        const size_t text_size = iterator_range_size(t_first, t_last);
+        m_count = (m_n <= text_size) ? (text_size + 1 - m_n) : 0;
+
+        // the loop leaves j == n only when no mismatch was seen; a
+        // mismatch pushes j past n, which also ends the loop
+        *this <<
+            "uint i = get_global_id(0);\n" <<
+            "uint i1 = i;\n" <<
+            "uint j;\n" <<
+            "for(j = 0; j<n; j++,i++)\n" <<
+            "{\n" <<
+            " if(value != " << t_first[expr<uint_>("i")] << ")\n" <<
+            " j = n + 1;\n" <<
+            "}\n" <<
+            "if(j == n)\n" <<
+            result[expr<uint_>("i1")] << " = 1;\n" <<
+            "else\n" <<
+            result[expr<uint_>("i1")] << " = 0;\n";
+    }
+
+    /// Binds the n and value arguments and runs the kernel on \p queue.
+    ///
+    /// \return the event returned by exec_1d(), or a default-constructed
+    ///         event when there is no candidate start position
+    event exec(command_queue &queue)
+    {
+        if(m_count == 0) {
+            return event();
+        }
+
+        set_arg(m_n_arg, uint_(m_n));
+        set_arg(m_value_arg, m_value);
+
+        return exec_1d(queue, 0, m_count);
+    }
+
+private:
+    size_t m_n;          // requested run length
+    size_t m_n_arg;      // value returned by add_arg for "n"
+    size_t m_count;      // number of candidate start positions
+    value_type m_value;  // value the run must consist of
+    size_t m_value_arg;  // value returned by add_arg for "value"
+};
+
+} //end detail namespace
+
+///
+/// \brief Search for a run of repeated values
+///
+/// Searches for the first occurrence of n consecutive occurrences of
+/// value in text [t_first, t_last).
+/// \return Iterator pointing to beginning of first occurrence, or
+///         \p t_last when there is none (including when \p n is larger
+///         than the length of the text)
+///
+/// \param t_first Iterator pointing to start of text
+/// \param t_last Iterator pointing to end of text
+/// \param n Number of times value repeats
+/// \param value Value which repeats
+/// \param queue Queue on which to execute
+///
+template<class TextIterator, class ValueType>
+inline TextIterator search_n(TextIterator t_first,
+                             TextIterator t_last,
+                             size_t n,
+                             ValueType value,
+                             command_queue &queue = system::default_queue())
+{
+    const size_t text_size = detail::iterator_range_size(t_first, t_last);
+
+    // A run longer than the text cannot occur. Bail out before the
+    // unsigned expression below wraps around and requests a huge buffer.
+    if(n > text_size){
+        return t_last;
+    }
+
+    // there is no need to check if pattern starts at last n - 1 indices
+    vector<uint_> matching_indices(
+        text_size + 1 - n,
+        queue.get_context()
+    );
+
+    // search_n_kernel puts value 1 at every index in vector where pattern
+    // of n values starts at
+    detail::search_n_kernel<TextIterator,
+                            vector<uint_>::iterator> kernel;
+
+    kernel.set_range(t_first, t_last, value, n, matching_indices.begin());
+    kernel.exec(queue);
+
+    // the first 1 marks the first position at which a run of n values starts
+    vector<uint_>::iterator index = ::boost::compute::find(
+        matching_indices.begin(), matching_indices.end(), uint_(1), queue
+    );
+
+    // pattern was not found
+    if(index == matching_indices.end())
+        return t_last;
+
+    return t_first + detail::iterator_range_size(matching_indices.begin(), index);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP
diff --git a/boost/compute/algorithm/set_difference.hpp b/boost/compute/algorithm/set_difference.hpp
new file mode 100644
index 0000000000..17ce7bd3f6
--- /dev/null
+++ b/boost/compute/algorithm/set_difference.hpp
@@ -0,0 +1,182 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/detail/compact.hpp>
+#include <boost/compute/algorithm/detail/balanced_path.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Serial set difference kernel class
+///
+/// Subclass of meta_kernel to perform serial set difference after tiling
+/// Work-item i merges tile i of both inputs and keeps only the values found in the first one.
+class serial_set_difference_kernel : meta_kernel
+{
+public:
+ unsigned int tile_size; // streamed into the kernel source as the stride between tile outputs
+
+ serial_set_difference_kernel() : meta_kernel("set_difference")
+ {
+ tile_size = 4; // default only; assign the real value before calling set_range()
+ }
+
+ template<class InputIterator1, class InputIterator2,
+ class InputIterator3, class InputIterator4,
+ class OutputIterator1, class OutputIterator2>
+ void set_range(InputIterator1 first1, // start of the first input range
+ InputIterator2 first2, // start of the second input range
+ InputIterator3 tile_first1, // tile boundaries (indices) into the first range
+ InputIterator3 tile_last1, // end of those tile boundaries
+ InputIterator4 tile_first2, // tile boundaries (indices) into the second range
+ OutputIterator1 result, // scratch output; tile i writes starting at i*tile_size
+ OutputIterator2 counts) // receives the number of values written by tile i
+ {
+ m_count = iterator_range_size(tile_first1, tile_last1) - 1; // tiles = boundaries - 1
+
+ *this <<
+ "uint i = get_global_id(0);\n" << // i selects the tile handled by this work-item
+ "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
+ "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
+ "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
+ "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
+ "uint index = i*" << tile_size << ";\n" << // tile_size is baked in as a literal here
+ "uint count = 0;\n" <<
+ "while(start1<end1 && start2<end2)\n" << // two-pointer merge over the tile
+ "{\n" <<
+ " if(" << first1[expr<uint_>("start1")] << " == " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ " start1++; start2++;\n" << // present in both ranges: drop it
+ " }\n" <<
+ " else if(" << first1[expr<uint_>("start1")] << " < " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // only in the first range: keep it
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++;\n" <<
+ " }\n" <<
+ " else\n" <<
+ " {\n" <<
+ " start2++;\n" << // only in the second range: skip it
+ " }\n" <<
+ "}\n" <<
+ "while(start1<end1)\n" << // leftover of the first range has no match in this tile
+ "{\n" <<
+ result[expr<uint_>("index")] <<
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++;\n" <<
+ "}\n" <<
+ counts[expr<uint_>("i")] << " = count;\n"; // publish how many values this tile produced
+ }
+
+ event exec(command_queue &queue)
+ {
+ if(m_count == 0) { // no tiles: skip the launch and return a default-constructed event
+ return event();
+ }
+
+ return exec_1d(queue, 0, m_count); // m_count is passed as the size argument, one per tile
+ }
+
+private:
+ size_t m_count; // NOTE(review): assigned only by set_range(); the constructor leaves it unset
+};
+
+} //end detail namespace
+
+///
+/// \brief Set difference algorithm
+///
+/// Collects the values of the sorted range [first1, last1) that are not
+/// matched by a value of the sorted range [first2, last2) and writes them
+/// to the range starting at result
+/// \return Iterator pointing to end of difference
+///
+/// \param first1 Iterator pointing to start of first set
+/// \param last1 Iterator pointing to end of first set
+/// \param first2 Iterator pointing to start of second set
+/// \param last2 Iterator pointing to end of second set
+/// \param result Iterator pointing to start of range in which the difference
+/// will be stored
+/// \param queue Queue on which to execute
+///
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+inline OutputIterator set_difference(InputIterator1 first1,
+                                     InputIterator1 last1,
+                                     InputIterator2 first2,
+                                     InputIterator2 last2,
+                                     OutputIterator result,
+                                     command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+
+    int tile_size = 1024;
+
+    int size1 = detail::iterator_range_size(first1, last1);
+    int size2 = detail::iterator_range_size(first2, last2);
+
+    // one boundary per tile of the combined input plus the leading zero
+    int boundary_count = (size1+size2+tile_size-1)/tile_size+1;
+
+    vector<uint_> bounds1(boundary_count, queue.get_context());
+    vector<uint_> bounds2(boundary_count, queue.get_context());
+
+    // Split both inputs into tiles; slot 0 of each boundary vector is the
+    // start of the first tile and the kernel fills the slots after it
+    detail::balanced_path_kernel partitioner;
+    partitioner.tile_size = tile_size;
+    partitioner.set_range(first1, last1, first2, last2,
+                          bounds1.begin()+1, bounds2.begin()+1);
+    fill_n(bounds1.begin(), 1, 0, queue);
+    fill_n(bounds2.begin(), 1, 0, queue);
+    partitioner.exec(queue);
+
+    // the final boundary is the end of the respective input
+    fill_n(bounds1.end()-1, 1, size1, queue);
+    fill_n(bounds2.end()-1, 1, size2, queue);
+
+    vector<value_type> scratch(size1+size2, queue.get_context());
+    vector<uint_> tile_counts(boundary_count, queue.get_context());
+    fill_n(tile_counts.end()-1, 1, 0, queue);
+
+    // Per-tile differences go to scratch, per-tile sizes to tile_counts
+    detail::serial_set_difference_kernel tile_kernel;
+    tile_kernel.tile_size = tile_size;
+    tile_kernel.set_range(first1, first2, bounds1.begin(), bounds1.end(),
+                          bounds2.begin(), scratch.begin(), tile_counts.begin());
+
+    tile_kernel.exec(queue);
+
+    // turn the sizes into output offsets; the last slot becomes the total
+    exclusive_scan(tile_counts.begin(), tile_counts.end(), tile_counts.begin(), queue);
+
+    // Pack the per-tile pieces contiguously into result
+    detail::compact_kernel compactor;
+    compactor.tile_size = tile_size;
+    compactor.set_range(scratch.begin(), tile_counts.begin(), tile_counts.end(), result);
+
+    compactor.exec(queue);
+
+    return result + (tile_counts.end() - 1).read(queue);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP
diff --git a/boost/compute/algorithm/set_intersection.hpp b/boost/compute/algorithm/set_intersection.hpp
new file mode 100644
index 0000000000..50f291e84a
--- /dev/null
+++ b/boost/compute/algorithm/set_intersection.hpp
@@ -0,0 +1,170 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP
+#define BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/detail/compact.hpp>
+#include <boost/compute/algorithm/detail/balanced_path.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Serial set intersection kernel class
+///
+/// Subclass of meta_kernel to perform serial set intersection after tiling
+/// Work-item i merges tile i of both inputs and keeps only the values found in both.
+class serial_set_intersection_kernel : meta_kernel
+{
+public:
+ unsigned int tile_size; // streamed into the kernel source as the stride between tile outputs
+
+ serial_set_intersection_kernel() : meta_kernel("set_intersection")
+ {
+ tile_size = 4; // default only; assign the real value before calling set_range()
+ }
+
+ template<class InputIterator1, class InputIterator2,
+ class InputIterator3, class InputIterator4,
+ class OutputIterator1, class OutputIterator2>
+ void set_range(InputIterator1 first1, // start of the first input range
+ InputIterator2 first2, // start of the second input range
+ InputIterator3 tile_first1, // tile boundaries (indices) into the first range
+ InputIterator3 tile_last1, // end of those tile boundaries
+ InputIterator4 tile_first2, // tile boundaries (indices) into the second range
+ OutputIterator1 result, // scratch output; tile i writes starting at i*tile_size
+ OutputIterator2 counts) // receives the number of values written by tile i
+ {
+ m_count = iterator_range_size(tile_first1, tile_last1) - 1; // tiles = boundaries - 1
+
+ *this <<
+ "uint i = get_global_id(0);\n" << // i selects the tile handled by this work-item
+ "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
+ "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
+ "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
+ "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
+ "uint index = i*" << tile_size << ";\n" << // tile_size is baked in as a literal here
+ "uint count = 0;\n" <<
+ "while(start1<end1 && start2<end2)\n" << // two-pointer merge over the tile
+ "{\n" <<
+ " if(" << first1[expr<uint_>("start1")] << " == " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // present in both ranges: keep one copy
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++; start2++;\n" <<
+ " }\n" <<
+ " else if(" << first1[expr<uint_>("start1")] << " < " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " start1++;\n" << // only in the first range: skip it
+ " else start2++;\n" << // only in the second range: skip it
+ "}\n" <<
+ counts[expr<uint_>("i")] << " = count;\n"; // publish how many values this tile produced
+ }
+
+ event exec(command_queue &queue)
+ {
+ if(m_count == 0) { // no tiles: skip the launch and return a default-constructed event
+ return event();
+ }
+
+ return exec_1d(queue, 0, m_count); // m_count is passed as the size argument, one per tile
+ }
+
+private:
+ size_t m_count; // NOTE(review): assigned only by set_range(); the constructor leaves it unset
+};
+
+} //end detail namespace
+
+///
+/// \brief Set intersection algorithm
+///
+/// Collects the values that occur in both the sorted range [first1, last1)
+/// and the sorted range [first2, last2) and writes them to the range
+/// starting at result
+/// \return Iterator pointing to end of intersection
+///
+/// \param first1 Iterator pointing to start of first set
+/// \param last1 Iterator pointing to end of first set
+/// \param first2 Iterator pointing to start of second set
+/// \param last2 Iterator pointing to end of second set
+/// \param result Iterator pointing to start of range in which the intersection
+/// will be stored
+/// \param queue Queue on which to execute
+///
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+inline OutputIterator set_intersection(InputIterator1 first1,
+                                       InputIterator1 last1,
+                                       InputIterator2 first2,
+                                       InputIterator2 last2,
+                                       OutputIterator result,
+                                       command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+
+    int tile_size = 1024;
+
+    int size1 = detail::iterator_range_size(first1, last1);
+    int size2 = detail::iterator_range_size(first2, last2);
+
+    // one boundary per tile of the combined input plus the leading zero
+    int boundary_count = (size1+size2+tile_size-1)/tile_size+1;
+
+    vector<uint_> bounds1(boundary_count, queue.get_context());
+    vector<uint_> bounds2(boundary_count, queue.get_context());
+
+    // Split both inputs into tiles; slot 0 of each boundary vector is the
+    // start of the first tile and the kernel fills the slots after it
+    detail::balanced_path_kernel partitioner;
+    partitioner.tile_size = tile_size;
+    partitioner.set_range(first1, last1, first2, last2,
+                          bounds1.begin()+1, bounds2.begin()+1);
+    fill_n(bounds1.begin(), 1, 0, queue);
+    fill_n(bounds2.begin(), 1, 0, queue);
+    partitioner.exec(queue);
+
+    // the final boundary is the end of the respective input
+    fill_n(bounds1.end()-1, 1, size1, queue);
+    fill_n(bounds2.end()-1, 1, size2, queue);
+
+    vector<value_type> scratch(size1+size2, queue.get_context());
+    vector<uint_> tile_counts(boundary_count, queue.get_context());
+    fill_n(tile_counts.end()-1, 1, 0, queue);
+
+    // Per-tile intersections go to scratch, per-tile sizes to tile_counts
+    detail::serial_set_intersection_kernel tile_kernel;
+    tile_kernel.tile_size = tile_size;
+    tile_kernel.set_range(first1, first2, bounds1.begin(), bounds1.end(),
+                          bounds2.begin(), scratch.begin(), tile_counts.begin());
+
+    tile_kernel.exec(queue);
+
+    // turn the sizes into output offsets; the last slot becomes the total
+    exclusive_scan(tile_counts.begin(), tile_counts.end(), tile_counts.begin(), queue);
+
+    // Pack the per-tile pieces contiguously into result
+    detail::compact_kernel compactor;
+    compactor.tile_size = tile_size;
+    compactor.set_range(scratch.begin(), tile_counts.begin(), tile_counts.end(), result);
+
+    compactor.exec(queue);
+
+    return result + (tile_counts.end() - 1).read(queue);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP
diff --git a/boost/compute/algorithm/set_symmetric_difference.hpp b/boost/compute/algorithm/set_symmetric_difference.hpp
new file mode 100644
index 0000000000..6e60b38511
--- /dev/null
+++ b/boost/compute/algorithm/set_symmetric_difference.hpp
@@ -0,0 +1,194 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/detail/compact.hpp>
+#include <boost/compute/algorithm/detail/balanced_path.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Serial set symmetric difference kernel class
+///
+/// Subclass of meta_kernel to perform serial set symmetric
+/// difference after tiling
+/// Work-item i merges tile i of both inputs and keeps the values found in exactly one of them.
+class serial_set_symmetric_difference_kernel : meta_kernel
+{
+public:
+ unsigned int tile_size; // streamed into the kernel source as the stride between tile outputs
+
+ serial_set_symmetric_difference_kernel() : meta_kernel("set_symmetric_difference")
+ {
+ tile_size = 4; // default only; assign the real value before calling set_range()
+ }
+
+ template<class InputIterator1, class InputIterator2,
+ class InputIterator3, class InputIterator4,
+ class OutputIterator1, class OutputIterator2>
+ void set_range(InputIterator1 first1, // start of the first input range
+ InputIterator2 first2, // start of the second input range
+ InputIterator3 tile_first1, // tile boundaries (indices) into the first range
+ InputIterator3 tile_last1, // end of those tile boundaries
+ InputIterator4 tile_first2, // tile boundaries (indices) into the second range
+ OutputIterator1 result, // scratch output; tile i writes starting at i*tile_size
+ OutputIterator2 counts) // receives the number of values written by tile i
+ {
+ m_count = iterator_range_size(tile_first1, tile_last1) - 1; // tiles = boundaries - 1
+
+ *this <<
+ "uint i = get_global_id(0);\n" << // i selects the tile handled by this work-item
+ "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
+ "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
+ "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
+ "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
+ "uint index = i*" << tile_size << ";\n" << // tile_size is baked in as a literal here
+ "uint count = 0;\n" <<
+ "while(start1<end1 && start2<end2)\n" << // two-pointer merge over the tile
+ "{\n" <<
+ " if(" << first1[expr<uint_>("start1")] << " == " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ " start1++; start2++;\n" << // present in both ranges: drop it
+ " }\n" <<
+ " else if(" << first1[expr<uint_>("start1")] << " < " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // only in the first range: keep it
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++;\n" <<
+ " }\n" <<
+ " else\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // only in the second range: keep it
+ " = " << first2[expr<uint_>("start2")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start2++;\n" <<
+ " }\n" <<
+ "}\n" <<
+ "while(start1<end1)\n" << // leftover of the first range has no match in this tile
+ "{\n" <<
+ result[expr<uint_>("index")] <<
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++;\n" <<
+ "}\n" <<
+ "while(start2<end2)\n" << // leftover of the second range has no match in this tile
+ "{\n" <<
+ result[expr<uint_>("index")] <<
+ " = " << first2[expr<uint_>("start2")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start2++;\n" <<
+ "}\n" <<
+ counts[expr<uint_>("i")] << " = count;\n"; // publish how many values this tile produced
+ }
+
+ event exec(command_queue &queue)
+ {
+ if(m_count == 0) { // no tiles: skip the launch and return a default-constructed event
+ return event();
+ }
+
+ return exec_1d(queue, 0, m_count); // m_count is passed as the size argument, one per tile
+ }
+
+private:
+ size_t m_count; // NOTE(review): assigned only by set_range(); the constructor leaves it unset
+};
+
+} //end detail namespace
+
+///
+/// \brief Set symmetric difference algorithm
+///
+/// Collects the values that occur in exactly one of the sorted ranges
+/// [first1, last1) and [first2, last2) and writes them to the range
+/// starting at result
+/// \return Iterator pointing to end of symmetric difference
+///
+/// \param first1 Iterator pointing to start of first set
+/// \param last1 Iterator pointing to end of first set
+/// \param first2 Iterator pointing to start of second set
+/// \param last2 Iterator pointing to end of second set
+/// \param result Iterator pointing to start of range in which the symmetric
+/// difference will be stored
+/// \param queue Queue on which to execute
+///
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+inline OutputIterator set_symmetric_difference(InputIterator1 first1,
+                                               InputIterator1 last1,
+                                               InputIterator2 first2,
+                                               InputIterator2 last2,
+                                               OutputIterator result,
+                                               command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+
+    int tile_size = 1024;
+
+    int size1 = detail::iterator_range_size(first1, last1);
+    int size2 = detail::iterator_range_size(first2, last2);
+
+    // one boundary per tile of the combined input plus the leading zero
+    int boundary_count = (size1+size2+tile_size-1)/tile_size+1;
+
+    vector<uint_> bounds1(boundary_count, queue.get_context());
+    vector<uint_> bounds2(boundary_count, queue.get_context());
+
+    // Split both inputs into tiles; slot 0 of each boundary vector is the
+    // start of the first tile and the kernel fills the slots after it
+    detail::balanced_path_kernel partitioner;
+    partitioner.tile_size = tile_size;
+    partitioner.set_range(first1, last1, first2, last2,
+                          bounds1.begin()+1, bounds2.begin()+1);
+    fill_n(bounds1.begin(), 1, 0, queue);
+    fill_n(bounds2.begin(), 1, 0, queue);
+    partitioner.exec(queue);
+
+    // the final boundary is the end of the respective input
+    fill_n(bounds1.end()-1, 1, size1, queue);
+    fill_n(bounds2.end()-1, 1, size2, queue);
+
+    vector<value_type> scratch(size1+size2, queue.get_context());
+    vector<uint_> tile_counts(boundary_count, queue.get_context());
+    fill_n(tile_counts.end()-1, 1, 0, queue);
+
+    // Per-tile symmetric differences go to scratch, per-tile sizes to
+    // tile_counts
+    detail::serial_set_symmetric_difference_kernel tile_kernel;
+    tile_kernel.tile_size = tile_size;
+    tile_kernel.set_range(first1, first2, bounds1.begin(), bounds1.end(),
+                          bounds2.begin(), scratch.begin(), tile_counts.begin());
+
+    tile_kernel.exec(queue);
+
+    // turn the sizes into output offsets; the last slot becomes the total
+    exclusive_scan(tile_counts.begin(), tile_counts.end(), tile_counts.begin(), queue);
+
+    // Pack the per-tile pieces contiguously into result
+    detail::compact_kernel compactor;
+    compactor.tile_size = tile_size;
+    compactor.set_range(scratch.begin(), tile_counts.begin(), tile_counts.end(), result);
+
+    compactor.exec(queue);
+
+    return result + (tile_counts.end() - 1).read(queue);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP
diff --git a/boost/compute/algorithm/set_union.hpp b/boost/compute/algorithm/set_union.hpp
new file mode 100644
index 0000000000..c61f7b29b3
--- /dev/null
+++ b/boost/compute/algorithm/set_union.hpp
@@ -0,0 +1,195 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP
+#define BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/detail/balanced_path.hpp>
+#include <boost/compute/algorithm/detail/compact.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/system.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+///
+/// \brief Serial set union kernel class
+///
+/// Subclass of meta_kernel to perform serial set union after tiling
+/// Work-item i merges tile i of both inputs, writing a value matched in both only once.
+class serial_set_union_kernel : meta_kernel
+{
+public:
+ unsigned int tile_size; // streamed into the kernel source as the stride between tile outputs
+
+ serial_set_union_kernel() : meta_kernel("set_union")
+ {
+ tile_size = 4; // default only; assign the real value before calling set_range()
+ }
+
+ template<class InputIterator1, class InputIterator2,
+ class InputIterator3, class InputIterator4,
+ class OutputIterator1, class OutputIterator2>
+ void set_range(InputIterator1 first1, // start of the first input range
+ InputIterator2 first2, // start of the second input range
+ InputIterator3 tile_first1, // tile boundaries (indices) into the first range
+ InputIterator3 tile_last1, // end of those tile boundaries
+ InputIterator4 tile_first2, // tile boundaries (indices) into the second range
+ OutputIterator1 result, // scratch output; tile i writes starting at i*tile_size
+ OutputIterator2 counts) // receives the number of values written by tile i
+ {
+ m_count = iterator_range_size(tile_first1, tile_last1) - 1; // tiles = boundaries - 1
+
+ *this <<
+ "uint i = get_global_id(0);\n" << // i selects the tile handled by this work-item
+ "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
+ "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
+ "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
+ "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
+ "uint index = i*" << tile_size << ";\n" << // tile_size is baked in as a literal here
+ "uint count = 0;\n" <<
+ "while(start1<end1 && start2<end2)\n" << // two-pointer merge over the tile
+ "{\n" <<
+ " if(" << first1[expr<uint_>("start1")] << " == " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // present in both ranges: write it once
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++; start2++;\n" <<
+ " }\n" <<
+ " else if(" << first1[expr<uint_>("start1")] << " < " <<
+ first2[expr<uint_>("start2")] << ")\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // smaller value comes from the first range
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++;\n" <<
+ " }\n" <<
+ " else\n" <<
+ " {\n" <<
+ result[expr<uint_>("index")] << // smaller value comes from the second range
+ " = " << first2[expr<uint_>("start2")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start2++;\n" <<
+ " }\n" <<
+ "}\n" <<
+ "while(start1<end1)\n" << // copy what is left of the first range's tile
+ "{\n" <<
+ result[expr<uint_>("index")] <<
+ " = " << first1[expr<uint_>("start1")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start1++;\n" <<
+ "}\n" <<
+ "while(start2<end2)\n" << // copy what is left of the second range's tile
+ "{\n" <<
+ result[expr<uint_>("index")] <<
+ " = " << first2[expr<uint_>("start2")] << ";\n" <<
+ " index++; count++;\n" <<
+ " start2++;\n" <<
+ "}\n" <<
+ counts[expr<uint_>("i")] << " = count;\n"; // publish how many values this tile produced
+ }
+
+ event exec(command_queue &queue)
+ {
+ if(m_count == 0) { // no tiles: skip the launch and return a default-constructed event
+ return event();
+ }
+
+ return exec_1d(queue, 0, m_count); // m_count is passed as the size argument, one per tile
+ }
+
+private:
+ size_t m_count; // NOTE(review): assigned only by set_range(); the constructor leaves it unset
+};
+
+} //end detail namespace
+
+///
+/// \brief Set union algorithm
+///
+/// Merges the sorted range [first1, last1) with the sorted range
+/// [first2, last2), writing a value matched in both ranges only once, and
+/// stores the outcome in the range starting at result
+/// \return Iterator pointing to end of union
+///
+/// \param first1 Iterator pointing to start of first set
+/// \param last1 Iterator pointing to end of first set
+/// \param first2 Iterator pointing to start of second set
+/// \param last2 Iterator pointing to end of second set
+/// \param result Iterator pointing to start of range in which the union
+/// will be stored
+/// \param queue Queue on which to execute
+///
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+inline OutputIterator set_union(InputIterator1 first1,
+                                InputIterator1 last1,
+                                InputIterator2 first2,
+                                InputIterator2 last2,
+                                OutputIterator result,
+                                command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
+
+    int tile_size = 1024;
+
+    int size1 = detail::iterator_range_size(first1, last1);
+    int size2 = detail::iterator_range_size(first2, last2);
+
+    // one boundary per tile of the combined input plus the leading zero
+    int boundary_count = (size1+size2+tile_size-1)/tile_size+1;
+
+    vector<uint_> bounds1(boundary_count, queue.get_context());
+    vector<uint_> bounds2(boundary_count, queue.get_context());
+
+    // Split both inputs into tiles; slot 0 of each boundary vector is the
+    // start of the first tile and the kernel fills the slots after it
+    detail::balanced_path_kernel partitioner;
+    partitioner.tile_size = tile_size;
+    partitioner.set_range(first1, last1, first2, last2,
+                          bounds1.begin()+1, bounds2.begin()+1);
+    fill_n(bounds1.begin(), 1, 0, queue);
+    fill_n(bounds2.begin(), 1, 0, queue);
+    partitioner.exec(queue);
+
+    // the final boundary is the end of the respective input
+    fill_n(bounds1.end()-1, 1, size1, queue);
+    fill_n(bounds2.end()-1, 1, size2, queue);
+
+    vector<value_type> scratch(size1+size2, queue.get_context());
+    vector<uint_> tile_counts(boundary_count, queue.get_context());
+    fill_n(tile_counts.end()-1, 1, 0, queue);
+
+    // Per-tile unions go to scratch, per-tile sizes to tile_counts
+    detail::serial_set_union_kernel tile_kernel;
+    tile_kernel.tile_size = tile_size;
+    tile_kernel.set_range(first1, first2, bounds1.begin(), bounds1.end(),
+                          bounds2.begin(), scratch.begin(), tile_counts.begin());
+
+    tile_kernel.exec(queue);
+
+    // turn the sizes into output offsets; the last slot becomes the total
+    exclusive_scan(tile_counts.begin(), tile_counts.end(), tile_counts.begin(), queue);
+
+    // Pack the per-tile pieces contiguously into result
+    detail::compact_kernel compactor;
+    compactor.tile_size = tile_size;
+    compactor.set_range(scratch.begin(), tile_counts.begin(), tile_counts.end(), result);
+
+    compactor.exec(queue);
+
+    return result + (tile_counts.end() - 1).read(queue);
+}
+
+} //end compute namespace
+} //end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP
diff --git a/boost/compute/algorithm/sort.hpp b/boost/compute/algorithm/sort.hpp
new file mode 100644
index 0000000000..b2730b3e2b
--- /dev/null
+++ b/boost/compute/algorithm/sort.hpp
@@ -0,0 +1,194 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SORT_HPP
+#define BOOST_COMPUTE_ALGORITHM_SORT_HPP
+
+#include <iterator>
+
+#include <boost/utility/enable_if.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp>
+#include <boost/compute/algorithm/detail/radix_sort.hpp>
+#include <boost/compute/algorithm/detail/insertion_sort.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+#include <boost/compute/container/mapped_view.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// GPU sort, ascending order, for radix-sortable value types: picks the
+// algorithm from the number of elements in [first, last).
+template<class T>
+inline void dispatch_gpu_sort(buffer_iterator<T> first,
+                              buffer_iterator<T> last,
+                              less<T>,
+                              command_queue &queue,
+                              typename boost::enable_if_c<
+                                  is_radix_sortable<T>::value
+                              >::type* = 0)
+{
+    const size_t n = detail::iterator_range_size(first, last);
+
+    // zero or one element: nothing to do
+    if(n < 2){
+        return;
+    }
+
+    // up to 32 elements are handled by the serial insertion sort
+    if(n <= 32){
+        ::boost::compute::detail::serial_insertion_sort(first, last, queue);
+        return;
+    }
+
+    ::boost::compute::detail::radix_sort(first, last, queue);
+}
+
+// GPU sort, descending order, for radix-sortable value types: picks the
+// algorithm from the number of elements in [first, last).
+template<class T>
+inline void dispatch_gpu_sort(buffer_iterator<T> first,
+                              buffer_iterator<T> last,
+                              greater<T> compare,
+                              command_queue &queue,
+                              typename boost::enable_if_c<
+                                  is_radix_sortable<T>::value
+                              >::type* = 0)
+{
+    const size_t n = detail::iterator_range_size(first, last);
+
+    // zero or one element: nothing to do
+    if(n < 2){
+        return;
+    }
+
+    // up to 32 elements are handled by the serial insertion sort, which
+    // takes the comparison directly
+    if(n <= 32){
+        ::boost::compute::detail::serial_insertion_sort(
+            first, last, compare, queue
+        );
+        return;
+    }
+
+    // larger inputs: radix sort ascending, then flip the range so that it
+    // ends up in descending order
+    ::boost::compute::detail::radix_sort(first, last, queue);
+    ::boost::compute::reverse(first, last, queue);
+}
+
+// GPU sort fallback for every iterator/comparison pair not covered by the
+// radix-sortable overloads: always uses the serial insertion sort.
+template<class Iterator, class Compare>
+inline void dispatch_gpu_sort(Iterator first,
+                              Iterator last,
+                              Compare compare,
+                              command_queue &queue)
+{
+    detail::serial_insertion_sort(first, last, compare, queue);
+}
+
+// sort() for device iterators: GPU devices go through dispatch_gpu_sort(),
+// every other device type uses the CPU merge sort
+template<class Iterator, class Compare>
+inline void dispatch_sort(Iterator first,
+                          Iterator last,
+                          Compare compare,
+                          command_queue &queue,
+                          typename boost::enable_if<
+                              is_device_iterator<Iterator>
+                          >::type* = 0)
+{
+    if(queue.get_device().type() & device::gpu) {
+        dispatch_gpu_sort(first, last, compare, queue);
+    }
+    else {
+        ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue);
+    }
+}
+
+// sort() for host iterators: wraps the host memory in a mapped_view, sorts
+// the view through the device-iterator overload and maps the result back
+template<class Iterator, class Compare>
+inline void dispatch_sort(Iterator first,
+                          Iterator last,
+                          Compare compare,
+                          command_queue &queue,
+                          typename boost::disable_if<
+                              is_device_iterator<Iterator>
+                          >::type* = 0)
+{
+    typedef typename std::iterator_traits<Iterator>::value_type T;
+
+    size_t size = static_cast<size_t>(std::distance(first, last));
+
+    // fewer than two elements are already sorted; returning here also
+    // avoids dereferencing first when the range is empty
+    if(size < 2){
+        return;
+    }
+
+    // create mapped buffer
+    mapped_view<T> view(
+        boost::addressof(*first), size, queue.get_context()
+    );
+
+    // sort mapped buffer
+    dispatch_sort(view.begin(), view.end(), compare, queue);
+
+    // return results to host
+    view.map(queue);
+}
+
+} // end detail namespace
+
+/// Sorts the values in the range [\p first, \p last) into the order given
+/// by \p compare.
+///
+/// \param first iterator to the first element of the range to sort
+/// \param last iterator one past the final element of the range to sort
+/// \param compare comparison function (the overload without this argument
+///        uses \c less)
+/// \param queue command queue to perform the operation
+///
+/// Sorting a vector that lives on the device:
+/// \code
+/// // create vector on the device with data
+/// float data[] = { 2.f, 4.f, 1.f, 3.f };
+/// boost::compute::vector<float> vec(data, data + 4, queue);
+///
+/// // sort the vector on the device
+/// boost::compute::sort(vec.begin(), vec.end(), queue);
+/// \endcode
+///
+/// Host iterators are accepted as well. In that case the host memory is
+/// wrapped in a mapped_view, sorted through the queue, and mapped back to
+/// the host afterwards:
+/// \code
+/// std::vector<int> data = { 9, 3, 2, 5, 1, 4, 6, 7 };
+///
+/// boost::compute::sort(data.begin(), data.end(), queue);
+/// \endcode
+///
+/// \see is_sorted()
+template<class Iterator, class Compare>
+inline void sort(Iterator first,
+                 Iterator last,
+                 Compare compare,
+                 command_queue &queue = system::default_queue())
+{
+    // the host/device decision is made by the dispatch_sort() overloads
+    detail::dispatch_sort(first, last, compare, queue);
+}
+
+/// \overload
+///
+/// Sorts with \c less of the iterator's value type.
+template<class Iterator>
+inline void sort(Iterator first,
+                 Iterator last,
+                 command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+    typedef ::boost::compute::less<element_type> ascending_order;
+
+    ::boost::compute::sort(first, last, ascending_order(), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SORT_HPP
diff --git a/boost/compute/algorithm/sort_by_key.hpp b/boost/compute/algorithm/sort_by_key.hpp
new file mode 100644
index 0000000000..0e3dba81eb
--- /dev/null
+++ b/boost/compute/algorithm/sort_by_key.hpp
@@ -0,0 +1,156 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP
+#define BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP
+
+#include <iterator>
+
+#include <boost/utility/enable_if.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp>
+#include <boost/compute/algorithm/detail/insertion_sort.hpp>
+#include <boost/compute/algorithm/detail/radix_sort.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+namespace detail {
+
+// GPU key-value sort for radix-sortable key types compared with less<>.
+// Ranges of fewer than 32 keys are handled by the serial insertion sort;
+// all other ranges are handled by the radix sort.
+template<class KeyIterator, class ValueIterator>
+inline void
+dispatch_gpu_sort_by_key(KeyIterator keys_first,
+                         KeyIterator keys_last,
+                         ValueIterator values_first,
+                         less<typename std::iterator_traits<KeyIterator>::value_type> compare,
+                         command_queue &queue,
+                         typename boost::enable_if_c<
+                             is_radix_sortable<
+                                 typename std::iterator_traits<KeyIterator>::value_type
+                             >::value
+                         >::type* = 0)
+{
+    size_t key_count = detail::iterator_range_size(keys_first, keys_last);
+
+    if(key_count >= 32){
+        detail::radix_sort_by_key(keys_first, keys_last, values_first, queue);
+        return;
+    }
+
+    detail::serial_insertion_sort_by_key(
+        keys_first, keys_last, values_first, compare, queue
+    );
+}
+
+// GPU key-value sort for radix-sortable key types compared with greater<>.
+// Ranges of fewer than 32 keys are handled by the serial insertion sort;
+// otherwise the keys are radix sorted and both ranges are then reversed.
+//
+// NOTE(review): reversing after the radix sort also reverses the relative
+// order of values whose keys compare equal - confirm whether callers that
+// expect a stable result (e.g. stable_sort_by_key) are affected.
+template<class KeyIterator, class ValueIterator>
+inline void
+dispatch_gpu_sort_by_key(KeyIterator keys_first,
+                         KeyIterator keys_last,
+                         ValueIterator values_first,
+                         greater<typename std::iterator_traits<KeyIterator>::value_type> compare,
+                         command_queue &queue,
+                         typename boost::enable_if_c<
+                             is_radix_sortable<
+                                 typename std::iterator_traits<KeyIterator>::value_type
+                             >::value
+                         >::type* = 0)
+{
+    size_t key_count = detail::iterator_range_size(keys_first, keys_last);
+
+    if(key_count < 32){
+        detail::serial_insertion_sort_by_key(
+            keys_first, keys_last, values_first, compare, queue
+        );
+        return;
+    }
+
+    // step 1: radix sort (the original comment states it sorts ascending)
+    detail::radix_sort_by_key(keys_first, keys_last, values_first, queue);
+
+    // step 2: flip keys and their values to obtain descending order
+    ::boost::compute::reverse(keys_first, keys_last, queue);
+    ::boost::compute::reverse(values_first, values_first + key_count, queue);
+}
+
+// Generic overload for an arbitrary comparator: always sorts with the
+// serial insertion sort.
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void dispatch_gpu_sort_by_key(KeyIterator keys_first,
+                                     KeyIterator keys_last,
+                                     ValueIterator values_first,
+                                     Compare compare,
+                                     command_queue &queue)
+{
+    ::boost::compute::detail::serial_insertion_sort_by_key(
+        keys_first,
+        keys_last,
+        values_first,
+        compare,
+        queue
+    );
+}
+
+// Chooses the key-value sort implementation from the type of the device
+// behind \p queue: merge sort for non-GPU devices, the GPU dispatcher
+// otherwise.
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void dispatch_sort_by_key(KeyIterator keys_first,
+                                 KeyIterator keys_last,
+                                 ValueIterator values_first,
+                                 Compare compare,
+                                 command_queue &queue)
+{
+    if(!(queue.get_device().type() & device::gpu)) {
+        ::boost::compute::detail::merge_sort_by_key_on_cpu(
+            keys_first, keys_last, values_first, compare, queue
+        );
+        return;
+    }
+
+    dispatch_gpu_sort_by_key(keys_first, keys_last, values_first, compare, queue);
+}
+
+} // end detail namespace
+
+/// Performs a key-value sort using the keys in the range [\p keys_first,
+/// \p keys_last) on the values in the range [\p values_first,
+/// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare.
+///
+/// If no compare function is specified, \c less is used.
+///
+/// \see sort()
+// Comparator-taking entry point: forwards all arguments unchanged to
+// detail::dispatch_sort_by_key().
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void sort_by_key(KeyIterator keys_first,
+                        KeyIterator keys_last,
+                        ValueIterator values_first,
+                        Compare compare,
+                        command_queue &queue = system::default_queue())
+{
+    namespace impl = ::boost::compute::detail;
+
+    impl::dispatch_sort_by_key(keys_first, keys_last, values_first, compare, queue);
+}
+
+/// \overload
+template<class KeyIterator, class ValueIterator>
+inline void sort_by_key(KeyIterator keys_first,
+                        KeyIterator keys_last,
+                        ValueIterator values_first,
+                        command_queue &queue = system::default_queue())
+{
+    // no comparator supplied: compare keys with less<> on the key type
+    typedef typename std::iterator_traits<KeyIterator>::value_type key_value_type;
+    typedef less<key_value_type> default_compare;
+
+    ::boost::compute::sort_by_key(
+        keys_first, keys_last, values_first, default_compare(), queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP
diff --git a/boost/compute/algorithm/stable_partition.hpp b/boost/compute/algorithm/stable_partition.hpp
new file mode 100644
index 0000000000..283b068283
--- /dev/null
+++ b/boost/compute/algorithm/stable_partition.hpp
@@ -0,0 +1,72 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP
+#define BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// \brief Partitioning algorithm
+///
+/// Partitions the elements in the range [\p first, \p last) according to
+/// \p predicate. The order of the elements is preserved.
+/// \return Iterator pointing to end of true values
+///
+/// \param first Iterator pointing to start of range
+/// \param last Iterator pointing to end of range
+/// \param predicate Unary predicate to be applied on each element
+/// \param queue Queue on which to execute
+///
+/// \see is_partitioned() and partition()
+///
+template<class Iterator, class UnaryPredicate>
+inline Iterator stable_partition(Iterator first,
+                                 Iterator last,
+                                 UnaryPredicate predicate,
+                                 command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type value_type;
+
+    // snapshot the input: both passes below read from the copy while
+    // writing their results back into the original range
+    ::boost::compute::vector<value_type> tmp(first, last, queue);
+
+    // pass 1: values satisfying the predicate, written from \p first
+    Iterator last_true =
+        ::boost::compute::copy_if(tmp.begin(),
+                                  tmp.end(),
+                                  first,
+                                  predicate,
+                                  queue);
+
+    // pass 2: remaining values, written directly after the true values;
+    // the end iterator of this pass is not needed, so it is not stored
+    ::boost::compute::copy_if(tmp.begin(),
+                              tmp.end(),
+                              last_true,
+                              not1(predicate),
+                              queue);
+
+    // end of the true values (== start of the false values)
+    return last_true;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP
diff --git a/boost/compute/algorithm/stable_sort.hpp b/boost/compute/algorithm/stable_sort.hpp
new file mode 100644
index 0000000000..cd82a0a606
--- /dev/null
+++ b/boost/compute/algorithm/stable_sort.hpp
@@ -0,0 +1,99 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP
+#define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp>
+#include <boost/compute/algorithm/detail/radix_sort.hpp>
+#include <boost/compute/algorithm/detail/insertion_sort.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+#include <boost/compute/functional/operator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Generic overload for an arbitrary iterator/comparator pair: sorts with
+// the serial insertion sort.
+template<class Iterator, class Compare>
+inline void dispatch_gpu_stable_sort(Iterator first,
+                                     Iterator last,
+                                     Compare compare,
+                                     command_queue &queue)
+{
+    namespace impl = ::boost::compute::detail;
+
+    impl::serial_insertion_sort(first, last, compare, queue);
+}
+
+// Overload for buffer iterators over radix-sortable types compared with
+// less<>: the comparator object itself is unused, the radix sort is
+// invoked directly.
+template<class T>
+inline typename boost::enable_if_c<is_radix_sortable<T>::value>::type
+dispatch_gpu_stable_sort(buffer_iterator<T> first,
+                         buffer_iterator<T> last,
+                         less<T>,
+                         command_queue &queue)
+{
+    namespace impl = ::boost::compute::detail;
+
+    impl::radix_sort(first, last, queue);
+}
+
+// Overload for buffer iterators over radix-sortable types compared with
+// greater<>: radix sort first, then reverse the range so it ends up in
+// descending order. The comparator object itself is unused.
+template<class T>
+inline typename boost::enable_if_c<is_radix_sortable<T>::value>::type
+dispatch_gpu_stable_sort(buffer_iterator<T> first,
+                         buffer_iterator<T> last,
+                         greater<T>,
+                         command_queue &queue)
+{
+    namespace impl = ::boost::compute::detail;
+
+    // step 1: radix sort (ascending, per the original comment)
+    impl::radix_sort(first, last, queue);
+
+    // step 2: flip into descending order
+    ::boost::compute::reverse(first, last, queue);
+}
+
+} // end detail namespace
+
+/// Sorts the values in the range [\p first, \p last) according to
+/// \p compare. The relative order of identical values is preserved.
+///
+/// \see sort(), is_sorted()
+template<class Iterator, class Compare>
+inline void stable_sort(Iterator first,
+                        Iterator last,
+                        Compare compare,
+                        command_queue &queue = system::default_queue())
+{
+    // GPU devices: use the GPU dispatcher and stop there
+    if(queue.get_device().type() & device::gpu) {
+        ::boost::compute::detail::dispatch_gpu_stable_sort(
+            first, last, compare, queue
+        );
+        // the range is sorted at this point; the CPU merge sort below is
+        // only meant for non-GPU devices
+        return;
+    }
+
+    // all other device types: merge sort
+    ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue);
+}
+
+/// \overload
+template<class Iterator>
+inline void stable_sort(Iterator first,
+                        Iterator last,
+                        command_queue &queue = system::default_queue())
+{
+    // no comparator supplied: use less<> on the iterator's value type
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+    typedef ::boost::compute::less<element_type> default_compare;
+
+    ::boost::compute::stable_sort(first, last, default_compare(), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP
diff --git a/boost/compute/algorithm/stable_sort_by_key.hpp b/boost/compute/algorithm/stable_sort_by_key.hpp
new file mode 100644
index 0000000000..8a51372ede
--- /dev/null
+++ b/boost/compute/algorithm/stable_sort_by_key.hpp
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP
+#define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP
+
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/sort_by_key.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Performs a key-value stable sort using the keys in the range [\p keys_first,
+/// \p keys_last) on the values in the range [\p values_first,
+/// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare.
+///
+/// If no compare function is specified, \c less is used.
+///
+/// \see sort()
+template<class KeyIterator, class ValueIterator, class Compare>
+inline void stable_sort_by_key(KeyIterator keys_first,
+                               KeyIterator keys_last,
+                               ValueIterator values_first,
+                               Compare compare,
+                               command_queue &queue = system::default_queue())
+{
+    // Delegates to sort_by_key(), which this function treats as stable.
+    // NOTE(review): confirm this holds for every comparator/device path
+    // of sort_by_key (in particular paths that reverse a sorted range).
+    ::boost::compute::sort_by_key(keys_first,
+                                  keys_last,
+                                  values_first,
+                                  compare,
+                                  queue);
+}
+
+/// \overload
+template<class KeyIterator, class ValueIterator>
+inline void stable_sort_by_key(KeyIterator keys_first,
+                               KeyIterator keys_last,
+                               ValueIterator values_first,
+                               command_queue &queue = system::default_queue())
+{
+    // no comparator supplied: compare keys with less<> on the key type
+    typedef typename std::iterator_traits<KeyIterator>::value_type key_value_type;
+    typedef less<key_value_type> default_compare;
+
+    ::boost::compute::stable_sort_by_key(
+        keys_first, keys_last, values_first, default_compare(), queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP
diff --git a/boost/compute/algorithm/swap_ranges.hpp b/boost/compute/algorithm/swap_ranges.hpp
new file mode 100644
index 0000000000..6ff3e14f6a
--- /dev/null
+++ b/boost/compute/algorithm/swap_ranges.hpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP
+#define BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Swaps the elements in the range [\p first1, \p last1) with the
+/// elements in the range beginning at \p first2.
+template<class Iterator1, class Iterator2>
+inline Iterator2 swap_ranges(Iterator1 first1,
+                             Iterator1 last1,
+                             Iterator2 first2,
+                             command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator1>::value_type element_type;
+
+    // the second range has the same length as the first one
+    Iterator2 second_end = first2 + std::distance(first1, last1);
+
+    // three-step swap through a temporary device vector:
+    //   saved   <- range 1
+    //   range 1 <- range 2
+    //   range 2 <- saved
+    ::boost::compute::vector<element_type> saved(first1, last1, queue);
+    ::boost::compute::copy(first2, second_end, first1, queue);
+    ::boost::compute::copy(saved.begin(), saved.end(), first2, queue);
+
+    return second_end;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP
diff --git a/boost/compute/algorithm/transform.hpp b/boost/compute/algorithm/transform.hpp
new file mode 100644
index 0000000000..022a4988bd
--- /dev/null
+++ b/boost/compute/algorithm/transform.hpp
@@ -0,0 +1,76 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP
+#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/iterator/zip_iterator.hpp>
+#include <boost/compute/functional/detail/unpack.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Transforms the elements in the range [\p first, \p last) using
+/// \p transform and stores the results in the range beginning at
+/// \p result.
+///
+/// For example, to calculate the absolute value for each element in a vector:
+///
+/// \snippet test/test_transform.cpp transform_abs
+///
+/// \see copy()
+// Unary form: implemented as a copy() whose source iterators are wrapped
+// in transform iterators, so \p op is applied to each element as it is
+// read. Returns whatever copy() returns for the output range.
+template<class InputIterator, class OutputIterator, class UnaryOperator>
+inline OutputIterator transform(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ UnaryOperator op,
+ command_queue &queue = system::default_queue())
+{
+ // wrap both ends of the input range with the same operator
+ return copy(
+ ::boost::compute::make_transform_iterator(first, op),
+ ::boost::compute::make_transform_iterator(last, op),
+ result,
+ queue
+ );
+}
+
+/// \overload
+template<class InputIterator1,
+         class InputIterator2,
+         class OutputIterator,
+         class BinaryOperator>
+inline OutputIterator transform(InputIterator1 first1,
+                                InputIterator1 last1,
+                                InputIterator2 first2,
+                                OutputIterator result,
+                                BinaryOperator op,
+                                command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator1>::difference_type offset_type;
+
+    // the second input range is as long as the first one
+    offset_type length = std::distance(first1, last1);
+
+    // Binary form expressed through the unary one: zip the two inputs
+    // into a range of tuples and let unpack() spread each tuple over the
+    // arguments of the binary operator.
+    return transform(
+        make_zip_iterator(boost::make_tuple(first1, first2)),
+        make_zip_iterator(boost::make_tuple(last1, first2 + length)),
+        result,
+        detail::unpack(op),
+        queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP
diff --git a/boost/compute/algorithm/transform_if.hpp b/boost/compute/algorithm/transform_if.hpp
new file mode 100644
index 0000000000..0eb0fd434e
--- /dev/null
+++ b/boost/compute/algorithm/transform_if.hpp
@@ -0,0 +1,117 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP
+#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/count.hpp>
+#include <boost/compute/algorithm/count_if.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Shared implementation behind transform_if().
+//
+// For every element of [first, last) that satisfies \p predicate, writes
+// either function(element) or - when \p copyIndex is true - the element's
+// index into the range beginning at \p result. Destination positions come
+// from an exclusive scan over per-element 0/1 flags, so the output is
+// written contiguously. Returns \p result advanced by the number of
+// elements that satisfied the predicate; returns \p result unchanged for
+// an empty input.
+template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate>
+inline OutputIterator transform_if_impl(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ UnaryFunction function,
+ Predicate predicate,
+ bool copyIndex,
+ command_queue &queue)
+{
+ typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type;
+
+ // nothing to do for an empty range (also avoids zero-sized launches)
+ size_t count = detail::iterator_range_size(first, last);
+ if(count == 0){
+ return result;
+ }
+
+ const context &context = queue.get_context();
+
+ // storage for destination indices
+ ::boost::compute::vector<cl_uint> indices(count, context);
+
+ // write counts
+ // (kernel 1: indices[i] = predicate(first[i]) ? 1 : 0)
+ ::boost::compute::detail::meta_kernel k1("transform_if_write_counts");
+ k1 << indices.begin()[k1.get_global_id(0)] << " = "
+ << predicate(first[k1.get_global_id(0)]) << " ? 1 : 0;\n";
+ k1.exec_1d(queue, 0, count);
+
+ // count number of elements to be copied
+ // (must happen before the scan below overwrites the 0/1 flags)
+ size_t copied_element_count =
+ ::boost::compute::count(indices.begin(), indices.end(), 1, queue);
+
+ // scan indices
+ // (in place: each flag becomes the output position of its element)
+ ::boost::compute::exclusive_scan(
+ indices.begin(), indices.end(), indices.begin(), queue
+ );
+
+ // copy values
+ // (kernel 2: the predicate is re-evaluated, since the flags are gone)
+ ::boost::compute::detail::meta_kernel k2("transform_if_do_copy");
+ k2 << "if(" << predicate(first[k2.get_global_id(0)]) << ")" <<
+ " " << result[indices.begin()[k2.get_global_id(0)]] << "=";
+
+ // right-hand side of the assignment: element index or transformed value
+ if(copyIndex){
+ k2 << k2.get_global_id(0) << ";\n";
+ }
+ else {
+ k2 << function(first[k2.get_global_id(0)]) << ";\n";
+ }
+
+ k2.exec_1d(queue, 0, count);
+
+ return result + static_cast<difference_type>(copied_element_count);
+}
+
+// Overload for discard_iterator output: no values are written, so only
+// the number of elements satisfying \p predicate is computed and used to
+// advance \p result.
+template<class InputIterator, class UnaryFunction, class Predicate>
+inline discard_iterator transform_if_impl(InputIterator first,
+                                          InputIterator last,
+                                          discard_iterator result,
+                                          UnaryFunction function,
+                                          Predicate predicate,
+                                          bool copyIndex,
+                                          command_queue &queue)
+{
+    // neither parameter influences the result when output is discarded
+    (void) copyIndex;
+    (void) function;
+
+    return result + count_if(first, last, predicate, queue);
+}
+
+} // end detail namespace
+
+/// Applies \p function to each element in the range [\p first, \p last)
+/// for which \p predicate returns \c true and stores the results
+/// contiguously in the range beginning at \p result.
+///
+/// \return iterator to the end of the result range
+template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate>
+inline OutputIterator transform_if(InputIterator first,
+                                   InputIterator last,
+                                   OutputIterator result,
+                                   UnaryFunction function,
+                                   Predicate predicate,
+                                   command_queue &queue = system::default_queue())
+{
+    // write transformed values rather than element indices
+    const bool copy_index = false;
+
+    return detail::transform_if_impl(
+        first, last, result, function, predicate, copy_index, queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP
diff --git a/boost/compute/algorithm/transform_reduce.hpp b/boost/compute/algorithm/transform_reduce.hpp
new file mode 100644
index 0000000000..fbeee5a691
--- /dev/null
+++ b/boost/compute/algorithm/transform_reduce.hpp
@@ -0,0 +1,89 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP
+#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/reduce.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/iterator/zip_iterator.hpp>
+#include <boost/compute/functional/detail/unpack.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Transforms each value in the range [\p first, \p last) with the unary
+/// \p transform_function and then reduces each transformed value with
+/// \p reduce_function.
+///
+/// For example, to calculate the sum of the absolute values of a vector
+/// of integers:
+///
+/// \snippet test/test_transform_reduce.cpp sum_abs_int
+///
+/// \see reduce(), inner_product()
+template<class InputIterator,
+         class OutputIterator,
+         class UnaryTransformFunction,
+         class BinaryReduceFunction>
+inline void transform_reduce(InputIterator first,
+                             InputIterator last,
+                             OutputIterator result,
+                             UnaryTransformFunction transform_function,
+                             BinaryReduceFunction reduce_function,
+                             command_queue &queue = system::default_queue())
+{
+    namespace bc = ::boost::compute;
+
+    // reduce() over transform iterators: the transform is applied as the
+    // input elements are read
+    bc::reduce(
+        bc::make_transform_iterator(first, transform_function),
+        bc::make_transform_iterator(last, transform_function),
+        result,
+        reduce_function,
+        queue
+    );
+}
+
+/// \overload
+template<class InputIterator1,
+         class InputIterator2,
+         class OutputIterator,
+         class BinaryTransformFunction,
+         class BinaryReduceFunction>
+inline void transform_reduce(InputIterator1 first1,
+                             InputIterator1 last1,
+                             InputIterator2 first2,
+                             OutputIterator result,
+                             BinaryTransformFunction transform_function,
+                             BinaryReduceFunction reduce_function,
+                             command_queue &queue = system::default_queue())
+{
+    namespace bc = ::boost::compute;
+
+    typedef typename std::iterator_traits<InputIterator1>::difference_type offset_type;
+
+    // the second input range is as long as the first one
+    offset_type length = std::distance(first1, last1);
+
+    // Binary form expressed through the unary one: zip both inputs into
+    // tuples and unpack each tuple into the binary transform's arguments.
+    bc::transform_reduce(
+        bc::make_zip_iterator(boost::make_tuple(first1, first2)),
+        bc::make_zip_iterator(boost::make_tuple(last1, first2 + length)),
+        result,
+        detail::unpack(transform_function),
+        reduce_function,
+        queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP
diff --git a/boost/compute/algorithm/unique.hpp b/boost/compute/algorithm/unique.hpp
new file mode 100644
index 0000000000..faa36bad9d
--- /dev/null
+++ b/boost/compute/algorithm/unique.hpp
@@ -0,0 +1,66 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP
+#define BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/unique_copy.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/functional/operator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Removes all consecutive duplicate elements (determined by \p op) from the
+/// range [first, last). If \p op is not provided, the equality operator is
+/// used.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param op binary operator used to check for uniqueness
+/// \param queue command queue to perform the operation
+///
+/// \return \c InputIterator to the new logical end of the range
+///
+/// \see unique_copy()
+template<class InputIterator, class BinaryPredicate>
+inline InputIterator unique(InputIterator first,
+                            InputIterator last,
+                            BinaryPredicate op,
+                            command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+
+    // work from a snapshot of the input so that unique_copy() can write
+    // its result back into the original range
+    vector<element_type> snapshot(first, last, queue);
+
+    return ::boost::compute::unique_copy(snapshot.begin(),
+                                         snapshot.end(),
+                                         first,
+                                         op,
+                                         queue);
+}
+
+/// \overload
+template<class InputIterator>
+inline InputIterator unique(InputIterator first,
+                            InputIterator last,
+                            command_queue &queue = system::default_queue())
+{
+    // no predicate supplied: compare neighbours with equal_to<>
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+    typedef ::boost::compute::equal_to<element_type> default_predicate;
+
+    return ::boost::compute::unique(first, last, default_predicate(), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP
diff --git a/boost/compute/algorithm/unique_copy.hpp b/boost/compute/algorithm/unique_copy.hpp
new file mode 100644
index 0000000000..2ce60a9359
--- /dev/null
+++ b/boost/compute/algorithm/unique_copy.hpp
@@ -0,0 +1,164 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP
+#define BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/algorithm/gather.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/functional/operator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Serial variant of unique_copy: generates one kernel that walks the
+// input in order, writing the first element and then every element for
+// which op(current, next) is false. The number of elements written is
+// stored in a one-element device vector and read back to the host.
+// Returns \p result advanced by that number; returns \p result unchanged
+// for an empty input.
+template<class InputIterator, class OutputIterator, class BinaryPredicate>
+inline OutputIterator serial_unique_copy(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ BinaryPredicate op,
+ command_queue &queue)
+{
+ // empty input: the kernel below unconditionally reads element 0
+ if(first == last){
+ return result;
+ }
+
+ typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+ const context &context = queue.get_context();
+
+ size_t count = detail::iterator_range_size(first, last);
+
+ detail::meta_kernel k("serial_unique_copy");
+
+ // one-element device buffer receiving the number of unique values
+ vector<uint_> unique_count_vector(1, context);
+
+ size_t size_arg = k.add_arg<const uint_>("size");
+ size_t unique_count_arg = k.add_arg<uint_ *>(memory_object::global_memory, "unique_count");
+
+ // kernel body: 'current' is the last value written, 'index' its
+ // position in the output; a new value is emitted when op(current, next)
+ // is false
+ k << k.decl<uint_>("index") << " = 0;\n"
+ << k.decl<value_type>("current") << " = " << first[k.var<uint_>("0")] << ";\n"
+ << result[k.var<uint_>("0")] << " = current;\n"
+ << "for(uint i = 1; i < size; i++){\n"
+ << " " << k.decl<value_type>("next") << " = " << first[k.var<uint_>("i")] << ";\n"
+ << " if(!" << op(k.var<value_type>("current"), k.var<value_type>("next")) << "){\n"
+ << " " << result[k.var<uint_>("++index")] << " = next;\n"
+ << " " << "current = next;\n"
+ << " }\n"
+ << "}\n"
+ << "*unique_count = index + 1;\n";
+
+ k.set_arg<const uint_>(size_arg, count);
+ k.set_arg(unique_count_arg, unique_count_vector.get_buffer());
+
+ // NOTE(review): presumably offset 0, global size 1, local size 1 (a
+ // single work-item) - confirm against meta_kernel::exec_1d
+ k.exec_1d(queue, 0, 1, 1);
+
+ // read the count back to the host
+ uint_ unique_count;
+ copy_n(unique_count_vector.begin(), 1, &unique_count, queue);
+
+ return result + unique_count;
+}
+
+// Flag-based variant of unique_copy:
+//   1. flags[i] = !op(in[i-1], in[i]) for i >= 1, flags[0] = 1
+//   2. the indices of all flagged elements are collected
+//   3. the flagged elements are gathered from the input into \p result
+// Returns \p result advanced by the number of flagged elements; returns
+// \p result unchanged for an empty input.
+template<class InputIterator, class OutputIterator, class BinaryPredicate>
+inline OutputIterator unique_copy(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ BinaryPredicate op,
+ command_queue &queue)
+{
+ // empty input: 'last - 1' below requires at least one element
+ if(first == last){
+ return result;
+ }
+
+ const context &context = queue.get_context();
+ size_t count = detail::iterator_range_size(first, last);
+
+ // flags marking unique elements
+ vector<uint_> flags(count, context);
+
+ // find each unique element and mark it with a one
+ // (pairs each element with its successor; the result for the pair
+ // (i-1, i) is stored at flags[i])
+ transform(
+ first, last - 1, first + 1, flags.begin() + 1, not2(op), queue
+ );
+
+ // first element is always unique
+ fill_n(flags.begin(), 1, 1, queue);
+
+ // storage for destination indices
+ vector<uint_> indices(count, context);
+
+ // copy indices for each unique element
+ vector<uint_>::iterator last_index = detail::copy_index_if(
+ flags.begin(), flags.end(), indices.begin(), lambda::_1 == 1, queue
+ );
+
+ // copy unique values from input to output using the computed indices
+ gather(indices.begin(), last_index, first, result, queue);
+
+ // return an iterator to the end of the unique output range
+ return result + std::distance(indices.begin(), last_index);
+}
+
+} // end detail namespace
+
+/// Makes a copy of the range [first, last) and removes all consecutive
+/// duplicate elements (determined by \p op) from the copy. If \p op is not
+/// provided, the equality operator is used.
+///
+/// \param first first element in the input range
+/// \param last last element in the input range
+/// \param result first element in the result range
+/// \param op binary operator used to check for uniqueness
+/// \param queue command queue to perform the operation
+///
+/// \return \c OutputIterator to the end of the result range
+///
+/// \see unique()
+template<class InputIterator, class OutputIterator, class BinaryPredicate>
+inline OutputIterator unique_copy(InputIterator first,
+                                  InputIterator last,
+                                  OutputIterator result,
+                                  BinaryPredicate op,
+                                  command_queue &queue = system::default_queue())
+{
+    size_t input_size = detail::iterator_range_size(first, last);
+
+    // inputs of 32 or more elements use the flag-based implementation
+    if(input_size >= 32){
+        return detail::unique_copy(first, last, result, op, queue);
+    }
+
+    // shorter inputs use the serial implementation
+    return detail::serial_unique_copy(first, last, result, op, queue);
+}
+
+/// \overload
+template<class InputIterator, class OutputIterator>
+inline OutputIterator unique_copy(InputIterator first,
+                                  InputIterator last,
+                                  OutputIterator result,
+                                  command_queue &queue = system::default_queue())
+{
+    // no predicate supplied: compare neighbours with equal_to<>
+    typedef typename std::iterator_traits<InputIterator>::value_type element_type;
+    typedef ::boost::compute::equal_to<element_type> default_predicate;
+
+    return ::boost::compute::unique_copy(
+        first, last, result, default_predicate(), queue
+    );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP
diff --git a/boost/compute/algorithm/upper_bound.hpp b/boost/compute/algorithm/upper_bound.hpp
new file mode 100644
index 0000000000..a5a82d301c
--- /dev/null
+++ b/boost/compute/algorithm/upper_bound.hpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP
+#define BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/detail/binary_find.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Returns an iterator pointing to the first element in the sorted
+/// range [\p first, \p last) that is strictly greater than
+/// \p value.
+template<class InputIterator, class T>
+inline InputIterator
+upper_bound(InputIterator first,
+ InputIterator last,
+ const T &value,
+ command_queue &queue = system::default_queue())
+{
+ // placeholder used to build the comparison expression below
+ using ::boost::compute::_1;
+
+ // search with the predicate "element > value" and hand back the
+ // iterator produced by binary_find()
+ return detail::binary_find(first, last, _1 > value, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP
diff --git a/boost/compute/allocator.hpp b/boost/compute/allocator.hpp
new file mode 100644
index 0000000000..e3db491b42
--- /dev/null
+++ b/boost/compute/allocator.hpp
@@ -0,0 +1,21 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALLOCATOR_HPP
+#define BOOST_COMPUTE_ALLOCATOR_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute allocator headers.
+
+#include <boost/compute/allocator/buffer_allocator.hpp>
+#include <boost/compute/allocator/pinned_allocator.hpp>
+
+#endif // BOOST_COMPUTE_ALLOCATOR_HPP
diff --git a/boost/compute/allocator/buffer_allocator.hpp b/boost/compute/allocator/buffer_allocator.hpp
new file mode 100644
index 0000000000..2139a97896
--- /dev/null
+++ b/boost/compute/allocator/buffer_allocator.hpp
@@ -0,0 +1,118 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP
+#define BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/detail/device_ptr.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class buffer_allocator
+/// \brief The buffer_allocator class allocates memory with \ref buffer objects
+///
+/// \see buffer
+template<class T>
+class buffer_allocator
+{
+public:
+ typedef T value_type;
+ typedef detail::device_ptr<T> pointer;
+ typedef const detail::device_ptr<T> const_pointer;
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+
+ explicit buffer_allocator(const context &context)
+ : m_context(context),
+ m_mem_flags(buffer::read_write)
+ {
+ }
+
+ buffer_allocator(const buffer_allocator<T> &other)
+ : m_context(other.m_context),
+ m_mem_flags(other.m_mem_flags)
+ {
+ }
+
+ buffer_allocator<T>& operator=(const buffer_allocator<T> &other)
+ {
+ if(this != &other){
+ m_context = other.m_context;
+ m_mem_flags = other.m_mem_flags;
+ }
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ buffer_allocator(buffer_allocator<T>&& other) BOOST_NOEXCEPT
+ : m_context(std::move(other.m_context)),
+ m_mem_flags(other.m_mem_flags)
+ {
+ }
+
+ buffer_allocator<T>& operator=(buffer_allocator<T>&& other) BOOST_NOEXCEPT
+ {
+ m_context = std::move(other.m_context);
+ m_mem_flags = other.m_mem_flags;
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ ~buffer_allocator()
+ {
+ }
+
+ pointer allocate(size_type n)
+ {
+ buffer buf(m_context, n * sizeof(T), m_mem_flags);
+ clRetainMemObject(buf.get());
+ return detail::device_ptr<T>(buf);
+ }
+
+ void deallocate(pointer p, size_type n)
+ {
+ BOOST_ASSERT(p.get_buffer().get_context() == m_context);
+
+ (void) n;
+
+ clReleaseMemObject(p.get_buffer().get());
+ }
+
+ size_type max_size() const
+ {
+ return m_context.get_device().max_memory_alloc_size() / sizeof(T);
+ }
+
+ context get_context() const
+ {
+ return m_context;
+ }
+
+protected:
+ void set_mem_flags(cl_mem_flags flags)
+ {
+ m_mem_flags = flags;
+ }
+
+private:
+ context m_context;
+ cl_mem_flags m_mem_flags;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP
diff --git a/boost/compute/allocator/pinned_allocator.hpp b/boost/compute/allocator/pinned_allocator.hpp
new file mode 100644
index 0000000000..9c85d93e79
--- /dev/null
+++ b/boost/compute/allocator/pinned_allocator.hpp
@@ -0,0 +1,53 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP
+#define BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP
+
+#include <boost/compute/allocator/buffer_allocator.hpp>
+
+namespace boost {
+namespace compute {
+
+template<class T>
+class pinned_allocator : public buffer_allocator<T>
+{
+public:
+ explicit pinned_allocator(const context &context)
+ : buffer_allocator<T>(context)
+ {
+ buffer_allocator<T>::set_mem_flags(
+ buffer::read_write | buffer::alloc_host_ptr
+ );
+ }
+
+ pinned_allocator(const pinned_allocator<T> &other)
+ : buffer_allocator<T>(other)
+ {
+ }
+
+ pinned_allocator<T>& operator=(const pinned_allocator<T> &other)
+ {
+ if(this != &other){
+ buffer_allocator<T>::operator=(other);
+ }
+
+ return *this;
+ }
+
+ ~pinned_allocator()
+ {
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP
diff --git a/boost/compute/async.hpp b/boost/compute/async.hpp
new file mode 100644
index 0000000000..415dedda7d
--- /dev/null
+++ b/boost/compute/async.hpp
@@ -0,0 +1,21 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ASYNC_HPP
+#define BOOST_COMPUTE_ASYNC_HPP
+
+/// \file
+/// Meta-header to include all Boost.Compute async headers.
+
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/async/wait.hpp>
+#include <boost/compute/async/wait_guard.hpp>
+
+#endif // BOOST_COMPUTE_ASYNC_HPP
diff --git a/boost/compute/async/future.hpp b/boost/compute/async/future.hpp
new file mode 100644
index 0000000000..f7f7780deb
--- /dev/null
+++ b/boost/compute/async/future.hpp
@@ -0,0 +1,166 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ASYNC_FUTURE_HPP
+#define BOOST_COMPUTE_ASYNC_FUTURE_HPP
+
+#include <boost/compute/event.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class future
+/// \brief Holds the result of an asynchronous computation.
+///
+/// \see event, wait_list
+template<class T>
+class future
+{
+public:
+ /// Creates an invalid future; the stored result is value-initialized.
+ future()
+ : m_result(), m_event(0)
+ {}
+
+ future(const T &result, const event &event)
+ : m_result(result),
+ m_event(event)
+ {
+ }
+
+ future(const future<T> &other)
+ : m_result(other.m_result),
+ m_event(other.m_event)
+ {
+ }
+
+ future& operator=(const future<T> &other)
+ {
+ if(this != &other){
+ m_result = other.m_result;
+ m_event = other.m_event;
+ }
+
+ return *this;
+ }
+
+ ~future()
+ {
+ }
+
+ /// Returns the result of the computation. This will block until
+ /// the result is ready.
+ T get()
+ {
+ wait();
+
+ return m_result;
+ }
+
+ /// Returns \c true if the future is valid.
+ bool valid() const
+ {
+ return m_event != 0;
+ }
+
+ /// Blocks until the computation is complete.
+ void wait() const
+ {
+ m_event.wait();
+ }
+
+ /// Returns the underlying event object.
+ event get_event() const
+ {
+ return m_event;
+ }
+
+private:
+ T m_result;
+ event m_event;
+};
+
+/// \internal_
+template<>
+class future<void>
+{
+public:
+ future()
+ : m_event(0)
+ {
+ }
+
+ template<class T>
+ future(const future<T> &other)
+ : m_event(other.get_event())
+ {
+ }
+
+ explicit future(const event &event)
+ : m_event(event)
+ {
+ }
+
+ template<class T>
+ future<void> &operator=(const future<T> &other)
+ {
+ m_event = other.get_event();
+
+ return *this;
+ }
+
+ future<void> &operator=(const future<void> &other)
+ {
+ if(this != &other){
+ m_event = other.m_event;
+ }
+
+ return *this;
+ }
+
+ ~future()
+ {
+ }
+
+ void get()
+ {
+ wait();
+ }
+
+ bool valid() const
+ {
+ return m_event != 0;
+ }
+
+ void wait() const
+ {
+ m_event.wait();
+ }
+
+ event get_event() const
+ {
+ return m_event;
+ }
+
+private:
+ event m_event;
+};
+
+/// \internal_
+template<class Result>
+inline future<Result> make_future(const Result &result, const event &event)
+{
+ return future<Result>(result, event);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ASYNC_FUTURE_HPP
diff --git a/boost/compute/async/wait.hpp b/boost/compute/async/wait.hpp
new file mode 100644
index 0000000000..dacf0feb0c
--- /dev/null
+++ b/boost/compute/async/wait.hpp
@@ -0,0 +1,56 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ASYNC_WAIT_HPP
+#define BOOST_COMPUTE_ASYNC_WAIT_HPP
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/utility/wait_list.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+template<class Event>
+inline void insert_events_variadic(wait_list &l, Event&& event)
+{
+ l.insert(std::forward<Event>(event));
+}
+
+template<class Event, class... Rest>
+inline void insert_events_variadic(wait_list &l, Event&& event, Rest&&... rest)
+{
+ l.insert(std::forward<Event>(event));
+
+ insert_events_variadic(l, std::forward<Rest>(rest)...);
+}
+#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+} // end detail namespace
+
+#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+/// Blocks until all events have completed. Events can either be \ref event
+/// objects or \ref future "future<T>" objects.
+///
+/// \see event, wait_list
+template<class... Events>
+inline void wait_for_all(Events&&... events)
+{
+ wait_list l;
+ detail::insert_events_variadic(l, std::forward<Events>(events)...);
+ l.wait();
+}
+#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ASYNC_WAIT_HPP
diff --git a/boost/compute/async/wait_guard.hpp b/boost/compute/async/wait_guard.hpp
new file mode 100644
index 0000000000..46018fa35a
--- /dev/null
+++ b/boost/compute/async/wait_guard.hpp
@@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP
+#define BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP
+
+#include <boost/noncopyable.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class wait_guard
+/// \brief A guard object for synchronizing an operation on the device
+///
+/// The wait_guard class stores a waitable object representing an operation
+/// on a compute device (e.g. \ref event, \ref future "future<T>") and calls
+/// its \c wait() method when the guard object goes out of scope.
+///
+/// This is useful for ensuring that an OpenCL operation completes before
+/// leaving the current scope and cleaning up any resources.
+///
+/// For example:
+/// \code
+/// // enqueue a compute kernel for execution
+/// event e = queue.enqueue_nd_range_kernel(...);
+///
+/// // call e.wait() upon exiting the current scope
+/// wait_guard<event> guard(e);
+/// \endcode
+///
+/// \see wait_list, wait_for_all()
+template<class Waitable>
+class wait_guard : boost::noncopyable
+{
+public:
+ /// Creates a new wait_guard object for \p waitable.
+ wait_guard(const Waitable &waitable)
+ : m_waitable(waitable)
+ {
+ }
+
+ /// Destroys the wait_guard object. The default implementation will call
+ /// \c wait() on the stored waitable object.
+ ~wait_guard()
+ {
+ m_waitable.wait();
+ }
+
+private:
+ Waitable m_waitable;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP
diff --git a/boost/compute/buffer.hpp b/boost/compute/buffer.hpp
new file mode 100644
index 0000000000..b5a48806d5
--- /dev/null
+++ b/boost/compute/buffer.hpp
@@ -0,0 +1,227 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_BUFFER_HPP
+#define BOOST_COMPUTE_BUFFER_HPP
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/memory_object.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declarations
+class command_queue;
+
+/// \class buffer
+/// \brief A memory buffer on a compute device.
+///
+/// The buffer class represents a memory buffer on a compute device.
+///
+/// Buffers are allocated within a compute context. For example, to allocate
+/// a memory buffer for 32 float's:
+///
+/// \snippet test/test_buffer.cpp constructor
+///
+/// Once created, data can be copied to and from the buffer using the
+/// \c enqueue_*_buffer() methods in the command_queue class. For example, to
+/// copy a set of \c int values from the host to the device:
+/// \code
+/// int data[] = { 1, 2, 3, 4 };
+///
+/// queue.enqueue_write_buffer(buf, 0, 4 * sizeof(int), data);
+/// \endcode
+///
+/// Also see the copy() algorithm for a higher-level interface to copying data
+/// between the host and the device. For a higher-level, dynamically-resizable,
+/// type-safe container for data on a compute device, use the vector<T> class.
+///
+/// Buffer objects have reference semantics. Creating a copy of a buffer
+/// object simply creates another reference to the underlying OpenCL memory
+/// object. To create an actual copy use the buffer::clone() method.
+///
+/// \see context, command_queue
+class buffer : public memory_object
+{
+public:
+ /// Creates a null buffer object.
+ buffer()
+ : memory_object()
+ {
+ }
+
+ /// Creates a buffer object for \p mem. If \p retain is \c true, the
+ /// reference count for \p mem will be incremented.
+ explicit buffer(cl_mem mem, bool retain = true)
+ : memory_object(mem, retain)
+ {
+ }
+
+ /// Creates a new memory buffer of \p size bytes with \p flags in
+ /// \p context.
+ ///
+ /// \see_opencl_ref{clCreateBuffer}
+ buffer(const context &context,
+ size_t size,
+ cl_mem_flags flags = read_write,
+ void *host_ptr = 0)
+ {
+ cl_int error = 0;
+ m_mem = clCreateBuffer(context,
+ flags,
+ (std::max)(size, size_t(1)),
+ host_ptr,
+ &error);
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new buffer object as a copy of \p other.
+ buffer(const buffer &other)
+ : memory_object(other)
+ {
+ }
+
+ /// Copies the buffer object from \p other to \c *this.
+ buffer& operator=(const buffer &other)
+ {
+ if(this != &other){
+ memory_object::operator=(other);
+ }
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new buffer object from \p other.
+ buffer(buffer&& other) BOOST_NOEXCEPT
+ : memory_object(std::move(other))
+ {
+ }
+
+ /// Move-assigns the buffer from \p other to \c *this.
+ buffer& operator=(buffer&& other) BOOST_NOEXCEPT
+ {
+ memory_object::operator=(std::move(other));
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the buffer object.
+ ~buffer()
+ {
+ }
+
+ /// Returns the size of the buffer in bytes.
+ size_t size() const
+ {
+ return get_memory_size();
+ }
+
+ /// \internal_
+ size_t max_size() const
+ {
+ return get_context().get_device().max_memory_alloc_size();
+ }
+
+ /// Returns information about the buffer.
+ ///
+ /// \see_opencl_ref{clGetMemObjectInfo}
+ template<class T>
+ T get_info(cl_mem_info info) const
+ {
+ return get_memory_info<T>(info);
+ }
+
+ /// \overload
+ template<int Enum>
+ typename detail::get_object_info_type<buffer, Enum>::type
+ get_info() const;
+
+ /// Creates a new buffer with a copy of the data in \c *this. Uses
+ /// \p queue to perform the copy.
+ buffer clone(command_queue &queue) const;
+
+ #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Creates a new buffer out of this buffer.
+ /// The new buffer is a sub-region of this buffer.
+ /// \param flags The mem_flags which should be used to create the new buffer
+ /// \param origin The offset (in bytes) in this buffer where the sub-buffer starts
+ /// \param size The size (in bytes) of the new sub-buffer
+ ///
+ /// \see_opencl_ref{clCreateSubBuffer}
+ ///
+ /// \opencl_version_warning{1,1}
+ buffer create_subbuffer(cl_mem_flags flags, size_t origin,
+ size_t size)
+ {
+ BOOST_ASSERT(origin + size <= this->size());
+ BOOST_ASSERT(origin % (get_context().
+ get_device().
+ get_info<CL_DEVICE_MEM_BASE_ADDR_ALIGN>() / 8) == 0);
+ cl_int error = 0;
+
+ cl_buffer_region region = { origin, size };
+
+ cl_mem mem = clCreateSubBuffer(m_mem,
+ flags,
+ CL_BUFFER_CREATE_TYPE_REGION,
+ &region,
+ &error);
+
+ if(!mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+
+ return buffer(mem, false);
+ }
+ #endif // CL_VERSION_1_1
+};
+
+/// \internal_ define get_info() specializations for buffer
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer,
+ ((cl_mem_object_type, CL_MEM_TYPE))
+ ((cl_mem_flags, CL_MEM_FLAGS))
+ ((size_t, CL_MEM_SIZE))
+ ((void *, CL_MEM_HOST_PTR))
+ ((cl_uint, CL_MEM_MAP_COUNT))
+ ((cl_uint, CL_MEM_REFERENCE_COUNT))
+ ((cl_context, CL_MEM_CONTEXT))
+)
+
+#ifdef CL_VERSION_1_1
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer,
+ ((cl_mem, CL_MEM_ASSOCIATED_MEMOBJECT))
+ ((size_t, CL_MEM_OFFSET))
+)
+#endif // CL_VERSION_1_1
+
+namespace detail {
+
+// set_kernel_arg specialization for buffer
+template<>
+struct set_kernel_arg<buffer>
+{
+ void operator()(kernel &kernel_, size_t index, const buffer &buffer_)
+ {
+ kernel_.set_arg(index, buffer_.get());
+ }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_BUFFER_HPP
diff --git a/boost/compute/cl.hpp b/boost/compute/cl.hpp
new file mode 100644
index 0000000000..c439d8dfdc
--- /dev/null
+++ b/boost/compute/cl.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CL_HPP
+#define BOOST_COMPUTE_CL_HPP
+
+#if defined(__APPLE__)
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#endif // BOOST_COMPUTE_CL_HPP
diff --git a/boost/compute/cl_ext.hpp b/boost/compute/cl_ext.hpp
new file mode 100644
index 0000000000..0b21a12fd7
--- /dev/null
+++ b/boost/compute/cl_ext.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CL_EXT_HPP
+#define BOOST_COMPUTE_CL_EXT_HPP
+
+#if defined(__APPLE__)
+#include <OpenCL/cl_ext.h>
+#else
+#include <CL/cl_ext.h>
+#endif
+
+#endif // BOOST_COMPUTE_CL_EXT_HPP
diff --git a/boost/compute/closure.hpp b/boost/compute/closure.hpp
new file mode 100644
index 0000000000..6e3cbe702b
--- /dev/null
+++ b/boost/compute/closure.hpp
@@ -0,0 +1,347 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CLOSURE_HPP
+#define BOOST_COMPUTE_CLOSURE_HPP
+
+#include <string>
+#include <sstream>
+
+#include <boost/config.hpp>
+#include <boost/fusion/adapted/boost_tuple.hpp>
+#include <boost/fusion/algorithm/iteration/for_each.hpp>
+#include <boost/mpl/for_each.hpp>
+#include <boost/mpl/transform.hpp>
+#include <boost/typeof/typeof.hpp>
+#include <boost/static_assert.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/type_traits/function_traits.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/type_traits/detail/capture_traits.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class ResultType, class ArgTuple, class CaptureTuple>
+class invoked_closure
+{
+public:
+ typedef ResultType result_type;
+
+ BOOST_STATIC_CONSTANT(
+ size_t, arity = boost::tuples::length<ArgTuple>::value
+ );
+
+ invoked_closure(const std::string &name,
+ const std::string &source,
+ const std::map<std::string, std::string> &definitions,
+ const ArgTuple &args,
+ const CaptureTuple &capture)
+ : m_name(name),
+ m_source(source),
+ m_definitions(definitions),
+ m_args(args),
+ m_capture(capture)
+ {
+ }
+
+ std::string name() const
+ {
+ return m_name;
+ }
+
+ std::string source() const
+ {
+ return m_source;
+ }
+
+ const std::map<std::string, std::string>& definitions() const
+ {
+ return m_definitions;
+ }
+
+ const ArgTuple& args() const
+ {
+ return m_args;
+ }
+
+ const CaptureTuple& capture() const
+ {
+ return m_capture;
+ }
+
+private:
+ std::string m_name;
+ std::string m_source;
+ std::map<std::string, std::string> m_definitions;
+ ArgTuple m_args;
+ CaptureTuple m_capture;
+};
+
+} // end detail namespace
+
+/// \internal_
+template<class Signature, class CaptureTuple>
+class closure
+{
+public:
+ typedef typename
+ boost::function_traits<Signature>::result_type result_type;
+
+ BOOST_STATIC_CONSTANT(
+ size_t, arity = boost::function_traits<Signature>::arity
+ );
+
+ closure(const std::string &name,
+ const CaptureTuple &capture,
+ const std::string &source)
+ : m_name(name),
+ m_source(source),
+ m_capture(capture)
+ {
+ }
+
+ ~closure()
+ {
+ }
+
+ std::string name() const
+ {
+ return m_name;
+ }
+
+ /// \internal_
+ std::string source() const
+ {
+ return m_source;
+ }
+
+ /// \internal_
+ void define(std::string name, std::string value = std::string())
+ {
+ m_definitions[name] = value;
+ }
+
+ /// \internal_
+ detail::invoked_closure<result_type, boost::tuple<>, CaptureTuple>
+ operator()() const
+ {
+ BOOST_STATIC_ASSERT_MSG(
+ arity == 0,
+ "Non-nullary closure function invoked with zero arguments"
+ );
+
+ return detail::invoked_closure<result_type, boost::tuple<>, CaptureTuple>(
+ m_name, m_source, m_definitions, boost::make_tuple(), m_capture
+ );
+ }
+
+ /// \internal_
+ template<class Arg1>
+ detail::invoked_closure<result_type, boost::tuple<Arg1>, CaptureTuple>
+ operator()(const Arg1 &arg1) const
+ {
+ BOOST_STATIC_ASSERT_MSG(
+ arity == 1,
+ "Non-unary closure function invoked with one argument"
+ );
+
+ return detail::invoked_closure<result_type, boost::tuple<Arg1>, CaptureTuple>(
+ m_name, m_source, m_definitions, boost::make_tuple(arg1), m_capture
+ );
+ }
+
+ /// \internal_
+ template<class Arg1, class Arg2>
+ detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2>, CaptureTuple>
+ operator()(const Arg1 &arg1, const Arg2 &arg2) const
+ {
+ BOOST_STATIC_ASSERT_MSG(
+ arity == 2,
+ "Non-binary closure function invoked with two arguments"
+ );
+
+ return detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2>, CaptureTuple>(
+ m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2), m_capture
+ );
+ }
+
+ /// \internal_
+ template<class Arg1, class Arg2, class Arg3>
+ detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2, Arg3>, CaptureTuple>
+ operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const
+ {
+ BOOST_STATIC_ASSERT_MSG(
+ arity == 3,
+ "Non-ternary closure function invoked with three arguments"
+ );
+
+ return detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2, Arg3>, CaptureTuple>(
+ m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3), m_capture
+ );
+ }
+
+private:
+ std::string m_name;
+ std::string m_source;
+ std::map<std::string, std::string> m_definitions;
+ CaptureTuple m_capture;
+};
+
+namespace detail {
+
+struct closure_signature_argument_inserter
+{
+ closure_signature_argument_inserter(std::stringstream &s_,
+ const char *capture_string,
+ size_t last)
+ : s(s_)
+ {
+ n = 0;
+ m_last = last;
+
+ size_t capture_string_length = std::strlen(capture_string);
+ BOOST_ASSERT(capture_string[0] == '(' &&
+ capture_string[capture_string_length-1] == ')');
+ std::string capture_string_(capture_string + 1, capture_string_length - 2);
+ boost::split(m_capture_names, capture_string_ , boost::is_any_of(","));
+ }
+
+ template<class T>
+ void operator()(const T&) const
+ {
+ BOOST_ASSERT(n < m_capture_names.size());
+
+ // get captured variable name
+ std::string variable_name = m_capture_names[n];
+
+ // remove leading and trailing whitespace from variable name
+ boost::trim(variable_name);
+
+ s << capture_traits<T>::type_name() << " " << variable_name;
+ if(n+1 < m_last){
+ s << ", ";
+ }
+ n++;
+ }
+
+ mutable size_t n;
+ size_t m_last;
+ std::vector<std::string> m_capture_names;
+ std::stringstream &s;
+};
+
+// Builds "inline <result> <name>(<args>, <captures>)" for a closure; the
+// separating comma is only emitted when both lists are non-empty.
+template<class Signature, class CaptureTuple>
+inline std::string
+make_closure_declaration(const char *name,
+ const char *arguments,
+ const CaptureTuple &capture_tuple,
+ const char *capture_string)
+{
+ typedef typename boost::function_traits<Signature>::result_type result_type;
+ typedef typename
+ boost::function_types::parameter_types<Signature>::type parameter_types;
+ typedef typename mpl::size<parameter_types>::type arity_type;
+ const size_t capture_count = boost::tuples::length<CaptureTuple>::value;
+
+ std::stringstream s;
+ s << "inline " << type_name<result_type>() << " " << name << "(";
+
+ // insert function arguments
+ signature_argument_inserter i(s, arguments, arity_type::value);
+ mpl::for_each<
+ typename mpl::transform<parameter_types, boost::add_pointer<mpl::_1>
+ >::type>(i);
+
+ // a bare ", " next to an empty list would not be valid OpenCL C
+ if(arity_type::value > 0 && capture_count > 0){ s << ", "; }
+
+ // insert capture arguments
+ closure_signature_argument_inserter j(s, capture_string, capture_count);
+ fusion::for_each(capture_tuple, j);
+
+ s << ")";
+ return s.str();
+}
+
+// used by the BOOST_COMPUTE_CLOSURE() macro to create a closure
+// function with the given signature, name, capture, and source.
+template<class Signature, class CaptureTuple>
+inline closure<Signature, CaptureTuple>
+make_closure_impl(const char *name,
+ const char *arguments,
+ const CaptureTuple &capture,
+ const char *capture_string,
+ const std::string &source)
+{
+ std::stringstream s;
+ s << make_closure_declaration<Signature>(name, arguments, capture, capture_string);
+ s << source;
+
+ return closure<Signature, CaptureTuple>(name, capture, s.str());
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+/// Creates a closure function object with \p name and \p source.
+///
+/// \param return_type The return type for the function.
+/// \param name The name of the function.
+/// \param arguments A list of arguments for the function.
+/// \param capture A list of variables to capture.
+/// \param source The OpenCL C source code for the function.
+///
+/// For example, to create a function which checks if a 2D point is
+/// contained in a circle of a given radius:
+/// \code
+/// // radius variable declared in C++
+/// float radius = 1.5f;
+///
+/// // create a closure function which returns true if the 2D point
+/// // argument is contained within a circle of the given radius
+/// BOOST_COMPUTE_CLOSURE(bool, is_in_circle, (const float2_ p), (radius),
+/// {
+/// return sqrt(p.x*p.x + p.y*p.y) < radius;
+/// });
+///
+/// // vector of 2D points
+/// boost::compute::vector<float2_> points = ...
+///
+/// // count number of points in the circle
+/// size_t count = boost::compute::count_if(
+/// points.begin(), points.end(), is_in_circle, queue
+/// );
+/// \endcode
+///
+/// \see BOOST_COMPUTE_FUNCTION()
+#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
+#define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, source)
+#else
+#define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, ...) \
+ ::boost::compute::closure< \
+ return_type arguments, BOOST_TYPEOF(boost::tie capture) \
+ > name = \
+ ::boost::compute::detail::make_closure_impl< \
+ return_type arguments \
+ >( \
+ #name, #arguments, boost::tie capture, #capture, #__VA_ARGS__ \
+ )
+#endif
+
+#endif // BOOST_COMPUTE_CLOSURE_HPP
diff --git a/boost/compute/command_queue.hpp b/boost/compute/command_queue.hpp
new file mode 100644
index 0000000000..2a1328a959
--- /dev/null
+++ b/boost/compute/command_queue.hpp
@@ -0,0 +1,1881 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_COMMAND_QUEUE_HPP
+#define BOOST_COMPUTE_COMMAND_QUEUE_HPP
+
+#include <cstddef>
+#include <cstring>
+#include <algorithm>
+#include <vector>
+
+#include <boost/assert.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/event.hpp>
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/image/image1d.hpp>
+#include <boost/compute/image/image2d.hpp>
+#include <boost/compute/image/image3d.hpp>
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/utility/wait_list.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+#include <boost/compute/detail/diagnostic.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Callback that reads a nullary function pointer out of the memory at
+/// \p user_func_ptr and invokes it.
+inline void BOOST_COMPUTE_CL_CALLBACK
+nullary_native_kernel_trampoline(void *user_func_ptr)
+{
+    typedef void (*nullary_function)();
+
+    // the pointer value is extracted with memcpy instead of a cast
+    nullary_function callback;
+    std::memcpy(&callback, user_func_ptr, sizeof(callback));
+
+    callback();
+}
+
+} // end detail namespace
+
+/// \class command_queue
+/// \brief A command queue.
+///
+/// Command queues provide the interface for interacting with compute
+/// devices. The command_queue class provides methods to copy data to
+/// and from a compute device as well as execute compute kernels.
+///
+/// Command queues are created for a compute device within a compute
+/// context.
+///
+/// For example, to create a context and command queue for the default device
+/// on the system (this is the normal set up code used by almost all OpenCL
+/// programs):
+/// \code
+/// #include <boost/compute/core.hpp>
+///
+/// // get the default compute device
+/// boost::compute::device device = boost::compute::system::default_device();
+///
+/// // set up a compute context and command queue
+/// boost::compute::context context(device);
+/// boost::compute::command_queue queue(context, device);
+/// \endcode
+///
+/// The default command queue for the system can be obtained with the
+/// system::default_queue() method.
+///
+/// \see buffer, context, kernel
+class command_queue
+{
+public:
+ /// Command queue property flags; each enumerator has the value of the
+ /// corresponding OpenCL \c CL_QUEUE_* constant.
+ enum properties {
+ enable_profiling = CL_QUEUE_PROFILING_ENABLE,
+ enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ };
+
+ /// Flags for mapping operations; each enumerator has the value of the
+ /// corresponding OpenCL \c CL_MAP_* constant.
+ /// \c map_write_invalidate_region is only declared when
+ /// \c CL_VERSION_1_2 is defined.
+ enum map_flags {
+ map_read = CL_MAP_READ,
+ map_write = CL_MAP_WRITE
+ #ifdef CL_VERSION_1_2
+ ,
+ map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION
+ #endif
+ };
+
+    /// Creates a null command queue, i.e. one whose underlying handle
+    /// is zero.
+    command_queue() : m_queue(0) {}
+
+    /// Creates a command queue object wrapping the OpenCL handle \p queue.
+    /// If \p retain is \c true (the default) and \p queue is non-null,
+    /// clRetainCommandQueue() is called on it.
+    explicit command_queue(cl_command_queue queue, bool retain = true)
+        : m_queue(queue)
+    {
+        if(retain && m_queue){
+            clRetainCommandQueue(m_queue);
+        }
+    }
+
+    /// Creates a command queue in \p context for \p device with
+    /// \p properties.
+    ///
+    /// When \c CL_VERSION_2_0 is defined and
+    /// \c device.check_version(2, 0) is true the queue is created with
+    /// clCreateCommandQueueWithProperties(); otherwise the deprecated
+    /// clCreateCommandQueue() is used. Throws \c opencl_error if no queue
+    /// is returned.
+    ///
+    /// \see_opencl_ref{clCreateCommandQueue}
+    command_queue(const context &context,
+                  const device &device,
+                  cl_command_queue_properties properties = 0)
+    {
+        BOOST_ASSERT(device.id() != 0);
+
+        cl_int error = 0;
+
+        #ifdef CL_VERSION_2_0
+        if (device.check_version(2, 0)){
+            // zero-terminated {name, value} list; stays empty (and a null
+            // pointer is passed) when no properties were requested
+            std::vector<cl_queue_properties> property_list;
+            if(properties){
+                property_list.push_back(CL_QUEUE_PROPERTIES);
+                property_list.push_back(cl_queue_properties(properties));
+                property_list.push_back(cl_queue_properties(0));
+            }
+
+            const cl_queue_properties *property_list_ptr = 0;
+            if(!property_list.empty()){
+                property_list_ptr = &property_list[0];
+            }
+
+            m_queue = clCreateCommandQueueWithProperties(
+                context, device.id(), property_list_ptr, &error
+            );
+        } else
+        #endif
+        {
+            // clCreateCommandQueue() is deprecated, silence the warning
+            BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
+            m_queue = clCreateCommandQueue(
+                context, device.id(), properties, &error
+            );
+            BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
+        }
+
+        if(!m_queue){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+    }
+
+    /// Copy constructor. The new object shares the handle of \p other; a
+    /// non-null handle is retained with clRetainCommandQueue().
+    command_queue(const command_queue &other)
+        : m_queue(other.m_queue)
+    {
+        if(m_queue != 0){
+            clRetainCommandQueue(m_queue);
+        }
+    }
+
+    /// Copy assignment. Releases the handle currently held (if any), then
+    /// takes and retains the handle of \p other. Self-assignment is a
+    /// no-op.
+    command_queue& operator=(const command_queue &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        if(m_queue){
+            clReleaseCommandQueue(m_queue);
+        }
+
+        m_queue = other.m_queue;
+        if(m_queue){
+            clRetainCommandQueue(m_queue);
+        }
+
+        return *this;
+    }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move constructor. Takes over the handle of \p other and leaves
+    /// \p other as a null command queue.
+    command_queue(command_queue&& other) BOOST_NOEXCEPT
+        : m_queue(other.m_queue)
+    {
+        other.m_queue = 0;
+    }
+
+    /// Move-assigns the command queue from \p other to \c *this.
+    ///
+    /// The handle currently held (if any) is released, the handle of
+    /// \p other is taken over and \p other is left as a null command
+    /// queue. Self-move-assignment leaves the object unchanged.
+    command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT
+    {
+        // guard against q = std::move(q): without it the handle would be
+        // released and then reset to null
+        if(this != &other){
+            if(m_queue){
+                clReleaseCommandQueue(m_queue);
+            }
+
+            m_queue = other.m_queue;
+            other.m_queue = 0;
+        }
+
+        return *this;
+    }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destructor. Releases the underlying handle when it is non-null.
+    ///
+    /// \see_opencl_ref{clReleaseCommandQueue}
+    ~command_queue()
+    {
+        if(!m_queue){
+            return;
+        }
+
+        BOOST_COMPUTE_ASSERT_CL_SUCCESS(clReleaseCommandQueue(m_queue));
+    }
+
+    /// Returns a mutable reference to the underlying OpenCL command queue
+    /// handle. The constness of \c *this is cast away.
+    cl_command_queue& get() const
+    {
+        cl_command_queue &handle = const_cast<cl_command_queue &>(m_queue);
+        return handle;
+    }
+
+    /// Returns the device the queue issues commands to, obtained by
+    /// querying \c CL_QUEUE_DEVICE.
+    device get_device() const
+    {
+        const cl_device_id id = get_info<cl_device_id>(CL_QUEUE_DEVICE);
+        return device(id);
+    }
+
+    /// Returns the context the queue belongs to, obtained by querying
+    /// \c CL_QUEUE_CONTEXT.
+    context get_context() const
+    {
+        const cl_context ctx = get_info<cl_context>(CL_QUEUE_CONTEXT);
+        return context(ctx);
+    }
+
+    /// Queries the command queue for \p info and returns the result as a
+    /// value of type \c T.
+    ///
+    /// \see_opencl_ref{clGetCommandQueueInfo}
+    template<class T>
+    T get_info(cl_command_queue_info info) const
+    {
+        T value = detail::get_object_info<T>(
+            clGetCommandQueueInfo, m_queue, info
+        );
+        return value;
+    }
+
+ /// \overload
+ ///
+ /// Variant selected by the compile-time constant \c Enum; the return
+ /// type is given by detail::get_object_info_type. Only declared here;
+ /// the definition is not part of this section of the file.
+ template<int Enum>
+ typename detail::get_object_info_type<command_queue, Enum>::type
+ get_info() const;
+
+    /// Returns the properties of the command queue, obtained by querying
+    /// \c CL_QUEUE_PROPERTIES.
+    cl_command_queue_properties get_properties() const
+    {
+        const cl_command_queue_properties props =
+            get_info<cl_command_queue_properties>(CL_QUEUE_PROPERTIES);
+        return props;
+    }
+
+    /// Enqueues a command to read \p size bytes starting at byte
+    /// \p offset of \p buffer into host memory at \p host_ptr. The call
+    /// is blocking (\c CL_TRUE is passed to OpenCL).
+    ///
+    /// \param events commands that must complete before the read runs
+    /// \return the event associated with the read command
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueReadBuffer}
+    ///
+    /// \see copy()
+    event enqueue_read_buffer(const buffer &buffer,
+                              size_t offset,
+                              size_t size,
+                              void *host_ptr,
+                              const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        // the whole region [offset, offset + size) must lie in the buffer
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueReadBuffer(
+            m_queue,
+            buffer.get(),
+            CL_TRUE,
+            offset,
+            size,
+            host_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+    /// Enqueues a command to read \p size bytes starting at byte
+    /// \p offset of \p buffer into host memory at \p host_ptr. The copy
+    /// will be performed asynchronously (\c CL_FALSE is passed to OpenCL).
+    ///
+    /// \param events commands that must complete before the read runs
+    /// \return the event associated with the read command
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueReadBuffer}
+    ///
+    /// \see copy_async()
+    event enqueue_read_buffer_async(const buffer &buffer,
+                                    size_t offset,
+                                    size_t size,
+                                    void *host_ptr,
+                                    const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        // the whole region [offset, offset + size) must lie in the buffer
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueReadBuffer(
+            m_queue,
+            buffer.get(),
+            CL_FALSE,
+            offset,
+            size,
+            host_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Enqueues a blocking read of a rectangular region of \p buffer
+    /// into host memory at \p host_ptr.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueReadBufferRect}
+    ///
+    /// \opencl_version_warning{1,1}
+    event enqueue_read_buffer_rect(const buffer &buffer,
+                                   const size_t buffer_origin[3],
+                                   const size_t host_origin[3],
+                                   const size_t region[3],
+                                   size_t buffer_row_pitch,
+                                   size_t buffer_slice_pitch,
+                                   size_t host_row_pitch,
+                                   size_t host_slice_pitch,
+                                   void *host_ptr,
+                                   const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueReadBufferRect(
+            m_queue, buffer.get(), CL_TRUE,
+            buffer_origin, host_origin, region,
+            buffer_row_pitch, buffer_slice_pitch,
+            host_row_pitch, host_slice_pitch,
+            host_ptr,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// Enqueues a non-blocking read of a rectangular region of
+    /// \p buffer into host memory at \p host_ptr.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueReadBufferRect}
+    ///
+    /// \opencl_version_warning{1,1}
+    event enqueue_read_buffer_rect_async(const buffer &buffer,
+                                         const size_t buffer_origin[3],
+                                         const size_t host_origin[3],
+                                         const size_t region[3],
+                                         size_t buffer_row_pitch,
+                                         size_t buffer_slice_pitch,
+                                         size_t host_row_pitch,
+                                         size_t host_slice_pitch,
+                                         void *host_ptr,
+                                         const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueReadBufferRect(
+            m_queue, buffer.get(), CL_FALSE,
+            buffer_origin, host_origin, region,
+            buffer_row_pitch, buffer_slice_pitch,
+            host_row_pitch, host_slice_pitch,
+            host_ptr,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+ #endif // CL_VERSION_1_1
+
+    /// Enqueues a command to write \p size bytes from host memory at
+    /// \p host_ptr into \p buffer, starting at byte \p offset. The call
+    /// is blocking (\c CL_TRUE is passed to OpenCL).
+    ///
+    /// \param events commands that must complete before the write runs
+    /// \return the event associated with the write command
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueWriteBuffer}
+    ///
+    /// \see copy()
+    event enqueue_write_buffer(const buffer &buffer,
+                               size_t offset,
+                               size_t size,
+                               const void *host_ptr,
+                               const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        // the whole region [offset, offset + size) must lie in the buffer
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueWriteBuffer(
+            m_queue,
+            buffer.get(),
+            CL_TRUE,
+            offset,
+            size,
+            host_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+    /// Enqueues a command to write \p size bytes from host memory at
+    /// \p host_ptr into \p buffer, starting at byte \p offset. The copy
+    /// is performed asynchronously (\c CL_FALSE is passed to OpenCL).
+    ///
+    /// \param events commands that must complete before the write runs
+    /// \return the event associated with the write command
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueWriteBuffer}
+    ///
+    /// \see copy_async()
+    event enqueue_write_buffer_async(const buffer &buffer,
+                                     size_t offset,
+                                     size_t size,
+                                     const void *host_ptr,
+                                     const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        // the whole region [offset, offset + size) must lie in the buffer
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueWriteBuffer(
+            m_queue,
+            buffer.get(),
+            CL_FALSE,
+            offset,
+            size,
+            host_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Enqueues a command to write a rectangular region from host memory
+    /// at \p host_ptr to \p buffer. The call is blocking (\c CL_TRUE is
+    /// passed to OpenCL).
+    ///
+    /// \p host_ptr is only read from, so it is accepted as a pointer to
+    /// const data.
+    ///
+    /// \param events commands that must complete before the write runs
+    /// \return the event associated with the write command
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueWriteBufferRect}
+    ///
+    /// \opencl_version_warning{1,1}
+    event enqueue_write_buffer_rect(const buffer &buffer,
+                                    const size_t buffer_origin[3],
+                                    const size_t host_origin[3],
+                                    const size_t region[3],
+                                    size_t buffer_row_pitch,
+                                    size_t buffer_slice_pitch,
+                                    size_t host_row_pitch,
+                                    size_t host_slice_pitch,
+                                    const void *host_ptr,
+                                    const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueWriteBufferRect(
+            m_queue,
+            buffer.get(),
+            CL_TRUE,
+            buffer_origin,
+            host_origin,
+            region,
+            buffer_row_pitch,
+            buffer_slice_pitch,
+            host_row_pitch,
+            host_slice_pitch,
+            host_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+    /// Enqueues a command to write a rectangular region from host memory
+    /// at \p host_ptr to \p buffer. The copy is performed asynchronously
+    /// (\c CL_FALSE is passed to OpenCL).
+    ///
+    /// \p host_ptr is only read from, so it is accepted as a pointer to
+    /// const data.
+    ///
+    /// \param events commands that must complete before the write runs
+    /// \return the event associated with the write command
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueWriteBufferRect}
+    ///
+    /// \opencl_version_warning{1,1}
+    event enqueue_write_buffer_rect_async(const buffer &buffer,
+                                          const size_t buffer_origin[3],
+                                          const size_t host_origin[3],
+                                          const size_t region[3],
+                                          size_t buffer_row_pitch,
+                                          size_t buffer_slice_pitch,
+                                          size_t host_row_pitch,
+                                          size_t host_slice_pitch,
+                                          const void *host_ptr,
+                                          const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+        BOOST_ASSERT(host_ptr != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueWriteBufferRect(
+            m_queue,
+            buffer.get(),
+            CL_FALSE,
+            buffer_origin,
+            host_origin,
+            region,
+            buffer_row_pitch,
+            buffer_slice_pitch,
+            host_row_pitch,
+            host_slice_pitch,
+            host_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+ #endif // CL_VERSION_1_1
+
+    /// Enqueues a copy of \p size bytes from \p src_buffer (starting at
+    /// \p src_offset) to \p dst_buffer (starting at \p dst_offset).
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueCopyBuffer}
+    ///
+    /// \see copy()
+    event enqueue_copy_buffer(const buffer &src_buffer,
+                              const buffer &dst_buffer,
+                              size_t src_offset,
+                              size_t dst_offset,
+                              size_t size,
+                              const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(src_offset + size <= src_buffer.size());
+        BOOST_ASSERT(dst_offset + size <= dst_buffer.size());
+        BOOST_ASSERT(src_buffer.get_context() == this->get_context());
+        BOOST_ASSERT(dst_buffer.get_context() == this->get_context());
+
+        event completion;
+
+        const cl_int status = clEnqueueCopyBuffer(
+            m_queue, src_buffer.get(), dst_buffer.get(),
+            src_offset, dst_offset, size,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+ #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Enqueues a copy of a rectangular region from \p src_buffer to
+    /// \p dst_buffer.
+    ///
+    /// \note Despite their names, \p buffer_row_pitch and
+    /// \p buffer_slice_pitch are forwarded in the positions of the
+    /// source pitches of clEnqueueCopyBufferRect(), and \p host_row_pitch
+    /// and \p host_slice_pitch in the positions of the destination
+    /// pitches.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueCopyBufferRect}
+    ///
+    /// \opencl_version_warning{1,1}
+    event enqueue_copy_buffer_rect(const buffer &src_buffer,
+                                   const buffer &dst_buffer,
+                                   const size_t src_origin[3],
+                                   const size_t dst_origin[3],
+                                   const size_t region[3],
+                                   size_t buffer_row_pitch,
+                                   size_t buffer_slice_pitch,
+                                   size_t host_row_pitch,
+                                   size_t host_slice_pitch,
+                                   const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(src_buffer.get_context() == this->get_context());
+        BOOST_ASSERT(dst_buffer.get_context() == this->get_context());
+
+        event completion;
+
+        const cl_int status = clEnqueueCopyBufferRect(
+            m_queue, src_buffer.get(), dst_buffer.get(),
+            src_origin, dst_origin, region,
+            buffer_row_pitch, buffer_slice_pitch,
+            host_row_pitch, host_slice_pitch,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+ #endif // CL_VERSION_1_1
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Enqueues a fill of \p size bytes of \p buffer, starting at
+    /// \p offset, with the \p pattern_size byte \p pattern.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueFillBuffer}
+    ///
+    /// \opencl_version_warning{1,2}
+    ///
+    /// \see fill()
+    event enqueue_fill_buffer(const buffer &buffer,
+                              const void *pattern,
+                              size_t pattern_size,
+                              size_t offset,
+                              size_t size,
+                              const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+
+        event completion;
+
+        const cl_int status = clEnqueueFillBuffer(
+            m_queue, buffer.get(),
+            pattern, pattern_size,
+            offset, size,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+ #endif // CL_VERSION_1_2
+
+    /// Enqueues a blocking map of \p size bytes of \p buffer, starting
+    /// at \p offset, into the host address space and returns the mapped
+    /// pointer. The event of the map operation is written to
+    /// \p map_buffer_event.
+    ///
+    /// Throws \c opencl_error if OpenCL does not report \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueMapBuffer}
+    void* enqueue_map_buffer(const buffer &buffer,
+                             cl_map_flags flags,
+                             size_t offset,
+                             size_t size,
+                             event &map_buffer_event,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+
+        cl_int status = 0;
+        void *mapped = clEnqueueMapBuffer(
+            m_queue, buffer.get(), CL_TRUE, flags,
+            offset, size,
+            events.size(), events.get_event_ptr(),
+            &map_buffer_event.get(),
+            &status
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return mapped;
+    }
+
+    /// \overload
+    ///
+    /// Same as above for callers that do not need the map event; a
+    /// local event object receives it and is discarded.
+    void* enqueue_map_buffer(const buffer &buffer,
+                             cl_map_flags flags,
+                             size_t offset,
+                             size_t size,
+                             const wait_list &events = wait_list())
+    {
+        event discarded_event;
+        return enqueue_map_buffer(
+            buffer, flags, offset, size, discarded_event, events
+        );
+    }
+
+    /// Enqueues a non-blocking map of \p size bytes of \p buffer,
+    /// starting at \p offset, into the host address space. The returned
+    /// pointer cannot be used until the map operation has completed.
+    ///
+    /// The event of the map operation is written to
+    /// \p map_buffer_event.
+    ///
+    /// Throws \c opencl_error if OpenCL does not report \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueMapBuffer}
+    void* enqueue_map_buffer_async(const buffer &buffer,
+                                   cl_map_flags flags,
+                                   size_t offset,
+                                   size_t size,
+                                   event &map_buffer_event,
+                                   const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(offset + size <= buffer.size());
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+
+        cl_int status = 0;
+        void *mapped = clEnqueueMapBuffer(
+            m_queue, buffer.get(), CL_FALSE, flags,
+            offset, size,
+            events.size(), events.get_event_ptr(),
+            &map_buffer_event.get(),
+            &status
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return mapped;
+    }
+
+    /// Enqueues an unmap of the region \p mapped_ptr of \p buffer from
+    /// the host memory space. Forwards to enqueue_unmap_mem_object().
+    ///
+    /// \see_opencl_ref{clEnqueueUnmapMemObject}
+    event enqueue_unmap_buffer(const buffer &buffer,
+                               void *mapped_ptr,
+                               const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(buffer.get_context() == this->get_context());
+
+        const cl_mem mem = buffer.get();
+        return enqueue_unmap_mem_object(mem, mapped_ptr, events);
+    }
+
+    /// Enqueues an unmap of the region \p mapped_ptr of the memory
+    /// object \p mem from the host memory space.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueUnmapMemObject}
+    event enqueue_unmap_mem_object(cl_mem mem,
+                                   void *mapped_ptr,
+                                   const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueUnmapMemObject(
+            m_queue, mem, mapped_ptr,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// Enqueues a blocking read of the \p region of \p image starting
+    /// at \p origin into host memory at \p host_ptr.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueReadImage}
+    event enqueue_read_image(const image_object& image,
+                             const size_t *origin,
+                             const size_t *region,
+                             size_t row_pitch,
+                             size_t slice_pitch,
+                             void *host_ptr,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueReadImage(
+            m_queue, image.get(), CL_TRUE,
+            origin, region,
+            row_pitch, slice_pitch,
+            host_ptr,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// \overload
+    ///
+    /// Takes \p origin and \p region as N-dimensional extents; missing
+    /// dimensions are filled with 0 (origin) and 1 (region). Note that
+    /// \p host_ptr precedes the pitch parameters in this overload.
+    template<size_t N>
+    event enqueue_read_image(const image_object& image,
+                             const extents<N> origin,
+                             const extents<N> region,
+                             void *host_ptr,
+                             size_t row_pitch = 0,
+                             size_t slice_pitch = 0,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        // expand to the three-component arrays OpenCL expects
+        size_t padded_origin[3] = { 0, 0, 0 };
+        std::copy(origin.data(), origin.data() + N, padded_origin);
+
+        size_t padded_region[3] = { 1, 1, 1 };
+        std::copy(region.data(), region.data() + N, padded_region);
+
+        return enqueue_read_image(
+            image, padded_origin, padded_region,
+            row_pitch, slice_pitch, host_ptr, events
+        );
+    }
+
+    /// Enqueues a blocking write from host memory at \p host_ptr to the
+    /// \p region of \p image starting at \p origin.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueWriteImage}
+    event enqueue_write_image(image_object& image,
+                              const size_t *origin,
+                              const size_t *region,
+                              const void *host_ptr,
+                              size_t input_row_pitch = 0,
+                              size_t input_slice_pitch = 0,
+                              const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueWriteImage(
+            m_queue, image.get(), CL_TRUE,
+            origin, region,
+            input_row_pitch, input_slice_pitch,
+            host_ptr,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// \overload
+    ///
+    /// Takes \p origin and \p region as N-dimensional extents; missing
+    /// dimensions are filled with 0 (origin) and 1 (region).
+    template<size_t N>
+    event enqueue_write_image(image_object& image,
+                              const extents<N> origin,
+                              const extents<N> region,
+                              const void *host_ptr,
+                              const size_t input_row_pitch = 0,
+                              const size_t input_slice_pitch = 0,
+                              const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        // expand to the three-component arrays OpenCL expects
+        size_t padded_origin[3] = { 0, 0, 0 };
+        std::copy(origin.data(), origin.data() + N, padded_origin);
+
+        size_t padded_region[3] = { 1, 1, 1 };
+        std::copy(region.data(), region.data() + N, padded_region);
+
+        return enqueue_write_image(
+            image, padded_origin, padded_region, host_ptr,
+            input_row_pitch, input_slice_pitch, events
+        );
+    }
+
+    /// Enqueues a blocking map of the \p region of \p image starting at
+    /// \p origin into the host address space and returns the mapped
+    /// pointer. The pitches of the mapped region are written to
+    /// \p output_row_pitch and \p output_slice_pitch.
+    ///
+    /// The event of the map operation is written to
+    /// \p map_image_event.
+    ///
+    /// Throws \c opencl_error if OpenCL does not report \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueMapImage}
+    void* enqueue_map_image(const image_object &image,
+                            cl_map_flags flags,
+                            const size_t *origin,
+                            const size_t *region,
+                            size_t &output_row_pitch,
+                            size_t &output_slice_pitch,
+                            event &map_image_event,
+                            const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        cl_int status = 0;
+        void *mapped = clEnqueueMapImage(
+            m_queue, image.get(), CL_TRUE, flags,
+            origin, region,
+            &output_row_pitch, &output_slice_pitch,
+            events.size(), events.get_event_ptr(),
+            &map_image_event.get(),
+            &status
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return mapped;
+    }
+
+    /// \overload
+    ///
+    /// Same as above for callers that do not need the map event; a
+    /// local event object receives it and is discarded.
+    void* enqueue_map_image(const image_object &image,
+                            cl_map_flags flags,
+                            const size_t *origin,
+                            const size_t *region,
+                            size_t &output_row_pitch,
+                            size_t &output_slice_pitch,
+                            const wait_list &events = wait_list())
+    {
+        event discarded_event;
+        return enqueue_map_image(
+            image, flags, origin, region,
+            output_row_pitch, output_slice_pitch,
+            discarded_event, events
+        );
+    }
+
+    /// \overload
+    ///
+    /// Takes \p origin and \p region as N-dimensional extents; missing
+    /// dimensions are filled with 0 (origin) and 1 (region).
+    template<size_t N>
+    void* enqueue_map_image(image_object& image,
+                            cl_map_flags flags,
+                            const extents<N> origin,
+                            const extents<N> region,
+                            size_t &output_row_pitch,
+                            size_t &output_slice_pitch,
+                            event &map_image_event,
+                            const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        // expand to the three-component arrays OpenCL expects
+        size_t padded_origin[3] = { 0, 0, 0 };
+        std::copy(origin.data(), origin.data() + N, padded_origin);
+
+        size_t padded_region[3] = { 1, 1, 1 };
+        std::copy(region.data(), region.data() + N, padded_region);
+
+        return enqueue_map_image(
+            image, flags, padded_origin, padded_region,
+            output_row_pitch, output_slice_pitch,
+            map_image_event, events
+        );
+    }
+
+    /// \overload
+    ///
+    /// Extents-based variant for callers that do not need the map event;
+    /// a local event object receives it and is discarded.
+    template<size_t N>
+    void* enqueue_map_image(image_object& image,
+                            cl_map_flags flags,
+                            const extents<N> origin,
+                            const extents<N> region,
+                            size_t &output_row_pitch,
+                            size_t &output_slice_pitch,
+                            const wait_list &events = wait_list())
+    {
+        event discarded_event;
+        return enqueue_map_image(
+            image, flags, origin, region,
+            output_row_pitch, output_slice_pitch,
+            discarded_event, events
+        );
+    }
+
+    /// Enqueues a non-blocking map of the \p region of \p image
+    /// starting at \p origin into the host address space. The returned
+    /// pointer cannot be used until the map operation has completed.
+    ///
+    /// The event of the map operation is written to
+    /// \p map_image_event.
+    ///
+    /// Throws \c opencl_error if OpenCL does not report \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueMapImage}
+    void* enqueue_map_image_async(const image_object &image,
+                                  cl_map_flags flags,
+                                  const size_t *origin,
+                                  const size_t *region,
+                                  size_t &output_row_pitch,
+                                  size_t &output_slice_pitch,
+                                  event &map_image_event,
+                                  const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        cl_int status = 0;
+        void *mapped = clEnqueueMapImage(
+            m_queue, image.get(), CL_FALSE, flags,
+            origin, region,
+            &output_row_pitch, &output_slice_pitch,
+            events.size(), events.get_event_ptr(),
+            &map_image_event.get(),
+            &status
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return mapped;
+    }
+
+    /// \overload
+    ///
+    /// Takes \p origin and \p region as N-dimensional extents; missing
+    /// dimensions are filled with 0 (origin) and 1 (region).
+    template<size_t N>
+    void* enqueue_map_image_async(image_object& image,
+                                  cl_map_flags flags,
+                                  const extents<N> origin,
+                                  const extents<N> region,
+                                  size_t &output_row_pitch,
+                                  size_t &output_slice_pitch,
+                                  event &map_image_event,
+                                  const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        // expand to the three-component arrays OpenCL expects
+        size_t padded_origin[3] = { 0, 0, 0 };
+        std::copy(origin.data(), origin.data() + N, padded_origin);
+
+        size_t padded_region[3] = { 1, 1, 1 };
+        std::copy(region.data(), region.data() + N, padded_region);
+
+        return enqueue_map_image_async(
+            image, flags, padded_origin, padded_region,
+            output_row_pitch, output_slice_pitch,
+            map_image_event, events
+        );
+    }
+
+    /// Enqueues an unmap of the region \p mapped_ptr of \p image from
+    /// the host memory space. Forwards to enqueue_unmap_mem_object().
+    ///
+    /// \see_opencl_ref{clEnqueueUnmapMemObject}
+    event enqueue_unmap_image(const image_object &image,
+                              void *mapped_ptr,
+                              const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        const cl_mem mem = image.get();
+        return enqueue_unmap_mem_object(mem, mapped_ptr, events);
+    }
+
+    /// Enqueues a copy of \p region from \p src_image (at
+    /// \p src_origin) to \p dst_image (at \p dst_origin).
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueCopyImage}
+    event enqueue_copy_image(const image_object& src_image,
+                             image_object& dst_image,
+                             const size_t *src_origin,
+                             const size_t *dst_origin,
+                             const size_t *region,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueCopyImage(
+            m_queue, src_image.get(), dst_image.get(),
+            src_origin, dst_origin, region,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// \overload
+    ///
+    /// Takes the origins and \p region as N-dimensional extents; missing
+    /// dimensions are filled with 0 (origins) and 1 (region). Asserts
+    /// that both images have the same format.
+    template<size_t N>
+    event enqueue_copy_image(const image_object& src_image,
+                             image_object& dst_image,
+                             const extents<N> src_origin,
+                             const extents<N> dst_origin,
+                             const extents<N> region,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(src_image.get_context() == this->get_context());
+        BOOST_ASSERT(dst_image.get_context() == this->get_context());
+        BOOST_ASSERT_MSG(src_image.format() == dst_image.format(),
+                         "Source and destination image formats must match.");
+
+        // expand to the three-component arrays OpenCL expects
+        size_t padded_src_origin[3] = { 0, 0, 0 };
+        std::copy(src_origin.data(), src_origin.data() + N, padded_src_origin);
+
+        size_t padded_dst_origin[3] = { 0, 0, 0 };
+        std::copy(dst_origin.data(), dst_origin.data() + N, padded_dst_origin);
+
+        size_t padded_region[3] = { 1, 1, 1 };
+        std::copy(region.data(), region.data() + N, padded_region);
+
+        return enqueue_copy_image(
+            src_image, dst_image,
+            padded_src_origin, padded_dst_origin, padded_region,
+            events
+        );
+    }
+
+    /// Enqueues a copy of \p region of \p src_image (at \p src_origin)
+    /// to \p dst_buffer, starting at \p dst_offset.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueCopyImageToBuffer}
+    event enqueue_copy_image_to_buffer(const image_object& src_image,
+                                       memory_object& dst_buffer,
+                                       const size_t *src_origin,
+                                       const size_t *region,
+                                       size_t dst_offset,
+                                       const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueCopyImageToBuffer(
+            m_queue, src_image.get(), dst_buffer.get(),
+            src_origin, region, dst_offset,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// Enqueues a copy from \p src_buffer, starting at \p src_offset,
+    /// to the \p region of \p dst_image at \p dst_origin.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueCopyBufferToImage}
+    event enqueue_copy_buffer_to_image(const memory_object& src_buffer,
+                                       image_object& dst_image,
+                                       size_t src_offset,
+                                       const size_t *dst_origin,
+                                       const size_t *region,
+                                       const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueCopyBufferToImage(
+            m_queue, src_buffer.get(), dst_image.get(),
+            src_offset, dst_origin, region,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Enqueues a fill of the \p region of \p image starting at
+    /// \p origin with \p fill_color.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueFillImage}
+    ///
+    /// \opencl_version_warning{1,2}
+    event enqueue_fill_image(image_object& image,
+                             const void *fill_color,
+                             const size_t *origin,
+                             const size_t *region,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueFillImage(
+            m_queue, image.get(), fill_color,
+            origin, region,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+
+    /// \overload
+    ///
+    /// Takes \p origin and \p region as N-dimensional extents; missing
+    /// dimensions are filled with 0 (origin) and 1 (region).
+    template<size_t N>
+    event enqueue_fill_image(image_object& image,
+                             const void *fill_color,
+                             const extents<N> origin,
+                             const extents<N> region,
+                             const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(image.get_context() == this->get_context());
+
+        // expand to the three-component arrays OpenCL expects
+        size_t padded_origin[3] = { 0, 0, 0 };
+        std::copy(origin.data(), origin.data() + N, padded_origin);
+
+        size_t padded_region[3] = { 1, 1, 1 };
+        std::copy(region.data(), region.data() + N, padded_region);
+
+        return enqueue_fill_image(
+            image, fill_color, padded_origin, padded_region, events
+        );
+    }
+
+    /// Enqueues a migration of the \p num_mem_objects memory objects in
+    /// \p mem_objects, using \p flags.
+    ///
+    /// Throws \c opencl_error if the OpenCL call does not return
+    /// \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clEnqueueMigrateMemObjects}
+    ///
+    /// \opencl_version_warning{1,2}
+    event enqueue_migrate_memory_objects(uint_ num_mem_objects,
+                                         const cl_mem *mem_objects,
+                                         cl_mem_migration_flags flags,
+                                         const wait_list &events = wait_list())
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        event completion;
+
+        const cl_int status = clEnqueueMigrateMemObjects(
+            m_queue, num_mem_objects, mem_objects, flags,
+            events.size(), events.get_event_ptr(), &completion.get()
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return completion;
+    }
+ #endif // CL_VERSION_1_2
+
+ /// Enqueues a kernel for execution.
+ ///
+ /// \see_opencl_ref{clEnqueueNDRangeKernel}
+    event enqueue_nd_range_kernel(const kernel &kernel,
+                                  size_t work_dim,
+                                  const size_t *global_work_offset,
+                                  const size_t *global_work_size,
+                                  const size_t *local_work_size,
+                                  const wait_list &events = wait_list())
+    {
+        // the queue must be valid and share its context with the kernel
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(kernel.get_context() == this->get_context());
+
+        // OpenCL takes the dimension count as a cl_uint
+        const cl_uint dimensions = static_cast<cl_uint>(work_dim);
+
+        // receives the event produced by the kernel launch
+        event launch_event;
+
+        const cl_int status = clEnqueueNDRangeKernel(
+            m_queue, kernel, dimensions,
+            global_work_offset, global_work_size, local_work_size,
+            events.size(), events.get_event_ptr(), &launch_event.get()
+        );
+
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return launch_event;
+    }
+
+ /// \overload
+    template<size_t N>
+    event enqueue_nd_range_kernel(const kernel &kernel,
+                                  const extents<N> &global_work_offset,
+                                  const extents<N> &global_work_size,
+                                  const extents<N> &local_work_size,
+                                  const wait_list &events = wait_list())
+    {
+        // the number of dimensions is the compile-time size of the extents
+        const size_t work_dim = N;
+
+        // forward to the pointer-based overload
+        return enqueue_nd_range_kernel(
+            kernel, work_dim,
+            global_work_offset.data(),
+            global_work_size.data(),
+            local_work_size.data(),
+            events
+        );
+    }
+
+ /// Convenience method which calls enqueue_nd_range_kernel() with a
+ /// one-dimensional range.
+    event enqueue_1d_range_kernel(const kernel &kernel,
+                                  size_t global_work_offset,
+                                  size_t global_work_size,
+                                  size_t local_work_size,
+                                  const wait_list &events = wait_list())
+    {
+        // a local size of zero is forwarded as a null pointer instead of
+        // a pointer to the value zero
+        const size_t *local_size_ptr = 0;
+        if(local_work_size != 0){
+            local_size_ptr = &local_work_size;
+        }
+
+        return enqueue_nd_range_kernel(
+            kernel, 1,
+            &global_work_offset, &global_work_size, local_size_ptr,
+            events
+        );
+    }
+
+ /// Enqueues a kernel to execute using a single work-item.
+ ///
+ /// \see_opencl_ref{clEnqueueTask}
+    event enqueue_task(const kernel &kernel, const wait_list &events = wait_list())
+    {
+        // the queue must be valid and share its context with the kernel
+        BOOST_ASSERT(m_queue != 0);
+        BOOST_ASSERT(kernel.get_context() == this->get_context());
+
+        // receives the event produced by the task
+        event task_event;
+
+        #ifdef CL_VERSION_2_0
+        // clEnqueueTask() was deprecated in OpenCL 2.0, so the task is
+        // expressed as a one-dimensional range with a single work-item
+        // (global size 1, local size 1).
+        size_t single_item = 1;
+        cl_int status = clEnqueueNDRangeKernel(
+            m_queue, kernel, 1, 0, &single_item, &single_item,
+            events.size(), events.get_event_ptr(), &task_event.get()
+        );
+        #else
+        cl_int status = clEnqueueTask(
+            m_queue, kernel,
+            events.size(), events.get_event_ptr(), &task_event.get()
+        );
+        #endif
+
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return task_event;
+    }
+
+ /// Enqueues a function to execute on the host.
+    event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *),
+                                void *args,
+                                size_t cb_args,
+                                uint_ num_mem_objects,
+                                const cl_mem *mem_list,
+                                const void **args_mem_loc,
+                                const wait_list &events = wait_list())
+    {
+        // the queue handle must be set before commands can be enqueued
+        BOOST_ASSERT(m_queue != 0);
+
+        // receives the event produced by the native kernel command
+        event native_event;
+
+        const cl_int status = clEnqueueNativeKernel(
+            m_queue, user_func, args, cb_args,
+            num_mem_objects, mem_list, args_mem_loc,
+            events.size(), events.get_event_ptr(), &native_event.get()
+        );
+
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return native_event;
+    }
+
+ /// Convenience overload for enqueue_native_kernel() which enqueues a
+ /// native kernel on the host with a nullary function.
+    event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void),
+                                const wait_list &events = wait_list())
+    {
+        // the function pointer itself is the argument block: its address
+        // and size are passed on, and the trampoline is registered as the
+        // actual native kernel
+        void *packed_args = reinterpret_cast<void *>(&user_func);
+        const size_t packed_size = sizeof(user_func);
+
+        // no memory objects are passed along
+        return enqueue_native_kernel(
+            detail::nullary_native_kernel_trampoline,
+            packed_args,
+            packed_size,
+            0,
+            0,
+            0,
+            events
+        );
+    }
+
+ /// Flushes the command queue.
+ ///
+ /// \see_opencl_ref{clFlush}
+    void flush()
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        // report a failed flush like every enqueue_*() method does
+        // instead of silently discarding the status code
+        const cl_int ret = clFlush(m_queue);
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+
+ /// Blocks until all outstanding commands in the queue have finished.
+ ///
+ /// \see_opencl_ref{clFinish}
+    void finish()
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        // report a failed finish like every enqueue_*() method does
+        // instead of silently discarding the status code
+        const cl_int ret = clFinish(m_queue);
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+
+ /// Enqueues a barrier in the queue.
+    void enqueue_barrier()
+    {
+        BOOST_ASSERT(m_queue != 0);
+
+        cl_int status = CL_SUCCESS;
+
+        #ifdef CL_VERSION_1_2
+        // use the wait-list based call when the device passes the
+        // version 1.2 check
+        if(this->check_device_version(1, 2)){
+            status = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0);
+        } else
+        #endif // CL_VERSION_1_2
+        {
+            // otherwise fall back to clEnqueueBarrier(), with the
+            // deprecated-declarations warning suppressed around the call
+            BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
+            status = clEnqueueBarrier(m_queue);
+            BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
+        }
+
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+    }
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Enqueues a barrier in the queue after \p events.
+ ///
+ /// \opencl_version_warning{1,2}
+    event enqueue_barrier(const wait_list &events)
+    {
+        // the queue handle must be set before commands can be enqueued
+        BOOST_ASSERT(m_queue != 0);
+
+        // receives the event produced by the barrier command
+        event barrier_event;
+
+        const cl_int status = clEnqueueBarrierWithWaitList(
+            m_queue,
+            events.size(),
+            events.get_event_ptr(),
+            &barrier_event.get()
+        );
+
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return barrier_event;
+    }
+ #endif // CL_VERSION_1_2
+
+ /// Enqueues a marker in the queue and returns an event that can be
+ /// used to track its progress.
+    event enqueue_marker()
+    {
+        // same precondition as enqueue_barrier(): the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        // receives the event that tracks the marker
+        event event_;
+        cl_int ret = CL_SUCCESS;
+
+        #ifdef CL_VERSION_1_2
+        // use the wait-list based call when the device passes the
+        // version 1.2 check
+        if(get_device().check_version(1, 2)){
+            ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get());
+        } else
+        #endif
+        {
+            // Suppress deprecated declarations warning
+            BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
+            ret = clEnqueueMarker(m_queue, &event_.get());
+            BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
+        }
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Enqueues a marker after \p events in the queue and returns an
+ /// event that can be used to track its progress.
+ ///
+ /// \opencl_version_warning{1,2}
+    event enqueue_marker(const wait_list &events)
+    {
+        // same precondition as enqueue_barrier(events): the queue must
+        // be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        // receives the event that tracks the marker
+        event event_;
+
+        cl_int ret = clEnqueueMarkerWithWaitList(
+            m_queue, events.size(), events.get_event_ptr(), &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+ #endif // CL_VERSION_1_2
+
+ #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Enqueues a command to copy \p size bytes of data from \p src_ptr to
+ /// \p dst_ptr.
+ ///
+ /// \opencl_version_warning{2,0}
+ ///
+ /// \see_opencl2_ref{clEnqueueSVMMemcpy}
+    event enqueue_svm_memcpy(void *dst_ptr,
+                             const void *src_ptr,
+                             size_t size,
+                             const wait_list &events = wait_list())
+    {
+        // match the other enqueue_*() methods: the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        event event_;
+
+        // CL_TRUE requests a blocking copy
+        cl_int ret = clEnqueueSVMMemcpy(
+            m_queue,
+            CL_TRUE,
+            dst_ptr,
+            src_ptr,
+            size,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ /// Enqueues a command to copy \p size bytes of data from \p src_ptr to
+ /// \p dst_ptr. The operation is performed asynchronously.
+ ///
+ /// \opencl_version_warning{2,0}
+ ///
+ /// \see_opencl2_ref{clEnqueueSVMMemcpy}
+    event enqueue_svm_memcpy_async(void *dst_ptr,
+                                   const void *src_ptr,
+                                   size_t size,
+                                   const wait_list &events = wait_list())
+    {
+        // match the other enqueue_*() methods: the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        event event_;
+
+        // CL_FALSE requests a non-blocking copy; the returned event
+        // tracks its completion
+        cl_int ret = clEnqueueSVMMemcpy(
+            m_queue,
+            CL_FALSE,
+            dst_ptr,
+            src_ptr,
+            size,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ /// Enqueues a command to fill \p size bytes of data at \p svm_ptr with
+ /// \p pattern.
+ ///
+ /// \opencl_version_warning{2,0}
+ ///
+ /// \see_opencl2_ref{clEnqueueSVMMemFill}
+    event enqueue_svm_fill(void *svm_ptr,
+                           const void *pattern,
+                           size_t pattern_size,
+                           size_t size,
+                           const wait_list &events = wait_list())
+    {
+        // match the other enqueue_*() methods: the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueSVMMemFill(
+            m_queue,
+            svm_ptr,
+            pattern,
+            pattern_size,
+            size,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ /// Enqueues a command to free \p svm_ptr.
+ ///
+ /// \opencl_version_warning{2,0}
+ ///
+ /// \see_opencl2_ref{clEnqueueSVMFree}
+ ///
+ /// \see svm_free()
+    event enqueue_svm_free(void *svm_ptr,
+                           const wait_list &events = wait_list())
+    {
+        // match the other enqueue_*() methods: the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        event event_;
+
+        // a single pointer is freed; no free callback and no user data
+        // are supplied (the two zero arguments)
+        cl_int ret = clEnqueueSVMFree(
+            m_queue,
+            1,
+            &svm_ptr,
+            0,
+            0,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ /// Enqueues a command to map \p svm_ptr to the host memory space.
+ ///
+ /// \opencl_version_warning{2,0}
+ ///
+ /// \see_opencl2_ref{clEnqueueSVMMap}
+    event enqueue_svm_map(void *svm_ptr,
+                          size_t size,
+                          cl_map_flags flags,
+                          const wait_list &events = wait_list())
+    {
+        // match the other enqueue_*() methods: the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        event event_;
+
+        // CL_TRUE requests a blocking map
+        cl_int ret = clEnqueueSVMMap(
+            m_queue,
+            CL_TRUE,
+            flags,
+            svm_ptr,
+            size,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+
+ /// Enqueues a command to unmap \p svm_ptr from the host memory space.
+ ///
+ /// \opencl_version_warning{2,0}
+ ///
+ /// \see_opencl2_ref{clEnqueueSVMUnmap}
+    event enqueue_svm_unmap(void *svm_ptr,
+                            const wait_list &events = wait_list())
+    {
+        // match the other enqueue_*() methods: the queue must be valid
+        BOOST_ASSERT(m_queue != 0);
+
+        event event_;
+
+        cl_int ret = clEnqueueSVMUnmap(
+            m_queue,
+            svm_ptr,
+            events.size(),
+            events.get_event_ptr(),
+            &event_.get()
+        );
+
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return event_;
+    }
+ #endif // CL_VERSION_2_0
+
+ /// Returns \c true if the command queue is the same as \p other.
+    bool operator==(const command_queue &other) const
+    {
+        // two wrappers are equal when they hold the same queue handle
+        const bool same_handle = (other.m_queue == this->m_queue);
+        return same_handle;
+    }
+
+ /// Returns \c true if the command queue is different from \p other.
+    bool operator!=(const command_queue &other) const
+    {
+        // inequality is the negation of the handle comparison
+        return !(this->m_queue == other.m_queue);
+    }
+
+ /// \internal_
+    operator cl_command_queue() const
+    {
+        // expose the wrapped handle so the object can be passed where a
+        // cl_command_queue is expected
+        return this->m_queue;
+    }
+
+ /// \internal_
+    bool check_device_version(int major, int minor) const
+    {
+        // delegate the version check to the queue's device
+        const bool supported = this->get_device().check_version(major, minor);
+        return supported;
+    }
+
+private:
+ cl_command_queue m_queue;
+};
+
+inline buffer buffer::clone(command_queue &queue) const
+{
+    // create a second buffer with the same context, size and memory flags
+    buffer duplicate(get_context(), size(), get_memory_flags());
+
+    // copy the whole buffer, starting at offset zero on both sides
+    queue.enqueue_copy_buffer(
+        *this, duplicate, 0, 0, size()
+    );
+
+    return duplicate;
+}
+
+inline image1d image1d::clone(command_queue &queue) const
+{
+    // create a second image with the same context, width, format and flags
+    image1d duplicate(get_context(), width(), format(), get_memory_flags());
+
+    // copy the full size, from this image's origin to the duplicate's origin
+    queue.enqueue_copy_image(
+        *this, duplicate, origin(), duplicate.origin(), size()
+    );
+
+    return duplicate;
+}
+
+inline image2d image2d::clone(command_queue &queue) const
+{
+    // create a second image with the same context, dimensions, format
+    // and flags
+    image2d duplicate(
+        get_context(),
+        width(),
+        height(),
+        format(),
+        get_memory_flags()
+    );
+
+    // copy the full size, from this image's origin to the duplicate's origin
+    queue.enqueue_copy_image(
+        *this, duplicate, origin(), duplicate.origin(), size()
+    );
+
+    return duplicate;
+}
+
+inline image3d image3d::clone(command_queue &queue) const
+{
+    // create a second image with the same context, dimensions, format
+    // and flags
+    image3d duplicate(
+        get_context(),
+        width(),
+        height(),
+        depth(),
+        format(),
+        get_memory_flags()
+    );
+
+    // copy the full size, from this image's origin to the duplicate's origin
+    queue.enqueue_copy_image(
+        *this, duplicate, origin(), duplicate.origin(), size()
+    );
+
+    return duplicate;
+}
+
+/// \internal_ define get_info() specializations for command_queue
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue,
+ ((cl_context, CL_QUEUE_CONTEXT))
+ ((cl_device_id, CL_QUEUE_DEVICE))
+ ((uint_, CL_QUEUE_REFERENCE_COUNT))
+ ((cl_command_queue_properties, CL_QUEUE_PROPERTIES))
+)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_COMMAND_QUEUE_HPP
diff --git a/boost/compute/config.hpp b/boost/compute/config.hpp
new file mode 100644
index 0000000000..77d0d7b9df
--- /dev/null
+++ b/boost/compute/config.hpp
@@ -0,0 +1,70 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONFIG_HPP
+#define BOOST_COMPUTE_CONFIG_HPP
+
+#include <boost/config.hpp>
+#include <boost/version.hpp>
+#include <boost/compute/cl.hpp>
+
+// check for minimum required boost version
+#if BOOST_VERSION < 104800
+#error Boost.Compute requires Boost version 1.48 or later
+#endif
+
+// the BOOST_COMPUTE_NO_VARIADIC_TEMPLATES macro is defined
+// if the compiler does not *fully* support variadic templates
+#if defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES) || \
+ defined(BOOST_NO_VARIADIC_TEMPLATES) || \
+ (defined(__GNUC__) && !defined(__clang__) && \
+ __GNUC__ == 4 && __GNUC_MINOR__ <= 6)
+ #define BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+#endif // BOOST_NO_CXX11_VARIADIC_TEMPLATES
+
+// the BOOST_COMPUTE_NO_STD_TUPLE macro is defined if the
+// compiler/stdlib does not support std::tuple
+#if defined(BOOST_NO_CXX11_HDR_TUPLE) || \
+ defined(BOOST_NO_0X_HDR_TUPLE) || \
+ defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES)
+ #define BOOST_COMPUTE_NO_STD_TUPLE
+#endif // BOOST_NO_CXX11_HDR_TUPLE
+
+// defines BOOST_COMPUTE_CL_CALLBACK to the value of CL_CALLBACK
+// if it is defined (it was added in OpenCL 1.1). this is used to
+// annotate certain callback functions registered with OpenCL
+#ifdef CL_CALLBACK
+# define BOOST_COMPUTE_CL_CALLBACK CL_CALLBACK
+#else
+# define BOOST_COMPUTE_CL_CALLBACK
+#endif
+
+// Maximum number of iterators acceptable for make_zip_iterator
+#ifndef BOOST_COMPUTE_MAX_ARITY
+ // should be no more than max boost::tuple size (10 by default)
+# define BOOST_COMPUTE_MAX_ARITY 10
+#endif
+
+#if !defined(BOOST_COMPUTE_DOXYGEN_INVOKED) && \
+ (defined(BOOST_NO_CXX11_RVALUE_REFERENCES) || defined(BOOST_NO_RVALUE_REFERENCES))
+# define BOOST_COMPUTE_NO_RVALUE_REFERENCES
+#endif // BOOST_NO_CXX11_RVALUE_REFERENCES
+
+#if defined(BOOST_NO_CXX11_HDR_INITIALIZER_LIST) || \
+ defined(BOOST_NO_0X_HDR_INITIALIZER_LIST)
+# define BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+#endif // BOOST_NO_CXX11_HDR_INITIALIZER_LIST
+
+#if defined(BOOST_NO_CXX11_HDR_CHRONO) || \
+ defined(BOOST_NO_0X_HDR_CHRONO)
+# define BOOST_COMPUTE_NO_HDR_CHRONO
+#endif // BOOST_NO_CXX11_HDR_CHRONO
+
+#endif // BOOST_COMPUTE_CONFIG_HPP
diff --git a/boost/compute/container.hpp b/boost/compute/container.hpp
new file mode 100644
index 0000000000..fc14f5fde2
--- /dev/null
+++ b/boost/compute/container.hpp
@@ -0,0 +1,27 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_HPP
+#define BOOST_COMPUTE_CONTAINER_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute container headers.
+
+#include <boost/compute/container/array.hpp>
+#include <boost/compute/container/basic_string.hpp>
+#include <boost/compute/container/dynamic_bitset.hpp>
+#include <boost/compute/container/flat_map.hpp>
+#include <boost/compute/container/flat_set.hpp>
+#include <boost/compute/container/mapped_view.hpp>
+#include <boost/compute/container/string.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#endif // BOOST_COMPUTE_CONTAINER_HPP
diff --git a/boost/compute/container/array.hpp b/boost/compute/container/array.hpp
new file mode 100644
index 0000000000..919be6eeac
--- /dev/null
+++ b/boost/compute/container/array.hpp
@@ -0,0 +1,281 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_ARRAY_HPP
+#define BOOST_COMPUTE_CONTAINER_ARRAY_HPP
+
+#include <cstddef>
+#include <iterator>
+#include <exception>
+
+#include <boost/array.hpp>
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/swap_ranges.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/detail/capture_traits.hpp>
+#include <boost/compute/detail/buffer_value.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class array
+/// \brief A fixed-size container.
+///
+/// The array container is very similar to the \ref vector container except
+/// its size is fixed at compile-time rather than being dynamically resizable
+/// at run-time.
+///
+/// For example, to create a fixed-size array with eight values on the device:
+/// \code
+/// boost::compute::array<int, 8> values(context);
+/// \endcode
+///
+/// The Boost.Compute \c array class provides a STL-like API and is modeled
+/// after the \c std::array class from the C++ standard library.
+///
+/// \see \ref vector "vector<T>"
+template<class T, std::size_t N>
+class array
+{
+public:
+    typedef T value_type;
+    typedef std::size_t size_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef detail::buffer_value<T> reference;
+    typedef const detail::buffer_value<T> const_reference;
+    typedef T* pointer;
+    typedef const T* const_pointer;
+    typedef buffer_iterator<T> iterator;
+    typedef buffer_iterator<T> const_iterator;
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+    enum {
+        static_size = N
+    };
+
+    /// Creates an array with storage for \c N values of type \c T in
+    /// \p context. The elements are not initialized by this constructor.
+    explicit array(const context &context = system::default_context())
+        : m_buffer(context, sizeof(T) * N)
+    {
+    }
+
+    /// Creates a new array in the context of \p other and copies the
+    /// elements of \p other into it.
+    array(const array<T, N> &other)
+        : m_buffer(other.m_buffer.get_context(), sizeof(T) * N)
+    {
+        boost::compute::copy(other.begin(), other.end(), begin());
+    }
+
+    /// Creates a new array in \p context and copies the elements of the
+    /// host \c boost::array \p array into it.
+    array(const boost::array<T, N> &array,
+          const context &context = system::default_context())
+        : m_buffer(context, sizeof(T) * N)
+    {
+        boost::compute::copy(array.begin(), array.end(), begin());
+    }
+
+    /// Copies the elements of \p other into this array. Self-assignment
+    /// is a no-op.
+    array<T, N>& operator=(const array<T, N> &other)
+    {
+        if(this != &other){
+            boost::compute::copy(other.begin(), other.end(), begin());
+        }
+
+        return *this;
+    }
+
+    /// Copies the elements of the host \c boost::array \p array into
+    /// this array.
+    array<T, N>& operator=(const boost::array<T, N> &array)
+    {
+        boost::compute::copy(array.begin(), array.end(), begin());
+
+        return *this;
+    }
+
+    ~array()
+    {
+    }
+
+    /// Returns an iterator to the first element.
+    iterator begin()
+    {
+        return buffer_iterator<T>(m_buffer, 0);
+    }
+
+    /// \overload
+    const_iterator begin() const
+    {
+        return buffer_iterator<T>(m_buffer, 0);
+    }
+
+    /// \overload
+    const_iterator cbegin() const
+    {
+        return begin();
+    }
+
+    /// Returns an iterator to one past the last element.
+    iterator end()
+    {
+        return buffer_iterator<T>(m_buffer, N);
+    }
+
+    /// \overload
+    const_iterator end() const
+    {
+        return buffer_iterator<T>(m_buffer, N);
+    }
+
+    /// \overload
+    const_iterator cend() const
+    {
+        return end();
+    }
+
+    /// Returns a reverse iterator to the last element.
+    ///
+    /// A \c std::reverse_iterator dereferences to the element *before*
+    /// its base iterator, so the base must be end() (not end() - 1) for
+    /// the first dereference to yield element \c N-1.
+    reverse_iterator rbegin()
+    {
+        return reverse_iterator(end());
+    }
+
+    /// \overload
+    const_reverse_iterator rbegin() const
+    {
+        return const_reverse_iterator(end());
+    }
+
+    /// \overload
+    const_reverse_iterator crbegin() const
+    {
+        return rbegin();
+    }
+
+    /// Returns a reverse iterator to one before the first element.
+    ///
+    /// The base is begin(), so the last valid dereference in the
+    /// reverse range yields element \c 0 and nothing before the start
+    /// of the buffer is ever addressed.
+    reverse_iterator rend()
+    {
+        return reverse_iterator(begin());
+    }
+
+    /// \overload
+    const_reverse_iterator rend() const
+    {
+        return const_reverse_iterator(begin());
+    }
+
+    /// \overload
+    const_reverse_iterator crend() const
+    {
+        return rend();
+    }
+
+    /// Returns the number of elements, which is always \c N.
+    size_type size() const
+    {
+        return N;
+    }
+
+    /// Returns \c true if \c N is zero.
+    bool empty() const
+    {
+        return N == 0;
+    }
+
+    /// Returns the maximum number of elements, which is always \c N.
+    size_type max_size() const
+    {
+        return N;
+    }
+
+    /// Returns a reference to the element at \p index. No bounds check
+    /// is performed.
+    reference operator[](size_type index)
+    {
+        return *(begin() + static_cast<difference_type>(index));
+    }
+
+    /// \overload
+    const_reference operator[](size_type index) const
+    {
+        return *(begin() + static_cast<difference_type>(index));
+    }
+
+    /// Returns a reference to the element at \p index. Throws
+    /// \c std::out_of_range if \p index is not less than \c N.
+    reference at(size_type index)
+    {
+        if(index >= N){
+            BOOST_THROW_EXCEPTION(std::out_of_range("index out of range"));
+        }
+
+        return operator[](index);
+    }
+
+    /// \overload
+    const_reference at(size_type index) const
+    {
+        if(index >= N){
+            BOOST_THROW_EXCEPTION(std::out_of_range("index out of range"));
+        }
+
+        return operator[](index);
+    }
+
+    /// Returns a reference to the first element.
+    reference front()
+    {
+        return *begin();
+    }
+
+    /// \overload
+    const_reference front() const
+    {
+        return *begin();
+    }
+
+    /// Returns a reference to the last element.
+    reference back()
+    {
+        return *(end() - static_cast<difference_type>(1));
+    }
+
+    /// \overload
+    const_reference back() const
+    {
+        return *(end() - static_cast<difference_type>(1));
+    }
+
+    /// Sets every element to \p value.
+    void fill(const value_type &value)
+    {
+        ::boost::compute::fill(begin(), end(), value);
+    }
+
+    /// Exchanges the elements of this array with those of \p other.
+    void swap(array<T, N> &other)
+    {
+        ::boost::compute::swap_ranges(begin(), end(), other.begin());
+    }
+
+    /// Returns the buffer that stores the elements.
+    const buffer& get_buffer() const
+    {
+        return m_buffer;
+    }
+
+private:
+    buffer m_buffer;
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for array<T, N>
+template<class T, std::size_t N>
+struct set_kernel_arg<array<T, N> >
+{
+    // binds the array's underlying buffer to argument slot `index`
+    void operator()(kernel &kernel_, size_t index, const array<T, N> &array)
+    {
+        const buffer &storage = array.get_buffer();
+        kernel_.set_arg(index, storage);
+    }
+};
+
+// for capturing array<T, N> with BOOST_COMPUTE_CLOSURE()
+template<class T, size_t N>
+struct capture_traits<array<T, N> >
+{
+    // builds the declaration "__global <T>*", where <T> is whatever
+    // type_name<T>() returns
+    static std::string type_name()
+    {
+        std::string declaration("__global ");
+        declaration += ::boost::compute::type_name<T>();
+        declaration += "*";
+        return declaration;
+    }
+};
+
+// meta_kernel streaming operator for array<T, N>
+template<class T, size_t N>
+meta_kernel& operator<<(meta_kernel &k, const array<T, N> &array)
+{
+    // stream the identifier that the meta_kernel associates with the
+    // array's underlying buffer
+    const buffer &storage = array.get_buffer();
+    return k << k.get_buffer_identifier<T>(storage);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_ARRAY_HPP
diff --git a/boost/compute/container/basic_string.hpp b/boost/compute/container/basic_string.hpp
new file mode 100644
index 0000000000..c5a2c46aa5
--- /dev/null
+++ b/boost/compute/container/basic_string.hpp
@@ -0,0 +1,331 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP
+#define BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP
+
+#include <string>
+#include <cstring>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/algorithm/search.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <iosfwd>
+
+namespace boost {
+namespace compute {
+
+/// \class basic_string
+/// \brief A template for a dynamically-sized character sequence.
+///
+/// The \c basic_string class provides a generic template for a dynamically-
+/// sized character sequence. This is most commonly used through the \c string
+/// typedef (for \c basic_string<char>).
+///
+/// For example, to create a string on the device with its contents copied
+/// from a C-string on the host:
+/// \code
+/// boost::compute::string str("hello, world!");
+/// \endcode
+///
+/// \see \ref vector "vector<T>"
+template<class CharT, class Traits = std::char_traits<CharT> >
+class basic_string
+{
+public:
+    typedef Traits traits_type;
+    typedef typename Traits::char_type value_type;
+    typedef size_t size_type;
+    static const size_type npos = size_type(-1);
+    typedef typename ::boost::compute::vector<CharT>::reference reference;
+    typedef typename ::boost::compute::vector<CharT>::const_reference const_reference;
+    typedef typename ::boost::compute::vector<CharT>::iterator iterator;
+    typedef typename ::boost::compute::vector<CharT>::const_iterator const_iterator;
+    typedef typename ::boost::compute::vector<CharT>::reverse_iterator reverse_iterator;
+    typedef typename ::boost::compute::vector<CharT>::const_reverse_iterator const_reverse_iterator;
+
+    /// Creates an empty string.
+    basic_string()
+    {
+    }
+
+    /// Creates a string consisting of \p count copies of \p ch.
+    basic_string(size_type count, CharT ch)
+        : m_data(count)
+    {
+        // NOTE(review): std::fill assigns through the device iterators
+        // one element at a time; a device-side fill may be cheaper --
+        // confirm before changing.
+        std::fill(m_data.begin(), m_data.end(), ch);
+    }
+
+    /// Creates a string from the substring of \p other that starts at
+    /// \p pos and holds at most \p count characters. A \p pos beyond the
+    /// end of \p other is clamped to the end, and a \p count that would
+    /// run past the end (including \c npos) is shortened to fit.
+    basic_string(const basic_string &other,
+                 size_type pos,
+                 size_type count = npos)
+        : m_data(other.begin() + substr_first(other.size(), pos),
+                 other.begin() + substr_last(other.size(), pos, count))
+    {
+    }
+
+    /// Creates a string from the first \p count characters at \p s.
+    basic_string(const char *s, size_type count)
+        : m_data(s, s + count)
+    {
+    }
+
+    /// Creates a string from the null-terminated C-string \p s.
+    basic_string(const char *s)
+        : m_data(s, s + std::strlen(s))
+    {
+    }
+
+    /// Creates a string from the range [\p first, \p last).
+    template<class InputIterator>
+    basic_string(InputIterator first, InputIterator last)
+        : m_data(first, last)
+    {
+    }
+
+    /// Creates a copy of \p other.
+    basic_string(const basic_string<CharT, Traits> &other)
+        : m_data(other.m_data)
+    {
+    }
+
+    /// Replaces the contents with a copy of \p other. Self-assignment
+    /// is a no-op.
+    basic_string<CharT, Traits>& operator=(const basic_string<CharT, Traits> &other)
+    {
+        if(this != &other){
+            m_data = other.m_data;
+        }
+
+        return *this;
+    }
+
+    ~basic_string()
+    {
+    }
+
+    reference at(size_type pos)
+    {
+        return m_data.at(pos);
+    }
+
+    const_reference at(size_type pos) const
+    {
+        return m_data.at(pos);
+    }
+
+    reference operator[](size_type pos)
+    {
+        return m_data[pos];
+    }
+
+    const_reference operator[](size_type pos) const
+    {
+        return m_data[pos];
+    }
+
+    reference front()
+    {
+        return m_data.front();
+    }
+
+    const_reference front() const
+    {
+        return m_data.front();
+    }
+
+    reference back()
+    {
+        return m_data.back();
+    }
+
+    const_reference back() const
+    {
+        return m_data.back();
+    }
+
+    iterator begin()
+    {
+        return m_data.begin();
+    }
+
+    const_iterator begin() const
+    {
+        return m_data.begin();
+    }
+
+    const_iterator cbegin() const
+    {
+        return m_data.cbegin();
+    }
+
+    iterator end()
+    {
+        return m_data.end();
+    }
+
+    const_iterator end() const
+    {
+        return m_data.end();
+    }
+
+    const_iterator cend() const
+    {
+        return m_data.cend();
+    }
+
+    reverse_iterator rbegin()
+    {
+        return m_data.rbegin();
+    }
+
+    const_reverse_iterator rbegin() const
+    {
+        return m_data.rbegin();
+    }
+
+    const_reverse_iterator crbegin() const
+    {
+        return m_data.crbegin();
+    }
+
+    reverse_iterator rend()
+    {
+        return m_data.rend();
+    }
+
+    const_reverse_iterator rend() const
+    {
+        return m_data.rend();
+    }
+
+    const_reverse_iterator crend() const
+    {
+        return m_data.crend();
+    }
+
+    bool empty() const
+    {
+        return m_data.empty();
+    }
+
+    size_type size() const
+    {
+        return m_data.size();
+    }
+
+    /// Same as size().
+    size_type length() const
+    {
+        return m_data.size();
+    }
+
+    size_type max_size() const
+    {
+        return m_data.max_size();
+    }
+
+    void reserve(size_type size)
+    {
+        m_data.reserve(size);
+    }
+
+    size_type capacity() const
+    {
+        return m_data.capacity();
+    }
+
+    void shrink_to_fit()
+    {
+        m_data.shrink_to_fit();
+    }
+
+    void clear()
+    {
+        m_data.clear();
+    }
+
+    /// Exchanges the contents of this string with those of \p other by
+    /// copying through a temporary vector. Swapping with itself is a
+    /// no-op.
+    void swap(basic_string<CharT, Traits> &other)
+    {
+        if(this != &other)
+        {
+            ::boost::compute::vector<CharT> temp_data(other.m_data);
+            other.m_data = m_data;
+            m_data = temp_data;
+        }
+    }
+
+    /// Returns the substring that starts at \p pos and holds at most
+    /// \p count characters (see the substring constructor).
+    basic_string<CharT, Traits> substr(size_type pos = 0,
+                                       size_type count = npos) const
+    {
+        return basic_string<CharT, Traits>(*this, pos, count);
+    }
+
+    /// Finds the first character \p ch at or after \p pos. Returns
+    /// \c npos if there is none or if \p pos is past the end.
+    size_type find(CharT ch, size_type pos = 0) const
+    {
+        // begin() + pos must not step past end()
+        if(pos >= size()){
+            return npos;
+        }
+
+        const_iterator iter = ::boost::compute::find(begin() + pos, end(), ch);
+        if(iter == end()){
+            return npos;
+        }
+        else {
+            return static_cast<size_type>(std::distance(begin(), iter));
+        }
+    }
+
+    /// Finds the first substring equal to \p str at or after \p pos.
+    /// Returns \c npos if there is none or if \p pos is past the end.
+    size_type find(const basic_string& str, size_type pos = 0) const
+    {
+        // begin() + pos must not step past end()
+        if(pos > size()){
+            return npos;
+        }
+
+        const_iterator iter = ::boost::compute::search(begin() + pos, end(),
+                                                       str.begin(), str.end());
+        if(iter == end()){
+            return npos;
+        }
+        else {
+            return static_cast<size_type>(std::distance(begin(), iter));
+        }
+    }
+
+    /// Finds the first substring equal to the character string
+    /// pointed to by \p s.
+    /// The length of the string is determined by the first null character.
+    ///
+    /// For example, the following code
+    /// \snippet test/test_string.cpp string_find
+    ///
+    /// will return 5 as position.
+    size_type find(const char* s, size_type pos = 0) const
+    {
+        // build a string from s, then reuse the substring search above
+        const basic_string str(s);
+        return find(str, pos);
+    }
+
+private:
+    // offset of the first character of a substring request, clamped so
+    // that it never exceeds the source length
+    static size_type substr_first(size_type size, size_type pos)
+    {
+        return (std::min)(pos, size);
+    }
+
+    // offset one past the last character of a substring request: the
+    // clamped start plus at most `count` of the characters that remain
+    static size_type substr_last(size_type size, size_type pos, size_type count)
+    {
+        const size_type first = substr_first(size, pos);
+        return first + (std::min)(count, size - first);
+    }
+
+    ::boost::compute::vector<CharT> m_data;
+};
+
+template<class CharT, class Traits>
+std::ostream&
+operator<<(std::ostream& stream,
+           boost::compute::basic_string<CharT, Traits>const& outStr)
+{
+    // the characters are copied, using the default queue, into an
+    // iterator that writes each one to the stream
+    std::ostream_iterator<CharT> sink(stream);
+    command_queue queue = ::boost::compute::system::default_queue();
+
+    boost::compute::copy(outStr.begin(), outStr.end(), sink, queue);
+
+    return stream;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP
diff --git a/boost/compute/container/detail/scalar.hpp b/boost/compute/container/detail/scalar.hpp
new file mode 100644
index 0000000000..7ecd86e540
--- /dev/null
+++ b/boost/compute/container/detail/scalar.hpp
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP
+#define BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// scalar<T> provides a trivial "container" that stores a
+// single value in a memory buffer on a compute device
+template<class T>
+class scalar
+{
+public:
+    typedef T value_type;
+
+    // allocates a buffer of sizeof(T) bytes in the given context
+    scalar(const context &context)
+        : m_buffer(context, sizeof(T))
+    {
+    }
+
+    ~scalar()
+    {
+    }
+
+    // returns the buffer that holds the value
+    const buffer& get_buffer() const
+    {
+        return m_buffer;
+    }
+
+    // stores `value` at index 0 of the buffer using `queue`
+    void write(const T &value, command_queue &queue)
+    {
+        write_single_value<T>(value, m_buffer, 0, queue);
+    }
+
+    // reads the value at index 0 of the buffer using `queue`
+    T read(command_queue &queue) const
+    {
+        const T stored = read_single_value<T>(m_buffer, 0, queue);
+        return stored;
+    }
+
+private:
+    buffer m_buffer;
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP
diff --git a/boost/compute/container/dynamic_bitset.hpp b/boost/compute/container/dynamic_bitset.hpp
new file mode 100644
index 0000000000..7f41901d64
--- /dev/null
+++ b/boost/compute/container/dynamic_bitset.hpp
@@ -0,0 +1,237 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP
+#define BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/algorithm/any_of.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/transform_reduce.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/functional/integer.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class dynamic_bitset
+/// \brief The dynamic_bitset class contains a resizable bit array.
+///
+/// For example, to create a dynamic-bitset with space for 1000 bits on the
+/// device:
+/// \code
+/// boost::compute::dynamic_bitset<> bits(1000, queue);
+/// \endcode
+///
+/// The Boost.Compute \c dynamic_bitset class provides a STL-like API and is
+/// modeled after the \c boost::dynamic_bitset class from Boost.
+///
+/// Bits are packed into blocks of type \c Block stored in a
+/// \c vector<Block, Alloc>. Bit \c n is stored in block
+/// \c n / bits_per_block at bit position \c n % bits_per_block.
+///
+/// \see \ref vector "vector<T>"
+template<class Block = ulong_, class Alloc = buffer_allocator<Block> >
+class dynamic_bitset
+{
+public:
+    typedef Block block_type;
+    typedef Alloc allocator_type;
+    typedef vector<Block, Alloc> container_type;
+    typedef typename container_type::size_type size_type;
+
+    BOOST_STATIC_CONSTANT(size_type, bits_per_block = sizeof(block_type) * CHAR_BIT);
+    BOOST_STATIC_CONSTANT(size_type, npos = static_cast<size_type>(-1));
+
+    /// Creates a new dynamic bitset with storage for \p size bits. Initializes
+    /// all bits to zero.
+    ///
+    /// The number of blocks allocated is \p size divided by
+    /// \c bits_per_block, rounded up, so that every bit has backing storage.
+    dynamic_bitset(size_type size, command_queue &queue)
+        : m_bits(_blocks_for_bits(size), queue.get_context()),
+          m_size(size)
+    {
+        // initialize all bits to zero
+        reset(queue);
+    }
+
+    /// Creates a new dynamic bitset as a copy of \p other.
+    dynamic_bitset(const dynamic_bitset &other)
+        : m_bits(other.m_bits),
+          m_size(other.m_size)
+    {
+    }
+
+    /// Copies the data from \p other to \c *this.
+    dynamic_bitset& operator=(const dynamic_bitset &other)
+    {
+        if(this != &other){
+            m_bits = other.m_bits;
+            m_size = other.m_size;
+        }
+
+        return *this;
+    }
+
+    /// Destroys the dynamic bitset.
+    ~dynamic_bitset()
+    {
+    }
+
+    /// Returns the size of the dynamic bitset (in bits).
+    size_type size() const
+    {
+        return m_size;
+    }
+
+    /// Returns the number of blocks to store the bits in the dynamic bitset.
+    size_type num_blocks() const
+    {
+        return m_bits.size();
+    }
+
+    /// Returns the maximum possible size for the dynamic bitset.
+    size_type max_size() const
+    {
+        return m_bits.max_size() * bits_per_block;
+    }
+
+    /// Returns \c true if the dynamic bitset is empty (i.e. \c size() == \c 0).
+    bool empty() const
+    {
+        return size() == 0;
+    }
+
+    /// Returns the number of set bits (i.e. '1') in the bitset.
+    ///
+    /// Computed by summing the popcount of every block with
+    /// transform_reduce() on \p queue.
+    size_type count(command_queue &queue) const
+    {
+        ulong_ count = 0;
+        transform_reduce(
+            m_bits.begin(),
+            m_bits.end(),
+            &count,
+            popcount<block_type>(),
+            plus<ulong_>(),
+            queue
+        );
+        return static_cast<size_type>(count);
+    }
+
+    /// Resizes the bitset to contain \p num_bits. If the new size is greater
+    /// than the current size the new bits are set to zero.
+    void resize(size_type num_bits, command_queue &queue)
+    {
+        // resize block storage to the number of blocks needed for num_bits
+        const size_type current_block_count = m_bits.size();
+        m_bits.resize(_blocks_for_bits(num_bits), queue);
+
+        // fill new block with zeros (if new blocks were added)
+        const size_type new_block_count = m_bits.size();
+        if(new_block_count > current_block_count){
+            fill_n(
+                m_bits.begin() + current_block_count,
+                new_block_count - current_block_count,
+                block_type(0),
+                queue
+            );
+        }
+
+        // when shrinking into the middle of a block, clear the bits that are
+        // no longer part of the bitset so that count()/any() ignore them and
+        // a later grow exposes zeros
+        const size_type used_bits = num_bits % bits_per_block;
+        if(num_bits < m_size && used_bits != 0){
+            const size_type last_block = new_block_count - 1;
+
+            block_type block_value;
+            copy_n(m_bits.begin() + last_block, 1, &block_value, queue);
+
+            const block_type keep_mask =
+                static_cast<block_type>((block_type(1) << used_bits) - 1);
+            block_value &= keep_mask;
+
+            copy_n(&block_value, 1, m_bits.begin() + last_block, queue);
+        }
+
+        // store new size
+        m_size = num_bits;
+    }
+
+    /// Sets the bit at position \p n to \c true.
+    void set(size_type n, command_queue &queue)
+    {
+        set(n, true, queue);
+    }
+
+    /// Sets the bit at position \p n to \p value.
+    ///
+    /// Performs a read-modify-write of the block containing bit \p n.
+    void set(size_type n, bool value, command_queue &queue)
+    {
+        const size_type bit = n % bits_per_block;
+        const size_type block = n / bits_per_block;
+
+        // load current block
+        block_type block_value;
+        copy_n(m_bits.begin() + block, 1, &block_value, queue);
+
+        // build the mask in block_type so the shift is as wide as a block
+        const block_type mask = static_cast<block_type>(block_type(1) << bit);
+
+        // update block value
+        if(value){
+            block_value |= mask;
+        }
+        else {
+            block_value &= static_cast<block_type>(~mask);
+        }
+
+        // store new block
+        copy_n(&block_value, 1, m_bits.begin() + block, queue);
+    }
+
+    /// Returns \c true if the bit at position \p n is set (i.e. '1').
+    bool test(size_type n, command_queue &queue)
+    {
+        const size_type bit = n % bits_per_block;
+        const size_type block = n / bits_per_block;
+
+        block_type block_value;
+        copy_n(m_bits.begin() + block, 1, &block_value, queue);
+
+        // build the mask in block_type so the shift is as wide as a block
+        const block_type mask = static_cast<block_type>(block_type(1) << bit);
+
+        return (block_value & mask) != 0;
+    }
+
+    /// Flips the value of the bit at position \p n.
+    void flip(size_type n, command_queue &queue)
+    {
+        set(n, !test(n, queue), queue);
+    }
+
+    /// Returns \c true if any bit in the bitset is set (i.e. '1').
+    bool any(command_queue &queue) const
+    {
+        return any_of(
+            m_bits.begin(), m_bits.end(), lambda::_1 != block_type(0), queue
+        );
+    }
+
+    /// Returns \c true if all of the bits in the bitset are set to zero.
+    bool none(command_queue &queue) const
+    {
+        return !any(queue);
+    }
+
+    /// Sets all of the bits in the bitset to zero.
+    void reset(command_queue &queue)
+    {
+        fill(m_bits.begin(), m_bits.end(), block_type(0), queue);
+    }
+
+    /// Sets the bit at position \p n to zero.
+    void reset(size_type n, command_queue &queue)
+    {
+        set(n, false, queue);
+    }
+
+    /// Empties the bitset (e.g. \c resize(0)).
+    void clear()
+    {
+        m_bits.clear();
+
+        // keep size() consistent with the (now empty) block storage
+        m_size = 0;
+    }
+
+    /// Returns the allocator used to allocate storage for the bitset.
+    allocator_type get_allocator() const
+    {
+        return m_bits.get_allocator();
+    }
+
+private:
+    /// \internal_
+    /// Returns the number of blocks needed to hold \p num_bits bits
+    /// (i.e. \p num_bits / \c bits_per_block rounded up).
+    static size_type _blocks_for_bits(size_type num_bits)
+    {
+        const size_type full_blocks = num_bits / bits_per_block;
+        const size_type partial_block =
+            (num_bits % bits_per_block != 0) ? 1 : 0;
+
+        return full_blocks + partial_block;
+    }
+
+private:
+    container_type m_bits; // packed bit storage, one Block per element
+    size_type m_size;      // number of bits in the bitset
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP
diff --git a/boost/compute/container/flat_map.hpp b/boost/compute/container/flat_map.hpp
new file mode 100644
index 0000000000..684c4da122
--- /dev/null
+++ b/boost/compute/container/flat_map.hpp
@@ -0,0 +1,406 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP
+#define BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP
+
+#include <cstddef>
+#include <utility>
+#include <exception>
+
+#include <boost/config.hpp>
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/exception.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/algorithm/lower_bound.hpp>
+#include <boost/compute/algorithm/upper_bound.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/types/pair.hpp>
+#include <boost/compute/detail/buffer_value.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class flat_map
+/// \brief An associative container storing its key/value pairs in a
+/// single ::boost::compute::vector.
+///
+/// insert() chooses the insertion position with upper_bound() on the keys
+/// and rejects a key equal to the element just before that position, so
+/// elements added through insert() stay ordered by key and unique.
+///
+/// Most operations come in two forms: one taking an explicit
+/// \c command_queue, and one that uses the vector's default queue and
+/// calls \c finish() on it before returning.
+template<class Key, class T>
+class flat_map
+{
+public:
+    typedef Key key_type;
+    typedef T mapped_type;
+    typedef typename ::boost::compute::vector<std::pair<Key, T> > vector_type;
+    typedef typename vector_type::value_type value_type;
+    typedef typename vector_type::size_type size_type;
+    typedef typename vector_type::difference_type difference_type;
+    typedef typename vector_type::reference reference;
+    typedef typename vector_type::const_reference const_reference;
+    typedef typename vector_type::pointer pointer;
+    typedef typename vector_type::const_pointer const_pointer;
+    typedef typename vector_type::iterator iterator;
+    typedef typename vector_type::const_iterator const_iterator;
+    typedef typename vector_type::reverse_iterator reverse_iterator;
+    typedef typename vector_type::const_reverse_iterator const_reverse_iterator;
+
+    /// Creates an empty flat_map whose storage lives in \p context.
+    explicit flat_map(const context &context = system::default_context())
+        : m_vector(context)
+    {
+    }
+
+    /// Creates a new flat_map as a copy of \p other.
+    flat_map(const flat_map<Key, T> &other)
+        : m_vector(other.m_vector)
+    {
+    }
+
+    /// Copies the elements of \p other to \c *this.
+    flat_map<Key, T>& operator=(const flat_map<Key, T> &other)
+    {
+        if(this != &other){
+            m_vector = other.m_vector;
+        }
+
+        return *this;
+    }
+
+    /// Destroys the flat_map.
+    ~flat_map()
+    {
+    }
+
+    /// Returns an iterator to the first element.
+    iterator begin()
+    {
+        return m_vector.begin();
+    }
+
+    /// Returns a const_iterator to the first element.
+    const_iterator begin() const
+    {
+        return m_vector.begin();
+    }
+
+    /// Returns a const_iterator to the first element.
+    const_iterator cbegin() const
+    {
+        return m_vector.cbegin();
+    }
+
+    /// Returns an iterator to one past the last element.
+    iterator end()
+    {
+        return m_vector.end();
+    }
+
+    /// Returns a const_iterator to one past the last element.
+    const_iterator end() const
+    {
+        return m_vector.end();
+    }
+
+    /// Returns a const_iterator to one past the last element.
+    const_iterator cend() const
+    {
+        return m_vector.cend();
+    }
+
+    /// Returns a reverse_iterator to the first element of the reversed map.
+    reverse_iterator rbegin()
+    {
+        return m_vector.rbegin();
+    }
+
+    /// Returns a const_reverse_iterator to the first element of the
+    /// reversed map.
+    const_reverse_iterator rbegin() const
+    {
+        return m_vector.rbegin();
+    }
+
+    /// Returns a const_reverse_iterator to the first element of the
+    /// reversed map.
+    const_reverse_iterator crbegin() const
+    {
+        return m_vector.crbegin();
+    }
+
+    /// Returns a reverse_iterator to one past the last element of the
+    /// reversed map.
+    reverse_iterator rend()
+    {
+        return m_vector.rend();
+    }
+
+    /// Returns a const_reverse_iterator to one past the last element of the
+    /// reversed map.
+    const_reverse_iterator rend() const
+    {
+        return m_vector.rend();
+    }
+
+    /// Returns a const_reverse_iterator to one past the last element of the
+    /// reversed map.
+    const_reverse_iterator crend() const
+    {
+        return m_vector.crend();
+    }
+
+    /// Returns the number of key/value pairs in the map.
+    size_type size() const
+    {
+        return m_vector.size();
+    }
+
+    /// Returns the maximum size reported by the underlying vector.
+    size_type max_size() const
+    {
+        return m_vector.max_size();
+    }
+
+    /// Returns \c true if the map contains no elements.
+    bool empty() const
+    {
+        return m_vector.empty();
+    }
+
+    /// Returns the capacity of the underlying vector.
+    size_type capacity() const
+    {
+        return m_vector.capacity();
+    }
+
+    /// Reserves storage for \p size elements using \p queue.
+    void reserve(size_type size, command_queue &queue)
+    {
+        m_vector.reserve(size, queue);
+    }
+
+    /// Reserves storage for \p size elements using the default queue.
+    void reserve(size_type size)
+    {
+        command_queue queue = m_vector.default_queue();
+        reserve(size, queue);
+        queue.finish();
+    }
+
+    /// Forwards to the underlying vector's shrink_to_fit().
+    void shrink_to_fit()
+    {
+        m_vector.shrink_to_fit();
+    }
+
+    /// Removes all elements from the map.
+    void clear()
+    {
+        m_vector.clear();
+    }
+
+    /// Inserts \p value unless an element with the same key already exists.
+    ///
+    /// \return a pair of an iterator to the element with key
+    ///         \c value.first and a flag that is \c true if \p value was
+    ///         inserted and \c false if the key was already present.
+    std::pair<iterator, bool>
+    insert(const value_type &value, command_queue &queue)
+    {
+        iterator location = upper_bound(value.first, queue);
+
+        // an equal key, if present, sits directly before the upper bound
+        if(location != begin()){
+            value_type current_value;
+            ::boost::compute::copy_n(location - 1, 1, &current_value, queue);
+            if(value.first == current_value.first){
+                return std::make_pair(location - 1, false);
+            }
+        }
+
+        // remember the position as an index: the insertion may replace the
+        // vector's storage (NOTE(review): vector::insert is defined
+        // elsewhere - confirm), which would leave `location` referring to
+        // the old storage
+        const size_type index = location.get_index();
+
+        // perform the insertion on the caller's queue
+        m_vector.insert(location, value, queue);
+
+        return std::make_pair(begin() + index, true);
+    }
+
+    /// Inserts \p value using the default queue.
+    std::pair<iterator, bool> insert(const value_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        std::pair<iterator, bool> result = insert(value, queue);
+        queue.finish();
+        return result;
+    }
+
+    /// Removes the element at \p position using \p queue.
+    iterator erase(const const_iterator &position, command_queue &queue)
+    {
+        return erase(position, position + 1, queue);
+    }
+
+    /// Removes the element at \p position using the default queue.
+    iterator erase(const const_iterator &position)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = erase(position, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Removes the elements in the range [\p first, \p last) using \p queue.
+    iterator erase(const const_iterator &first,
+                   const const_iterator &last,
+                   command_queue &queue)
+    {
+        return m_vector.erase(first, last, queue);
+    }
+
+    /// Removes the elements in the range [\p first, \p last) using the
+    /// default queue.
+    iterator erase(const const_iterator &first, const const_iterator &last)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = erase(first, last, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Removes the element with key \p value (if any) using \p queue.
+    ///
+    /// \return the number of elements removed (0 or 1).
+    size_type erase(const key_type &value, command_queue &queue)
+    {
+        iterator position = find(value, queue);
+
+        if(position == end()){
+            return 0;
+        }
+        else {
+            erase(position, queue);
+            return 1;
+        }
+    }
+
+    /// Removes the element with key \p value (if any) using the default
+    /// queue.
+    ///
+    /// \return the number of elements removed (0 or 1).
+    size_type erase(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        size_type result = erase(value, queue);
+        queue.finish();
+        return result;
+    }
+
+    /// Returns an iterator to the element with key \p value, or end() if
+    /// there is none. Searches the keys with ::boost::compute::find().
+    iterator find(const key_type &value, command_queue &queue)
+    {
+        ::boost::compute::get<0> get_key;
+
+        return ::boost::compute::find(
+            ::boost::compute::make_transform_iterator(begin(), get_key),
+            ::boost::compute::make_transform_iterator(end(), get_key),
+            value,
+            queue
+        ).base();
+    }
+
+    /// Same as find(value, queue) using the default queue.
+    iterator find(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = find(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Returns a const_iterator to the element with key \p value, or end()
+    /// if there is none.
+    const_iterator find(const key_type &value, command_queue &queue) const
+    {
+        ::boost::compute::get<0> get_key;
+
+        return ::boost::compute::find(
+            ::boost::compute::make_transform_iterator(begin(), get_key),
+            ::boost::compute::make_transform_iterator(end(), get_key),
+            value,
+            queue
+        ).base();
+    }
+
+    /// Same as find(value, queue) const using the default queue.
+    const_iterator find(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        const_iterator iter = find(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Returns 1 if an element with key \p value exists, 0 otherwise.
+    size_type count(const key_type &value, command_queue &queue) const
+    {
+        return find(value, queue) != end() ? 1 : 0;
+    }
+
+    /// Same as count(value, queue) using the default queue.
+    size_type count(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        size_type result = count(value, queue);
+        queue.finish();
+        return result;
+    }
+
+    /// Returns the result of ::boost::compute::lower_bound() for \p value
+    /// over the keys, as an iterator into the map.
+    iterator lower_bound(const key_type &value, command_queue &queue)
+    {
+        ::boost::compute::get<0> get_key;
+
+        return ::boost::compute::lower_bound(
+            ::boost::compute::make_transform_iterator(begin(), get_key),
+            ::boost::compute::make_transform_iterator(end(), get_key),
+            value,
+            queue
+        ).base();
+    }
+
+    /// Same as lower_bound(value, queue) using the default queue.
+    iterator lower_bound(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = lower_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Const overload of lower_bound(value, queue).
+    const_iterator lower_bound(const key_type &value, command_queue &queue) const
+    {
+        ::boost::compute::get<0> get_key;
+
+        return ::boost::compute::lower_bound(
+            ::boost::compute::make_transform_iterator(begin(), get_key),
+            ::boost::compute::make_transform_iterator(end(), get_key),
+            value,
+            queue
+        ).base();
+    }
+
+    /// Const overload of lower_bound(value) using the default queue.
+    const_iterator lower_bound(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        const_iterator iter = lower_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Returns the result of ::boost::compute::upper_bound() for \p value
+    /// over the keys, as an iterator into the map.
+    iterator upper_bound(const key_type &value, command_queue &queue)
+    {
+        ::boost::compute::get<0> get_key;
+
+        return ::boost::compute::upper_bound(
+            ::boost::compute::make_transform_iterator(begin(), get_key),
+            ::boost::compute::make_transform_iterator(end(), get_key),
+            value,
+            queue
+        ).base();
+    }
+
+    /// Same as upper_bound(value, queue) using the default queue.
+    iterator upper_bound(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = upper_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Const overload of upper_bound(value, queue).
+    const_iterator upper_bound(const key_type &value, command_queue &queue) const
+    {
+        ::boost::compute::get<0> get_key;
+
+        return ::boost::compute::upper_bound(
+            ::boost::compute::make_transform_iterator(begin(), get_key),
+            ::boost::compute::make_transform_iterator(end(), get_key),
+            value,
+            queue
+        ).base();
+    }
+
+    /// Const overload of upper_bound(value) using the default queue.
+    const_iterator upper_bound(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        const_iterator iter = upper_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Returns a copy of the value mapped to \p key.
+    ///
+    /// \throws std::out_of_range if no element with key \p key exists.
+    const mapped_type at(const key_type &key) const
+    {
+        const_iterator iter = find(key);
+        if(iter == end()){
+            BOOST_THROW_EXCEPTION(std::out_of_range("key not found"));
+        }
+
+        return value_type(*iter).second;
+    }
+
+    /// Returns a proxy for the value mapped to \p key, first inserting a
+    /// default-constructed value if \p key is not present.
+    detail::buffer_value<mapped_type> operator[](const key_type &key)
+    {
+        iterator iter = find(key);
+        if(iter == end()){
+            iter = insert(std::make_pair(key, mapped_type())).first;
+        }
+
+        // byte offset of the mapped value inside the vector's buffer
+        // NOTE(review): this assumes the mapped value is stored directly
+        // after the key (i.e. no padding between pair members) - confirm
+        size_t index = iter.get_index() * sizeof(value_type) + sizeof(key_type);
+
+        return detail::buffer_value<mapped_type>(m_vector.get_buffer(), index);
+    }
+
+private:
+    // underlying storage for the key/value pairs
+    ::boost::compute::vector<std::pair<Key, T> > m_vector;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP
diff --git a/boost/compute/container/flat_set.hpp b/boost/compute/container/flat_set.hpp
new file mode 100644
index 0000000000..8826f78846
--- /dev/null
+++ b/boost/compute/container/flat_set.hpp
@@ -0,0 +1,339 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP
+#define BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP
+
+#include <cstddef>
+#include <utility>
+
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/algorithm/lower_bound.hpp>
+#include <boost/compute/algorithm/upper_bound.hpp>
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class flat_set
+/// \brief A set container storing its values in a single
+/// ::boost::compute::vector.
+///
+/// insert() chooses the insertion position with upper_bound() and rejects
+/// a value equal to the element just before that position, so elements
+/// added through insert() stay ordered and unique.
+///
+/// Most operations come in two forms: one taking an explicit
+/// \c command_queue, and one that uses the vector's default queue and
+/// calls \c finish() on it before returning.
+template<class T>
+class flat_set
+{
+public:
+    typedef T key_type;
+    typedef typename vector<T>::value_type value_type;
+    typedef typename vector<T>::size_type size_type;
+    typedef typename vector<T>::difference_type difference_type;
+    typedef typename vector<T>::reference reference;
+    typedef typename vector<T>::const_reference const_reference;
+    typedef typename vector<T>::pointer pointer;
+    typedef typename vector<T>::const_pointer const_pointer;
+    typedef typename vector<T>::iterator iterator;
+    typedef typename vector<T>::const_iterator const_iterator;
+    typedef typename vector<T>::reverse_iterator reverse_iterator;
+    typedef typename vector<T>::const_reverse_iterator const_reverse_iterator;
+
+    /// Creates an empty flat_set whose storage lives in \p context.
+    explicit flat_set(const context &context = system::default_context())
+        : m_vector(context)
+    {
+    }
+
+    /// Creates a new flat_set as a copy of \p other.
+    flat_set(const flat_set<T> &other)
+        : m_vector(other.m_vector)
+    {
+    }
+
+    /// Copies the elements of \p other to \c *this.
+    flat_set<T>& operator=(const flat_set<T> &other)
+    {
+        if(this != &other){
+            m_vector = other.m_vector;
+        }
+
+        return *this;
+    }
+
+    /// Destroys the flat_set.
+    ~flat_set()
+    {
+    }
+
+    /// Returns an iterator to the first element.
+    iterator begin()
+    {
+        return m_vector.begin();
+    }
+
+    /// Returns a const_iterator to the first element.
+    const_iterator begin() const
+    {
+        return m_vector.begin();
+    }
+
+    /// Returns a const_iterator to the first element.
+    const_iterator cbegin() const
+    {
+        return m_vector.cbegin();
+    }
+
+    /// Returns an iterator to one past the last element.
+    iterator end()
+    {
+        return m_vector.end();
+    }
+
+    /// Returns a const_iterator to one past the last element.
+    const_iterator end() const
+    {
+        return m_vector.end();
+    }
+
+    /// Returns a const_iterator to one past the last element.
+    const_iterator cend() const
+    {
+        return m_vector.cend();
+    }
+
+    /// Returns a reverse_iterator to the first element of the reversed set.
+    reverse_iterator rbegin()
+    {
+        return m_vector.rbegin();
+    }
+
+    /// Returns a const_reverse_iterator to the first element of the
+    /// reversed set.
+    const_reverse_iterator rbegin() const
+    {
+        return m_vector.rbegin();
+    }
+
+    /// Returns a const_reverse_iterator to the first element of the
+    /// reversed set.
+    const_reverse_iterator crbegin() const
+    {
+        return m_vector.crbegin();
+    }
+
+    /// Returns a reverse_iterator to one past the last element of the
+    /// reversed set.
+    reverse_iterator rend()
+    {
+        return m_vector.rend();
+    }
+
+    /// Returns a const_reverse_iterator to one past the last element of the
+    /// reversed set.
+    const_reverse_iterator rend() const
+    {
+        return m_vector.rend();
+    }
+
+    /// Returns a const_reverse_iterator to one past the last element of the
+    /// reversed set.
+    const_reverse_iterator crend() const
+    {
+        return m_vector.crend();
+    }
+
+    /// Returns the number of elements in the set.
+    size_type size() const
+    {
+        return m_vector.size();
+    }
+
+    /// Returns the maximum size reported by the underlying vector.
+    size_type max_size() const
+    {
+        return m_vector.max_size();
+    }
+
+    /// Returns \c true if the set contains no elements.
+    bool empty() const
+    {
+        return m_vector.empty();
+    }
+
+    /// Returns the capacity of the underlying vector.
+    size_type capacity() const
+    {
+        return m_vector.capacity();
+    }
+
+    /// Reserves storage for \p size elements using \p queue.
+    void reserve(size_type size, command_queue &queue)
+    {
+        m_vector.reserve(size, queue);
+    }
+
+    /// Reserves storage for \p size elements using the default queue.
+    void reserve(size_type size)
+    {
+        command_queue queue = m_vector.default_queue();
+        reserve(size, queue);
+        queue.finish();
+    }
+
+    /// Forwards to the underlying vector's shrink_to_fit().
+    void shrink_to_fit()
+    {
+        m_vector.shrink_to_fit();
+    }
+
+    /// Removes all elements from the set.
+    void clear()
+    {
+        m_vector.clear();
+    }
+
+    /// Inserts \p value unless an equal value already exists.
+    ///
+    /// \return a pair of an iterator to the element equal to \p value and
+    ///         a flag that is \c true if \p value was inserted and
+    ///         \c false if it was already present.
+    std::pair<iterator, bool>
+    insert(const value_type &value, command_queue &queue)
+    {
+        iterator location = upper_bound(value, queue);
+
+        // an equal value, if present, sits directly before the upper bound
+        if(location != begin()){
+            value_type current_value;
+            ::boost::compute::copy_n(location - 1, 1, &current_value, queue);
+            if(value == current_value){
+                return std::make_pair(location - 1, false);
+            }
+        }
+
+        // remember the position as an index: the insertion may replace the
+        // vector's storage (NOTE(review): vector::insert is defined
+        // elsewhere - confirm), which would leave `location` referring to
+        // the old storage
+        const size_type index = location.get_index();
+
+        m_vector.insert(location, value, queue);
+
+        return std::make_pair(begin() + index, true);
+    }
+
+    /// Inserts \p value using the default queue.
+    std::pair<iterator, bool> insert(const value_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        std::pair<iterator, bool> result = insert(value, queue);
+        queue.finish();
+        return result;
+    }
+
+    /// Removes the element at \p position using \p queue.
+    iterator erase(const const_iterator &position, command_queue &queue)
+    {
+        return erase(position, position + 1, queue);
+    }
+
+    /// Removes the element at \p position using the default queue.
+    iterator erase(const const_iterator &position)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = erase(position, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Removes the elements in the range [\p first, \p last) using \p queue.
+    iterator erase(const const_iterator &first,
+                   const const_iterator &last,
+                   command_queue &queue)
+    {
+        return m_vector.erase(first, last, queue);
+    }
+
+    /// Removes the elements in the range [\p first, \p last) using the
+    /// default queue.
+    iterator erase(const const_iterator &first, const const_iterator &last)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = erase(first, last, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Removes the element equal to \p value (if any) using \p queue.
+    ///
+    /// \return the number of elements removed (0 or 1).
+    size_type erase(const key_type &value, command_queue &queue)
+    {
+        iterator position = find(value, queue);
+
+        if(position == end()){
+            return 0;
+        }
+        else {
+            erase(position, queue);
+            return 1;
+        }
+    }
+
+    /// Removes the element equal to \p value (if any) using the default
+    /// queue.
+    ///
+    /// \return the number of elements removed (0 or 1).
+    size_type erase(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        size_type result = erase(value, queue);
+        queue.finish();
+        return result;
+    }
+
+    /// Returns an iterator to the element equal to \p value, or end() if
+    /// there is none. Searches with ::boost::compute::find().
+    iterator find(const key_type &value, command_queue &queue)
+    {
+        return ::boost::compute::find(begin(), end(), value, queue);
+    }
+
+    /// Same as find(value, queue) using the default queue.
+    iterator find(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = find(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Const overload of find(value, queue).
+    const_iterator find(const key_type &value, command_queue &queue) const
+    {
+        return ::boost::compute::find(begin(), end(), value, queue);
+    }
+
+    /// Const overload of find(value) using the default queue.
+    const_iterator find(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        const_iterator iter = find(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Returns 1 if an element equal to \p value exists, 0 otherwise.
+    size_type count(const key_type &value, command_queue &queue) const
+    {
+        return find(value, queue) != end() ? 1 : 0;
+    }
+
+    /// Same as count(value, queue) using the default queue.
+    size_type count(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        size_type result = count(value, queue);
+        queue.finish();
+        return result;
+    }
+
+    /// Returns the result of ::boost::compute::lower_bound() for \p value
+    /// over the elements of the set.
+    iterator lower_bound(const key_type &value, command_queue &queue)
+    {
+        return ::boost::compute::lower_bound(begin(), end(), value, queue);
+    }
+
+    /// Same as lower_bound(value, queue) using the default queue.
+    iterator lower_bound(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = lower_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Const overload of lower_bound(value, queue).
+    const_iterator lower_bound(const key_type &value, command_queue &queue) const
+    {
+        return ::boost::compute::lower_bound(begin(), end(), value, queue);
+    }
+
+    /// Const overload of lower_bound(value) using the default queue.
+    const_iterator lower_bound(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        const_iterator iter = lower_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Returns the result of ::boost::compute::upper_bound() for \p value
+    /// over the elements of the set.
+    iterator upper_bound(const key_type &value, command_queue &queue)
+    {
+        return ::boost::compute::upper_bound(begin(), end(), value, queue);
+    }
+
+    /// Same as upper_bound(value, queue) using the default queue.
+    iterator upper_bound(const key_type &value)
+    {
+        command_queue queue = m_vector.default_queue();
+        iterator iter = upper_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+    /// Const overload of upper_bound(value, queue).
+    const_iterator upper_bound(const key_type &value, command_queue &queue) const
+    {
+        return ::boost::compute::upper_bound(begin(), end(), value, queue);
+    }
+
+    /// Const overload of upper_bound(value) using the default queue.
+    const_iterator upper_bound(const key_type &value) const
+    {
+        command_queue queue = m_vector.default_queue();
+        const_iterator iter = upper_bound(value, queue);
+        queue.finish();
+        return iter;
+    }
+
+private:
+    // underlying storage for the set's elements
+    vector<T> m_vector;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP
diff --git a/boost/compute/container/mapped_view.hpp b/boost/compute/container/mapped_view.hpp
new file mode 100644
index 0000000000..59b1e4e0a4
--- /dev/null
+++ b/boost/compute/container/mapped_view.hpp
@@ -0,0 +1,250 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP
+#define BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP
+
+#include <cstddef>
+#include <exception>
+
+#include <boost/config.hpp>
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class mapped_view
+/// \brief A mapped view of host memory.
+///
+/// A mapped_view wraps a block of host-allocated memory in a buffer
+/// created with \c buffer::use_host_ptr, which lets that memory be used
+/// with the Boost.Compute algorithms.
+///
+/// The following example shows how to map a simple C-array containing
+/// data on the host to the device and run the reduce() algorithm to
+/// calculate the sum:
+///
+/// \snippet test/test_mapped_view.cpp reduce
+///
+/// \see buffer
+template<class T>
+class mapped_view
+{
+public:
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    typedef buffer_iterator<T> iterator;
+    typedef buffer_iterator<T> const_iterator;
+
+    /// Creates a null mapped_view object.
+    mapped_view()
+        : m_mapped_ptr(0)
+    {
+    }
+
+    /// Creates a read-write mapped_view over the \p n elements starting at
+    /// \p host_ptr, using a buffer created in \p context. The view starts
+    /// out unmapped; see map() and unmap().
+    mapped_view(T *host_ptr,
+                size_type n,
+                const context &context = system::default_context())
+        : m_buffer(_make_mapped_buffer(host_ptr, n, context)),
+          m_mapped_ptr(0)
+    {
+    }
+
+    /// Creates a read-only mapped_view over the \p n elements starting at
+    /// \p host_ptr, using a buffer created in \p context. The view starts
+    /// out unmapped; see map() and unmap().
+    mapped_view(const T *host_ptr,
+                size_type n,
+                const context &context = system::default_context())
+        : m_buffer(_make_mapped_buffer(host_ptr, n, context)),
+          m_mapped_ptr(0)
+    {
+    }
+
+    /// Creates a copy of \p other. Only the buffer is copied; the new view
+    /// starts out unmapped.
+    mapped_view(const mapped_view<T> &other)
+        : m_buffer(other.m_buffer),
+          m_mapped_ptr(0)
+    {
+    }
+
+    /// Copies the mapped buffer from \p other and resets the mapped
+    /// pointer of \c *this.
+    mapped_view<T>& operator=(const mapped_view<T> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_buffer = other.m_buffer;
+        m_mapped_ptr = 0;
+
+        return *this;
+    }
+
+    /// Destroys the mapped_view object.
+    ~mapped_view()
+    {
+    }
+
+    /// Returns an iterator to the first element in the mapped_view.
+    iterator begin()
+    {
+        return ::boost::compute::make_buffer_iterator<T>(m_buffer, 0);
+    }
+
+    /// Returns a const_iterator to the first element in the mapped_view.
+    const_iterator begin() const
+    {
+        return ::boost::compute::make_buffer_iterator<T>(m_buffer, 0);
+    }
+
+    /// Returns a const_iterator to the first element in the mapped_view.
+    const_iterator cbegin() const
+    {
+        return this->begin();
+    }
+
+    /// Returns an iterator to one past the last element in the mapped_view.
+    iterator end()
+    {
+        return ::boost::compute::make_buffer_iterator<T>(m_buffer, size());
+    }
+
+    /// Returns a const_iterator to one past the last element in the mapped_view.
+    const_iterator end() const
+    {
+        return ::boost::compute::make_buffer_iterator<T>(m_buffer, size());
+    }
+
+    /// Returns a const_iterator to one past the last element in the mapped_view.
+    const_iterator cend() const
+    {
+        return this->end();
+    }
+
+    /// Returns the number of elements in the mapped_view (the buffer size
+    /// in bytes divided by \c sizeof(T)).
+    size_type size() const
+    {
+        return m_buffer.size() / sizeof(T);
+    }
+
+    /// Returns the host data pointer (queried via \c CL_MEM_HOST_PTR).
+    T* get_host_ptr()
+    {
+        void *raw_ptr = m_buffer.get_info<void *>(CL_MEM_HOST_PTR);
+        return static_cast<T *>(raw_ptr);
+    }
+
+    /// Returns the host data pointer (queried via \c CL_MEM_HOST_PTR).
+    const T* get_host_ptr() const
+    {
+        void *raw_ptr = m_buffer.get_info<void *>(CL_MEM_HOST_PTR);
+        return static_cast<T *>(raw_ptr);
+    }
+
+    /// Resizes the mapped_view to \p size elements by creating a new
+    /// read-write buffer over the current host pointer.
+    void resize(size_type size)
+    {
+        T *host_data = get_host_ptr();
+
+        m_buffer = _make_mapped_buffer(host_data, size, m_buffer.get_context());
+    }
+
+    /// Returns \c true if the mapped_view is empty.
+    bool empty() const
+    {
+        return 0 == size();
+    }
+
+    /// Returns the mapped buffer.
+    const buffer& get_buffer() const
+    {
+        return m_buffer;
+    }
+
+    /// Maps the whole buffer into the host address space with \p flags.
+    /// Asserts that the view is not currently mapped.
+    ///
+    /// \see_opencl_ref{clEnqueueMapBuffer}
+    void map(cl_map_flags flags, command_queue &queue)
+    {
+        BOOST_ASSERT(m_mapped_ptr == 0);
+
+        void *mapped = queue.enqueue_map_buffer(
+            m_buffer, flags, 0, m_buffer.size()
+        );
+        m_mapped_ptr = mapped;
+    }
+
+    /// Maps the buffer into the host address space for reading and writing.
+    ///
+    /// Equivalent to:
+    /// \code
+    /// map(CL_MAP_READ | CL_MAP_WRITE, queue);
+    /// \endcode
+    void map(command_queue &queue)
+    {
+        map(CL_MAP_READ | CL_MAP_WRITE, queue);
+    }
+
+    /// Unmaps the buffer from the host address space. Asserts that the
+    /// view is currently mapped.
+    ///
+    /// \see_opencl_ref{clEnqueueUnmapMemObject}
+    void unmap(command_queue &queue)
+    {
+        BOOST_ASSERT(m_mapped_ptr != 0);
+
+        queue.enqueue_unmap_buffer(m_buffer, m_mapped_ptr);
+
+        m_mapped_ptr = 0;
+    }
+
+private:
+    /// \internal_
+    /// Builds a read-write, use_host_ptr buffer of \p n elements over
+    /// \p host_ptr.
+    static buffer _make_mapped_buffer(T *host_ptr,
+                                      size_t n,
+                                      const context &context)
+    {
+        return buffer(
+            context,
+            n * sizeof(T),
+            buffer::read_write | buffer::use_host_ptr,
+            host_ptr
+        );
+    }
+
+    /// \internal_
+    /// Builds a read-only, use_host_ptr buffer of \p n elements over
+    /// \p host_ptr (constness is cast away to match the buffer API).
+    static buffer _make_mapped_buffer(const T *host_ptr,
+                                      size_t n,
+                                      const context &context)
+    {
+        return buffer(
+            context,
+            n * sizeof(T),
+            buffer::read_only | buffer::use_host_ptr,
+            const_cast<void *>(static_cast<const void *>(host_ptr))
+        );
+    }
+
+private:
+    buffer m_buffer;    // buffer created over the host memory
+    void *m_mapped_ptr; // pointer returned by map(), or 0 when unmapped
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP
diff --git a/boost/compute/container/stack.hpp b/boost/compute/container/stack.hpp
new file mode 100644
index 0000000000..dc86df459a
--- /dev/null
+++ b/boost/compute/container/stack.hpp
@@ -0,0 +1,81 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_STACK_HPP
+#define BOOST_COMPUTE_CONTAINER_STACK_HPP
+
+#include <boost/compute/container/vector.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class stack
+/// \brief A stack adaptor that stores its elements in a vector<T>.
+///
+/// push() and pop() operate on the back of the underlying vector, and
+/// top() returns a copy of its last element.
+template<class T>
+class stack
+{
+public:
+    typedef vector<T> container_type;
+    typedef typename container_type::size_type size_type;
+    typedef typename container_type::value_type value_type;
+
+    /// Creates an empty stack.
+    stack()
+    {
+    }
+
+    /// Creates a new stack as a copy of \p other.
+    stack(const stack<T> &other)
+        : m_vector(other.m_vector)
+    {
+    }
+
+    /// Copies the elements of \p other to \c *this.
+    stack<T>& operator=(const stack<T> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_vector = other.m_vector;
+        return *this;
+    }
+
+    /// Destroys the stack.
+    ~stack()
+    {
+    }
+
+    /// Returns \c true if the stack contains no elements.
+    bool empty() const
+    {
+        return this->m_vector.empty();
+    }
+
+    /// Returns the number of elements on the stack.
+    size_type size() const
+    {
+        return this->m_vector.size();
+    }
+
+    /// Returns a copy of the element on top of the stack.
+    value_type top() const
+    {
+        return this->m_vector.back();
+    }
+
+    /// Pushes \p value onto the top of the stack.
+    void push(const T &value)
+    {
+        this->m_vector.push_back(value);
+    }
+
+    /// Removes the element on top of the stack.
+    void pop()
+    {
+        this->m_vector.pop_back();
+    }
+
+private:
+    // underlying storage; the back of the vector is the top of the stack
+    container_type m_vector;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_STACK_HPP
diff --git a/boost/compute/container/string.hpp b/boost/compute/container/string.hpp
new file mode 100644
index 0000000000..a721ab5746
--- /dev/null
+++ b/boost/compute/container/string.hpp
@@ -0,0 +1,25 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_STRING_HPP
+#define BOOST_COMPUTE_CONTAINER_STRING_HPP
+
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/container/basic_string.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Convenience name for basic_string instantiated with the \c char_
+/// element type (declared in boost/compute/types/fundamental.hpp).
+typedef basic_string<char_> string;
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_STRING_HPP
diff --git a/boost/compute/container/valarray.hpp b/boost/compute/container/valarray.hpp
new file mode 100644
index 0000000000..8ac8e01753
--- /dev/null
+++ b/boost/compute/container/valarray.hpp
@@ -0,0 +1,499 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_VALARRAY_HPP
+#define BOOST_COMPUTE_CONTAINER_VALARRAY_HPP
+
+#include <cstddef>
+#include <valarray>
+
+#include <boost/static_assert.hpp>
+#include <boost/type_traits.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/max_element.hpp>
+#include <boost/compute/algorithm/min_element.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/algorithm/accumulate.hpp>
+#include <boost/compute/detail/buffer_value.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/functional/bind.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class valarray
+/// \brief An array of \c T values stored in a boost::compute::buffer.
+///
+/// Provides element-wise unary, compound-assignment and (as free functions
+/// defined after the class) binary, comparison and logical operators, in the
+/// spirit of std::valarray. Operators that yield truth values return
+/// valarray<char> because an OpenCL buffer cannot hold \c bool.
+template<class T>
+class valarray
+{
+public:
+    /// Creates a valarray backed by a zero-byte buffer in \p context.
+    explicit valarray(const context &context = system::default_context())
+        : m_buffer(context, 0)
+    {
+    }
+
+    /// Creates a valarray with room for \p size elements in \p context.
+    /// This constructor does not write any values into the buffer.
+    explicit valarray(size_t size,
+                      const context &context = system::default_context())
+        : m_buffer(context, size * sizeof(T))
+    {
+    }
+
+    /// Creates a valarray of \p size elements in \p context, each set to
+    /// \p value.
+    valarray(const T &value,
+             size_t size,
+             const context &context = system::default_context())
+        : m_buffer(context, size * sizeof(T))
+    {
+        fill(begin(), end(), value);
+    }
+
+    /// Creates a valarray of \p size elements in \p context and copies the
+    /// range [\p values, \p values + \p size) into it.
+    valarray(const T *values,
+             size_t size,
+             const context &context = system::default_context())
+        : m_buffer(context, size * sizeof(T))
+    {
+        copy(values, values + size, begin());
+    }
+
+    /// Creates a valarray in the context of \p other holding a copy of its
+    /// elements.
+    valarray(const valarray<T> &other)
+        : m_buffer(other.m_buffer.get_context(), other.size() * sizeof(T))
+    {
+        // the new buffer is a separate allocation, so the elements have to
+        // be transferred explicitly (same as copy-assignment does)
+        copy(other.begin(), other.end(), begin());
+    }
+
+    /// Creates a valarray in \p context holding a copy of the elements of
+    /// the std::valarray \p valarray.
+    valarray(const std::valarray<T> &valarray,
+             const context &context = system::default_context())
+        : m_buffer(context, valarray.size() * sizeof(T))
+    {
+        // form the end pointer by arithmetic rather than by indexing the
+        // one-past-the-end element
+        copy(&valarray[0], &valarray[0] + valarray.size(), begin());
+    }
+
+    /// Replaces the contents with a copy of \p other. The buffer is
+    /// re-created in the context of \p other. Self-assignment is a no-op.
+    valarray<T>& operator=(const valarray<T> &other)
+    {
+        if(this != &other){
+            // change to other's OpenCL context
+            m_buffer = buffer(other.m_buffer.get_context(), other.size() * sizeof(T));
+            copy(other.begin(), other.end(), begin());
+        }
+
+        return *this;
+    }
+
+    /// Replaces the contents with a copy of the std::valarray \p valarray.
+    /// The buffer is re-created in this object's current context.
+    valarray<T>& operator=(const std::valarray<T> &valarray)
+    {
+        m_buffer = buffer(m_buffer.get_context(), valarray.size() * sizeof(T));
+        // form the end pointer by arithmetic rather than by indexing the
+        // one-past-the-end element
+        copy(&valarray[0], &valarray[0] + valarray.size(), begin());
+
+        return *this;
+    }
+
+    // Compound assignments are only declared inside the class; their
+    // definitions are generated by macros after the class body.
+    valarray<T>& operator*=(const T&);
+
+    valarray<T>& operator/=(const T&);
+
+    valarray<T>& operator%=(const T& val);
+
+    /// Returns a new valarray holding a copy of every element.
+    valarray<T> operator+() const
+    {
+        //  This operator can be used with any type.
+        valarray<T> result(size());
+        copy(begin(), end(), result.begin());
+        return result;
+    }
+
+    /// Returns a new valarray holding the negation (\c -x) of every element.
+    valarray<T> operator-() const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            is_fundamental<T>::value,
+            "This operator can be used with all OpenCL built-in scalar"
+            " and vector types"
+        );
+        valarray<T> result(size());
+        BOOST_COMPUTE_FUNCTION(T, unary_minus, (T x),
+        {
+            return -x;
+        });
+        transform(begin(), end(), result.begin(), unary_minus);
+        return result;
+    }
+
+    /// Returns a new valarray holding the bitwise complement (\c ~x) of
+    /// every element. Rejected at compile time for floating-point types.
+    valarray<T> operator~() const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            is_fundamental<T>::value &&
+                !is_floating_point<typename scalar_type<T>::type>::value,
+            "This operator can be used with all OpenCL built-in scalar"
+            " and vector types except the built-in scalar and vector float types"
+        );
+        valarray<T> result(size());
+        BOOST_COMPUTE_FUNCTION(T, bitwise_not, (T x),
+        {
+            return ~x;
+        });
+        transform(begin(), end(), result.begin(), bitwise_not);
+        return result;
+    }
+
+    /// Returns the logical negation (\c !x) of every element.
+    ///
+    /// An OpenCL memory buffer cannot have \c bool element type, so the
+    /// result is a valarray<char> rather than a valarray<bool>:
+    /// 1 means true, 0 means false.
+    valarray<char> operator!() const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            is_fundamental<T>::value,
+            "This operator can be used with all OpenCL built-in scalar"
+            " and vector types"
+        );
+        valarray<char> result(size());
+        BOOST_COMPUTE_FUNCTION(char, logical_not, (T x),
+        {
+            return !x;
+        });
+        // begin() of valarray<char> is private to that specialization, so
+        // the output position is obtained through operator[] instead
+        transform(begin(), end(), &result[0], logical_not);
+        return result;
+    }
+
+    valarray<T>& operator+=(const T&);
+
+    valarray<T>& operator-=(const T&);
+
+    valarray<T>& operator^=(const T&);
+
+    valarray<T>& operator&=(const T&);
+
+    valarray<T>& operator|=(const T&);
+
+    valarray<T>& operator<<=(const T&);
+
+    valarray<T>& operator>>=(const T&);
+
+    valarray<T>& operator*=(const valarray<T>&);
+
+    valarray<T>& operator/=(const valarray<T>&);
+
+    valarray<T>& operator%=(const valarray<T>&);
+
+    valarray<T>& operator+=(const valarray<T>&);
+
+    valarray<T>& operator-=(const valarray<T>&);
+
+    valarray<T>& operator^=(const valarray<T>&);
+
+    valarray<T>& operator&=(const valarray<T>&);
+
+    valarray<T>& operator|=(const valarray<T>&);
+
+    valarray<T>& operator<<=(const valarray<T>&);
+
+    valarray<T>& operator>>=(const valarray<T>&);
+
+    /// Destroys the valarray; the buffer member releases itself.
+    ~valarray()
+    {
+
+    }
+
+    /// Returns the number of elements, i.e. the buffer size in bytes
+    /// divided by \c sizeof(T).
+    size_t size() const
+    {
+        return m_buffer.size() / sizeof(T);
+    }
+
+    /// Replaces the storage with a new buffer of \p size elements (in the
+    /// current context) and sets every element to \p value. The previous
+    /// contents are not carried over.
+    void resize(size_t size, T value = T())
+    {
+        m_buffer = buffer(m_buffer.get_context(), size * sizeof(T));
+        fill(begin(), end(), value);
+    }
+
+    /// Returns a detail::buffer_value<T> referring to the element at
+    /// \p index. No bounds check is performed.
+    detail::buffer_value<T> operator[](size_t index)
+    {
+        return *(begin() + static_cast<ptrdiff_t>(index));
+    }
+
+    /// \overload
+    const detail::buffer_value<T> operator[](size_t index) const
+    {
+        return *(begin() + static_cast<ptrdiff_t>(index));
+    }
+
+    /// Returns the smallest element, located with min_element().
+    /// The name is parenthesized so that a function-like \c min macro, if
+    /// one is defined, is not expanded here.
+    T (min)() const
+    {
+        return *(boost::compute::min_element(begin(), end()));
+    }
+
+    /// Returns the largest element, located with max_element().
+    /// The name is parenthesized so that a function-like \c max macro, if
+    /// one is defined, is not expanded here.
+    T (max)() const
+    {
+        return *(boost::compute::max_element(begin(), end()));
+    }
+
+    /// Returns the result of accumulate() over all elements, starting
+    /// from \c T(0).
+    T sum() const
+    {
+        return boost::compute::accumulate(begin(), end(), T(0));
+    }
+
+    /// Returns a new valarray whose elements are the result of applying
+    /// \p function to each element of this one.
+    template<class UnaryFunction>
+    valarray<T> apply(UnaryFunction function) const
+    {
+        valarray<T> result(size());
+        transform(begin(), end(), result.begin(), function);
+        return result;
+    }
+
+    /// Returns the buffer that holds the elements.
+    const buffer& get_buffer() const
+    {
+        return m_buffer;
+    }
+
+
+private:
+    // iterator to the first element of the buffer
+    buffer_iterator<T> begin() const
+    {
+        return buffer_iterator<T>(m_buffer, 0);
+    }
+
+    // iterator one past the last element of the buffer
+    buffer_iterator<T> end() const
+    {
+        return buffer_iterator<T>(m_buffer, size());
+    }
+
+private:
+    // element storage; its byte size determines size()
+    buffer m_buffer;
+};
+
+/// \internal_
+/// Generates the two out-of-class definitions of valarray<T>::operator op=
+/// for the operator token \p op: one overload taking a scalar \c T and one
+/// taking another valarray<T>.
+///
+/// \p op_name is the function-object template that is instantiated with T
+/// and applied element-wise through transform(); for the scalar overload it
+/// is bound so that the element is the first argument and the scalar the
+/// second. \p assert is a statement that is pasted at the top of each
+/// generated body. The result is written back into the left-hand valarray,
+/// which is returned by reference.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, assert) \
+ template<class T> \
+ inline valarray<T>& \
+ valarray<T>::operator op##=(const T& val) \
+ { \
+ assert \
+ transform(begin(), end(), begin(), \
+ ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
+ return *this; \
+ } \
+ \
+ template<class T> \
+ inline valarray<T>& \
+ valarray<T>::operator op##=(const valarray<T> &rhs) \
+ { \
+ assert \
+ transform(begin(), end(), rhs.begin(), begin(), op_name<T>()); \
+ return *this; \
+ }
+
+/// \internal_
+/// Compound assignment for operators that accept every fundamental type:
+/// forwards to BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT with a
+/// static assertion that only requires is_fundamental<T>.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(op, op_name) \
+ BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types" \
+ ); \
+ )
+
+/// \internal_
+/// Compound assignment for operators that must not be used with
+/// floating-point types: the static assertion additionally requires that
+/// the scalar type of T is not floating point.
+/// See OpenCL specification, operators chapter.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(op, op_name) \
+ BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value && \
+ !is_floating_point<typename scalar_type<T>::type>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types except the built-in scalar and vector float types" \
+ ); \
+ )
+
+// defining the compound assignment operators declared in valarray<T>:
+// arithmetic ones accept any fundamental type, bitwise and shift ones
+// reject floating-point types
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(+, plus)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(-, minus)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(*, multiplies)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(/, divides)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(^, bit_xor)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(&, bit_and)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(|, bit_or)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(<<, shift_left)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(>>, shift_right)
+
+// The remainder (%) operates on
+// integer scalar and integer vector data types only.
+// See OpenCL specification.
+// It therefore uses the base macro directly with an is_integral assertion.
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(%, modulus,
+ BOOST_STATIC_ASSERT_MSG(
+ is_integral<typename scalar_type<T>::type>::value,
+ "This operator can be used only with OpenCL built-in integer types"
+ );
+)
+
+// the helper macros are not needed past this point
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP
+
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT
+
+/// \internal_
+/// Macro for defining binary operators for valarray
+///
+/// Generates three free function templates for the operator token \p op:
+/// valarray op valarray, scalar op valarray and valarray op scalar. Each
+/// one creates a result valarray<T> sized like its valarray operand (the
+/// left-hand one when both operands are valarrays) and fills it with
+/// transform() using \p op_name<T>; the scalar overloads bind the scalar to
+/// the matching argument position so operand order is preserved.
+/// \p assert is a statement pasted at the top of each generated body.
+/// Operand sizes are not compared.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, assert) \
+ template<class T> \
+ valarray<T> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
+ { \
+ assert \
+ valarray<T> result(lhs.size()); \
+ transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
+ buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
+ buffer_iterator<T>(rhs.get_buffer(), 0), \
+ buffer_iterator<T>(result.get_buffer(), 0), \
+ op_name<T>()); \
+ return result; \
+ } \
+ \
+ template<class T> \
+ valarray<T> operator op (const T& val, const valarray<T>& rhs) \
+ { \
+ assert \
+ valarray<T> result(rhs.size()); \
+ transform(buffer_iterator<T>(rhs.get_buffer(), 0), \
+ buffer_iterator<T>(rhs.get_buffer(), rhs.size()), \
+ buffer_iterator<T>(result.get_buffer(), 0), \
+ ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
+ return result; \
+ } \
+ \
+ template<class T> \
+ valarray<T> operator op (const valarray<T>& lhs, const T& val) \
+ { \
+ assert \
+ valarray<T> result(lhs.size()); \
+ transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
+ buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
+ buffer_iterator<T>(result.get_buffer(), 0), \
+ ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
+ return result; \
+ }
+
+/// \internal_
+/// Binary operator for every fundamental type: forwards to
+/// BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR with a static assertion
+/// that only requires is_fundamental<T>.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(op, op_name) \
+ BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types" \
+ ); \
+ )
+
+/// \internal_
+/// Binary operator that must not be used with floating-point types: the
+/// static assertion additionally requires that the scalar type of T is not
+/// floating point.
+/// See OpenCL specification, operators chapter.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(op, op_name) \
+ BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value && \
+ !is_floating_point<typename scalar_type<T>::type>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types except the built-in scalar and vector float types" \
+ ); \
+ )
+
+// defining binary operators for valarray:
+// arithmetic ones accept any fundamental type, bitwise and shift ones
+// reject floating-point types
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(+, plus)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(-, minus)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(*, multiplies)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(/, divides)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(^, bit_xor)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(&, bit_and)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(|, bit_or)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(<<, shift_left)
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(>>, shift_right)
+
+// The remainder (%) operates on
+// integer scalar and integer vector data types only.
+// See OpenCL specification.
+// Defined here so that the binary form matches the operator%= that
+// valarray<T> already provides.
+BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(%, modulus,
+    BOOST_STATIC_ASSERT_MSG(
+        is_integral<typename scalar_type<T>::type>::value,
+        "This operator can be used only with OpenCL built-in integer types"
+    );
+)
+
+// the helper macros are not needed past this point
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP
+
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR
+
+/// \internal_
+/// Macro for defining valarray comparison operators.
+/// For return type valarray<char> is used instead of valarray<bool> because
+/// in OpenCL there cannot be memory buffer with bool type.
+///
+/// Generates three free function templates for the operator token \p op
+/// (valarray op valarray, scalar op valarray, valarray op scalar), each
+/// applying \p op_name<T> element-wise through transform() into a
+/// valarray<char> sized like its valarray operand (the left-hand one when
+/// both operands are valarrays).
+///
+/// Note it's also used for defining binary logical operators (&&, ||)
+#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name) \
+ template<class T> \
+ valarray<char> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
+ { \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types" \
+ ); \
+ valarray<char> result(lhs.size()); \
+ transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
+ buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
+ buffer_iterator<T>(rhs.get_buffer(), 0), \
+ buffer_iterator<char>(result.get_buffer(), 0), \
+ op_name<T>()); \
+ return result; \
+ } \
+ \
+ template<class T> \
+ valarray<char> operator op (const T& val, const valarray<T>& rhs) \
+ { \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types" \
+ ); \
+ valarray<char> result(rhs.size()); \
+ transform(buffer_iterator<T>(rhs.get_buffer(), 0), \
+ buffer_iterator<T>(rhs.get_buffer(), rhs.size()), \
+ buffer_iterator<char>(result.get_buffer(), 0), \
+ ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
+ return result; \
+ } \
+ \
+ template<class T> \
+ valarray<char> operator op (const valarray<T>& lhs, const T& val) \
+ { \
+ BOOST_STATIC_ASSERT_MSG( \
+ is_fundamental<T>::value, \
+ "This operator can be used with all OpenCL built-in scalar" \
+ " and vector types" \
+ ); \
+ valarray<char> result(lhs.size()); \
+ transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
+ buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
+ buffer_iterator<char>(result.get_buffer(), 0), \
+ ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
+ return result; \
+ }
+
+// defining comparison operators for valarray; each one yields a
+// valarray<char> of element-wise results
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(==, equal_to)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(!=, not_equal_to)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>, greater)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<, less)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>=, greater_equal)
+BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<=, less_equal)
+
+/// \internal_
+/// Macro for defining binary logical operators for valarray.
+///
+/// For return type valarray<char> is used instead of valarray<bool> because
+/// in OpenCL there cannot be memory buffer with bool type.
+/// 1 means true, 0 means false.
+///
+/// The generated code is identical to that of the comparison operators, so
+/// this simply forwards to the comparison macro.
+#define BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(op, op_name) \
+ BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name)
+
+// defining binary logical operators for valarray
+BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(&&, logical_and)
+BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(||, logical_or)
+
+// the helper macros are not needed past this point
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR
+
+#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_VALARRAY_HPP
diff --git a/boost/compute/container/vector.hpp b/boost/compute/container/vector.hpp
new file mode 100644
index 0000000000..47d649ad99
--- /dev/null
+++ b/boost/compute/container/vector.hpp
@@ -0,0 +1,761 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTAINER_VECTOR_HPP
+#define BOOST_COMPUTE_CONTAINER_VECTOR_HPP
+
+#include <vector>
+#include <cstddef>
+#include <iterator>
+#include <exception>
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/config.hpp>
+
+#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+#include <initializer_list>
+#endif
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/algorithm/fill_n.hpp>
+#include <boost/compute/allocator/buffer_allocator.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/detail/capture_traits.hpp>
+#include <boost/compute/detail/buffer_value.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class vector
+/// \brief A resizable array of values.
+///
+/// The vector<T> class stores a dynamic array of values. Internally, the data
+/// is stored in an OpenCL buffer object.
+///
+/// The vector class is the preferred container for storing and accessing data
+/// on a compute device. In most cases it should be used instead of directly
+/// dealing with buffer objects. If the underlying buffer is needed, it can be
+/// accessed with the get_buffer() method.
+///
+/// The internal storage is allocated in a specific OpenCL context which is
+/// passed as an argument to the constructor when the vector is created.
+///
+/// For example, to create a vector on the device containing space for ten
+/// \c int values:
+/// \code
+/// boost::compute::vector<int> vec(10, context);
+/// \endcode
+///
+/// Allocation and data transfer can also be performed in a single step:
+/// \code
+/// // values on the host
+/// int data[] = { 1, 2, 3, 4 };
+///
+/// // create a vector of size four and copy the values from data
+/// boost::compute::vector<int> vec(data, data + 4, queue);
+/// \endcode
+///
+/// The Boost.Compute \c vector class provides a STL-like API and is modeled
+/// after the \c std::vector class from the C++ standard library. It can be
+/// used with any of the STL-like algorithms provided by Boost.Compute
+/// including \c copy(), \c transform(), and \c sort() (among many others).
+///
+/// For example:
+/// \code
+/// // a vector on a compute device
+/// boost::compute::vector<float> vec = ...
+///
+/// // copy data to the vector from a host std::vector
+/// boost::compute::copy(host_vec.begin(), host_vec.end(), vec.begin(), queue);
+///
+/// // copy data from the vector to a host std::vector
+/// boost::compute::copy(vec.begin(), vec.end(), host_vec.begin(), queue);
+///
+/// // sort the values in the vector
+/// boost::compute::sort(vec.begin(), vec.end(), queue);
+///
+/// // calculate the sum of the values in the vector (also see reduce())
+/// float sum = boost::compute::accumulate(vec.begin(), vec.end(), 0, queue);
+///
+/// // reverse the values in the vector
+/// boost::compute::reverse(vec.begin(), vec.end(), queue);
+///
+/// // fill the vector with ones
+/// boost::compute::fill(vec.begin(), vec.end(), 1, queue);
+/// \endcode
+///
+/// \see \ref array "array<T, N>", buffer
+template<class T, class Alloc = buffer_allocator<T> >
+class vector
+{
+public:
+ typedef T value_type;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::size_type size_type;
+ typedef typename allocator_type::difference_type difference_type;
+ typedef detail::buffer_value<T> reference;
+ typedef const detail::buffer_value<T> const_reference;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef buffer_iterator<T> iterator;
+ typedef buffer_iterator<T> const_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+ /// Creates an empty vector in \p context.
+ explicit vector(const context &context = system::default_context())
+ : m_size(0),
+ m_allocator(context)
+ {
+ m_data = m_allocator.allocate(_minimum_capacity());
+ }
+
+ /// Creates a vector with space for \p count elements in \p context.
+ ///
+ /// Note that unlike \c std::vector's constructor, this will not initialize
+ /// the values in the container. Either call the vector constructor which
+ /// takes a value to initialize with or use the fill() algorithm to set
+ /// the initial values.
+ ///
+ /// For example:
+ /// \code
+ /// // create a vector on the device with space for ten ints
+ /// boost::compute::vector<int> vec(10, context);
+ /// \endcode
+ explicit vector(size_type count,
+ const context &context = system::default_context())
+ : m_size(count),
+ m_allocator(context)
+ {
+ m_data = m_allocator.allocate((std::max)(count, _minimum_capacity()));
+ }
+
+ /// Creates a vector with space for \p count elements and sets each equal
+ /// to \p value.
+ ///
+ /// For example:
+ /// \code
+ /// // creates a vector with four values set to nine (e.g. [9, 9, 9, 9]).
+ /// boost::compute::vector<int> vec(4, 9, queue);
+ /// \endcode
+ vector(size_type count,
+ const T &value,
+ command_queue &queue = system::default_queue())
+ : m_size(count),
+ m_allocator(queue.get_context())
+ {
+ m_data = m_allocator.allocate((std::max)(count, _minimum_capacity()));
+
+ ::boost::compute::fill_n(begin(), count, value, queue);
+ }
+
+ /// Creates a vector with space for the values in the range [\p first,
+ /// \p last) and copies them into the vector with \p queue.
+ ///
+ /// For example:
+ /// \code
+ /// // values on the host
+ /// int data[] = { 1, 2, 3, 4 };
+ ///
+ /// // create a vector of size four and copy the values from data
+ /// boost::compute::vector<int> vec(data, data + 4, queue);
+ /// \endcode
+ template<class InputIterator>
+ vector(InputIterator first,
+ InputIterator last,
+ command_queue &queue = system::default_queue())
+ : m_size(detail::iterator_range_size(first, last)),
+ m_allocator(queue.get_context())
+ {
+ m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity()));
+
+ ::boost::compute::copy(first, last, begin(), queue);
+ }
+
+ /// Creates a new vector and copies the values from \p other.
+ vector(const vector &other,
+ command_queue &queue = system::default_queue())
+ : m_size(other.m_size),
+ m_allocator(other.m_allocator)
+ {
+ m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity()));
+
+ if(!other.empty()){
+ ::boost::compute::copy(other.begin(), other.end(), begin(), queue);
+ queue.finish();
+ }
+ }
+
+ /// Creates a new vector and copies the values from \p other.
+ template<class OtherAlloc>
+ vector(const vector<T, OtherAlloc> &other,
+ command_queue &queue = system::default_queue())
+ : m_size(other.size()),
+ m_allocator(queue.get_context())
+ {
+ m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity()));
+
+ if(!other.empty()){
+ ::boost::compute::copy(other.begin(), other.end(), begin(), queue);
+ queue.finish();
+ }
+ }
+
+ /// Creates a new vector and copies the values from \p vector.
+ template<class OtherAlloc>
+ vector(const std::vector<T, OtherAlloc> &vector,
+ command_queue &queue = system::default_queue())
+ : m_size(vector.size()),
+ m_allocator(queue.get_context())
+ {
+ m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity()));
+
+ ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue);
+ }
+
+ #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+ vector(std::initializer_list<T> list,
+ command_queue &queue = system::default_queue())
+ : m_size(list.size()),
+ m_allocator(queue.get_context())
+ {
+ m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity()));
+
+ ::boost::compute::copy(list.begin(), list.end(), begin(), queue);
+ }
+ #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+
+ vector& operator=(const vector &other)
+ {
+ if(this != &other){
+ command_queue queue = default_queue();
+ resize(other.size(), queue);
+ ::boost::compute::copy(other.begin(), other.end(), begin(), queue);
+ queue.finish();
+ }
+
+ return *this;
+ }
+
+ template<class OtherAlloc>
+ vector& operator=(const std::vector<T, OtherAlloc> &vector)
+ {
+ command_queue queue = default_queue();
+ resize(vector.size(), queue);
+ ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue);
+ queue.finish();
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new vector from \p other.
+ vector(vector&& other)
+ : m_data(std::move(other.m_data)),
+ m_size(other.m_size),
+ m_allocator(std::move(other.m_allocator))
+ {
+ other.m_size = 0;
+ }
+
+ /// Move-assigns the data from \p other to \c *this.
+ vector& operator=(vector&& other)
+ {
+ if(m_size){
+ m_allocator.deallocate(m_data, m_size);
+ }
+
+ m_data = std::move(other.m_data);
+ m_size = other.m_size;
+ m_allocator = std::move(other.m_allocator);
+
+ other.m_size = 0;
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the vector object.
+ ~vector()
+ {
+ if(m_size){
+ m_allocator.deallocate(m_data, m_size);
+ }
+ }
+
+ iterator begin()
+ {
+ return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), 0);
+ }
+
+ const_iterator begin() const
+ {
+ return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), 0);
+ }
+
+ const_iterator cbegin() const
+ {
+ return begin();
+ }
+
+ iterator end()
+ {
+ return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), m_size);
+ }
+
+ const_iterator end() const
+ {
+ return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), m_size);
+ }
+
+ const_iterator cend() const
+ {
+ return end();
+ }
+
+ reverse_iterator rbegin()
+ {
+ return reverse_iterator(end() - 1);
+ }
+
+ const_reverse_iterator rbegin() const
+ {
+ return reverse_iterator(end() - 1);
+ }
+
+ const_reverse_iterator crbegin() const
+ {
+ return rbegin();
+ }
+
+ reverse_iterator rend()
+ {
+ return reverse_iterator(begin() - 1);
+ }
+
+ const_reverse_iterator rend() const
+ {
+ return reverse_iterator(begin() - 1);
+ }
+
+ const_reverse_iterator crend() const
+ {
+ return rend();
+ }
+
+ /// Returns the number of elements in the vector.
+ size_type size() const
+ {
+ return m_size;
+ }
+
+ size_type max_size() const
+ {
+ return m_allocator.max_size();
+ }
+
+ /// Resizes the vector to \p size.
+ void resize(size_type size, command_queue &queue)
+ {
+ if(size < capacity()){
+ m_size = size;
+ }
+ else {
+ // allocate new buffer
+ pointer new_data =
+ m_allocator.allocate(
+ static_cast<size_type>(
+ static_cast<float>(size) * _growth_factor()
+ )
+ );
+
+ // copy old values to the new buffer
+ ::boost::compute::copy(m_data, m_data + m_size, new_data, queue);
+
+ // free old memory
+ m_allocator.deallocate(m_data, m_size);
+
+ // set new data and size
+ m_data = new_data;
+ m_size = size;
+ }
+ }
+
+ /// \overload
+ void resize(size_type size)
+ {
+ command_queue queue = default_queue();
+ resize(size, queue);
+ queue.finish();
+ }
+
+ /// Returns \c true if the vector is empty.
+ bool empty() const
+ {
+ return m_size == 0;
+ }
+
+ /// Returns the capacity of the vector.
+ size_type capacity() const
+ {
+ return m_data.get_buffer().size() / sizeof(T);
+ }
+
+ void reserve(size_type size, command_queue &queue)
+ {
+ (void) size;
+ (void) queue;
+ }
+
+ void reserve(size_type size)
+ {
+ command_queue queue = default_queue();
+ reserve(size, queue);
+ queue.finish();
+ }
+
+ void shrink_to_fit(command_queue &queue)
+ {
+ (void) queue;
+ }
+
+ void shrink_to_fit()
+ {
+ command_queue queue = default_queue();
+ shrink_to_fit(queue);
+ queue.finish();
+ }
+
+ reference operator[](size_type index)
+ {
+ return *(begin() + static_cast<difference_type>(index));
+ }
+
+ const_reference operator[](size_type index) const
+ {
+ return *(begin() + static_cast<difference_type>(index));
+ }
+
+ reference at(size_type index)
+ {
+ if(index >= size()){
+ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range"));
+ }
+
+ return operator[](index);
+ }
+
+ const_reference at(size_type index) const
+ {
+ if(index >= size()){
+ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range"));
+ }
+
+ return operator[](index);
+ }
+
+ reference front()
+ {
+ return *begin();
+ }
+
+ const_reference front() const
+ {
+ return *begin();
+ }
+
+ reference back()
+ {
+ return *(end() - static_cast<difference_type>(1));
+ }
+
+ const_reference back() const
+ {
+ return *(end() - static_cast<difference_type>(1));
+ }
+
+ template<class InputIterator>
+ void assign(InputIterator first,
+ InputIterator last,
+ command_queue &queue)
+ {
+ // resize vector for new contents
+ resize(detail::iterator_range_size(first, last), queue);
+
+ // copy values into the vector
+ ::boost::compute::copy(first, last, begin(), queue);
+ }
+
+ template<class InputIterator>
+ void assign(InputIterator first, InputIterator last)
+ {
+ command_queue queue = default_queue();
+ assign(first, last, queue);
+ queue.finish();
+ }
+
+ void assign(size_type n, const T &value, command_queue &queue)
+ {
+ // resize vector for new contents
+ resize(n, queue);
+
+ // fill vector with value
+ ::boost::compute::fill_n(begin(), n, value, queue);
+ }
+
+ void assign(size_type n, const T &value)
+ {
+ command_queue queue = default_queue();
+ assign(n, value, queue);
+ queue.finish();
+ }
+
+ /// Inserts \p value at the end of the vector (resizing if necessary).
+ ///
+ /// Note that calling \c push_back() to insert data values one at a time
+ /// is inefficient as there is a non-trivial overhead in performing a data
+ /// transfer to the device. It is usually better to store a set of values
+ /// on the host (for example, in a \c std::vector) and then transfer them
+ /// in bulk using the \c insert() method or the copy() algorithm.
+ void push_back(const T &value, command_queue &queue)
+ {
+ insert(end(), value, queue);
+ }
+
+ /// \overload
+ void push_back(const T &value)
+ {
+ command_queue queue = default_queue();
+ push_back(value, queue);
+ queue.finish();
+ }
+
+ void pop_back(command_queue &queue)
+ {
+ resize(size() - 1, queue);
+ }
+
+ void pop_back()
+ {
+ command_queue queue = default_queue();
+ pop_back(queue);
+ queue.finish();
+ }
+
+ iterator insert(iterator position, const T &value, command_queue &queue)
+ {
+ if(position == end()){
+ resize(m_size + 1, queue);
+ position = begin() + position.get_index();
+ ::boost::compute::copy_n(&value, 1, position, queue);
+ }
+ else {
+ ::boost::compute::vector<T, Alloc> tmp(position, end(), queue);
+ resize(m_size + 1, queue);
+ position = begin() + position.get_index();
+ ::boost::compute::copy_n(&value, 1, position, queue);
+ ::boost::compute::copy(tmp.begin(), tmp.end(), position + 1, queue);
+ }
+
+ return position + 1;
+ }
+
+ iterator insert(iterator position, const T &value)
+ {
+ command_queue queue = default_queue();
+ iterator iter = insert(position, value, queue);
+ queue.finish();
+ return iter;
+ }
+
+ void insert(iterator position,
+ size_type count,
+ const T &value,
+ command_queue &queue)
+ {
+ ::boost::compute::vector<T, Alloc> tmp(position, end(), queue);
+ resize(size() + count, queue);
+
+ position = begin() + position.get_index();
+
+ ::boost::compute::fill_n(position, count, value, queue);
+ ::boost::compute::copy(
+ tmp.begin(),
+ tmp.end(),
+ position + static_cast<difference_type>(count),
+ queue
+ );
+ }
+
+ void insert(iterator position, size_type count, const T &value)
+ {
+ command_queue queue = default_queue();
+ insert(position, count, value, queue);
+ queue.finish();
+ }
+
+ /// Inserts the values in the range [\p first, \p last) into the vector at
+ /// \p position using \p queue.
+ template<class InputIterator>
+ void insert(iterator position,
+ InputIterator first,
+ InputIterator last,
+ command_queue &queue)
+ {
+ ::boost::compute::vector<T, Alloc> tmp(position, end(), queue);
+
+ size_type count = detail::iterator_range_size(first, last);
+ resize(size() + count, queue);
+
+ position = begin() + position.get_index();
+
+ ::boost::compute::copy(first, last, position, queue);
+ ::boost::compute::copy(
+ tmp.begin(),
+ tmp.end(),
+ position + static_cast<difference_type>(count),
+ queue
+ );
+ }
+
+ /// \overload
+ template<class InputIterator>
+ void insert(iterator position, InputIterator first, InputIterator last)
+ {
+ command_queue queue = default_queue();
+ insert(position, first, last, queue);
+ queue.finish();
+ }
+
+ iterator erase(iterator position, command_queue &queue)
+ {
+ return erase(position, position + 1, queue);
+ }
+
+ iterator erase(iterator position)
+ {
+ command_queue queue = default_queue();
+ iterator iter = erase(position, queue);
+ queue.finish();
+ return iter;
+ }
+
+ /// Removes the elements in the range [\p first, \p last) using \p queue.
+ ///
+ /// NOTE(review): the returned iterator is begin() + first's index + count,
+ /// i.e. \c count elements past the erase point, whereas
+ /// std::vector::erase returns the position of the first element after the
+ /// removed range -- confirm this is intended.
+ iterator erase(iterator first, iterator last, command_queue &queue)
+ {
+ if(last != end()){
+ // stage the surviving tail in a temporary, then copy it over the gap
+ ::boost::compute::vector<T, Alloc> tmp(last, end(), queue);
+ ::boost::compute::copy(tmp.begin(), tmp.end(), first, queue);
+ }
+
+ // number of removed elements; shrink the vector by that amount
+ difference_type count = std::distance(first, last);
+ resize(size() - static_cast<size_type>(count), queue);
+
+ return begin() + first.get_index() + count;
+ }
+
+ iterator erase(iterator first, iterator last)
+ {
+ command_queue queue = default_queue();
+ iterator iter = erase(first, last, queue);
+ queue.finish();
+ return iter;
+ }
+
+ /// Swaps the contents of \c *this with \p other.
+ void swap(vector &other)
+ {
+ std::swap(m_data, other.m_data);
+ std::swap(m_size, other.m_size);
+ std::swap(m_allocator, other.m_allocator);
+ }
+
+ /// Removes all elements from the vector.
+ void clear()
+ {
+ m_size = 0;
+ }
+
+ allocator_type get_allocator() const
+ {
+ return m_allocator;
+ }
+
+ /// Returns the underlying buffer.
+ const buffer& get_buffer() const
+ {
+ return m_data.get_buffer();
+ }
+
+ /// \internal_
+ ///
+ /// Returns a command queue usable to issue commands for the vector's
+ /// memory buffer. This is used when a member function is called without
+ /// specifying an existing command queue to use.
+ command_queue default_queue() const
+ {
+ const context &context = m_allocator.get_context();
+ command_queue queue(context, context.get_device());
+ return queue;
+ }
+
+private:
+ /// \internal_
+ BOOST_CONSTEXPR size_type _minimum_capacity() const { return 4; }
+
+ /// \internal_
+ BOOST_CONSTEXPR float _growth_factor() const { return 1.5; }
+
+private:
+ pointer m_data;
+ size_type m_size;
+ allocator_type m_allocator;
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for vector<T>
+template<class T, class Alloc>
+struct set_kernel_arg<vector<T, Alloc> >
+{
+ void operator()(kernel &kernel_, size_t index, const vector<T, Alloc> &vector)
+ {
+ kernel_.set_arg(index, vector.get_buffer());
+ }
+};
+
+// for capturing vector<T> with BOOST_COMPUTE_CLOSURE()
+template<class T, class Alloc>
+struct capture_traits<vector<T, Alloc> >
+{
+ static std::string type_name()
+ {
+ return std::string("__global ") + ::boost::compute::type_name<T>() + "*";
+ }
+};
+
+// meta_kernel streaming operator for vector<T>
+template<class T, class Alloc>
+meta_kernel& operator<<(meta_kernel &k, const vector<T, Alloc> &vector)
+{
+ return k << k.get_buffer_identifier<T>(vector.get_buffer());
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTAINER_VECTOR_HPP
diff --git a/boost/compute/context.hpp b/boost/compute/context.hpp
new file mode 100644
index 0000000000..5db39e9d83
--- /dev/null
+++ b/boost/compute/context.hpp
@@ -0,0 +1,245 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CONTEXT_HPP
+#define BOOST_COMPUTE_CONTEXT_HPP
+
+#include <vector>
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class context
+/// \brief A compute context.
+///
+/// The context class represents a compute context.
+///
+/// A context object manages a set of OpenCL resources including memory
+/// buffers and program objects. Before allocating memory on the device or
+/// executing kernels you must set up a context object.
+///
+/// To create a context for the default device on the system:
+/// \code
+/// // get the default compute device
+/// boost::compute::device gpu = boost::compute::system::default_device();
+///
+/// // create a context for the device
+/// boost::compute::context context(gpu);
+/// \endcode
+///
+/// Once a context is created, memory can be allocated using the buffer class
+/// and kernels can be executed using the command_queue class.
+///
+/// \see device, command_queue
+class context
+{
+public:
+ /// Create a null context object.
+ context()
+ : m_context(0)
+ {
+ }
+
+ /// Creates a new context for \p device with \p properties.
+ ///
+ /// \see_opencl_ref{clCreateContext}
+ explicit context(const device &device,
+ const cl_context_properties *properties = 0)
+ {
+ BOOST_ASSERT(device.id() != 0);
+
+ cl_device_id device_id = device.id();
+
+ cl_int error = 0;
+ m_context = clCreateContext(properties, 1, &device_id, 0, 0, &error);
+
+ if(!m_context){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new context for \p devices with \p properties.
+ ///
+ /// \see_opencl_ref{clCreateContext}
+ explicit context(const std::vector<device> &devices,
+ const cl_context_properties *properties = 0)
+ {
+ BOOST_ASSERT(!devices.empty());
+
+ cl_int error = 0;
+
+ m_context = clCreateContext(
+ properties,
+ static_cast<cl_uint>(devices.size()),
+ reinterpret_cast<const cl_device_id *>(&devices[0]),
+ 0,
+ 0,
+ &error
+ );
+
+ if(!m_context){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new context object for \p context. If \p retain is
+ /// \c true, the reference count for \p context will be incremented.
+ explicit context(cl_context context, bool retain = true)
+ : m_context(context)
+ {
+ if(m_context && retain){
+ clRetainContext(m_context);
+ }
+ }
+
+ /// Creates a new context object as a copy of \p other.
+ context(const context &other)
+ : m_context(other.m_context)
+ {
+ if(m_context){
+ clRetainContext(m_context);
+ }
+ }
+
+ /// Copies the context object from \p other to \c *this.
+ context& operator=(const context &other)
+ {
+ if(this != &other){
+ if(m_context){
+ clReleaseContext(m_context);
+ }
+
+ m_context = other.m_context;
+
+ if(m_context){
+ clRetainContext(m_context);
+ }
+ }
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new context object from \p other.
+ context(context&& other) BOOST_NOEXCEPT
+ : m_context(other.m_context)
+ {
+ other.m_context = 0;
+ }
+
+ /// Move-assigns the context from \p other to \c *this.
+ ///
+ /// Releases the context currently held by \c *this (if any) and takes
+ /// over the handle from \p other, leaving \p other null. Assigning an
+ /// object to itself is a no-op.
+ context& operator=(context&& other) BOOST_NOEXCEPT
+ {
+ // guard against self-move: without it the handle would be released
+ // and then reset to null
+ if(this != &other){
+ if(m_context){
+ clReleaseContext(m_context);
+ }
+
+ m_context = other.m_context;
+ other.m_context = 0;
+ }
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the context object.
+ ~context()
+ {
+ if(m_context){
+ BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+ clReleaseContext(m_context)
+ );
+ }
+ }
+
+ /// Returns the underlying OpenCL context.
+ cl_context& get() const
+ {
+ return const_cast<cl_context &>(m_context);
+ }
+
+ /// Returns the first device reported for the context, or a
+ /// default-constructed device when the context reports no devices.
+ device get_device() const
+ {
+ const std::vector<device> all = get_devices();
+ return all.empty() ? device() : all.front();
+ }
+
+ /// Returns a vector of devices for the context.
+ std::vector<device> get_devices() const
+ {
+ return get_info<std::vector<device> >(CL_CONTEXT_DEVICES);
+ }
+
+ /// Returns information about the context.
+ ///
+ /// \see_opencl_ref{clGetContextInfo}
+ template<class T>
+ T get_info(cl_context_info info) const
+ {
+ return detail::get_object_info<T>(clGetContextInfo, m_context, info);
+ }
+
+ /// \overload
+ template<int Enum>
+ typename detail::get_object_info_type<context, Enum>::type
+ get_info() const;
+
+ /// Returns \c true if the context is the same as \p other.
+ bool operator==(const context &other) const
+ {
+ // compares the underlying cl_context handles
+ return m_context == other.m_context;
+ }
+
+ /// Returns \c true if the context is different from \p other.
+ bool operator!=(const context &other) const
+ {
+ return m_context != other.m_context;
+ }
+
+ /// \internal_
+ operator cl_context() const
+ {
+ return m_context;
+ }
+
+private:
+ cl_context m_context;
+};
+
+/// \internal_ define get_info() specializations for context
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context,
+ ((cl_uint, CL_CONTEXT_REFERENCE_COUNT))
+ ((std::vector<cl_device_id>, CL_CONTEXT_DEVICES))
+ ((std::vector<cl_context_properties>, CL_CONTEXT_PROPERTIES))
+)
+
+#ifdef CL_VERSION_1_1
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context,
+ ((cl_uint, CL_CONTEXT_NUM_DEVICES))
+)
+#endif // CL_VERSION_1_1
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_CONTEXT_HPP
diff --git a/boost/compute/core.hpp b/boost/compute/core.hpp
new file mode 100644
index 0000000000..a8e2eb92a6
--- /dev/null
+++ b/boost/compute/core.hpp
@@ -0,0 +1,32 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_CORE_HPP
+#define BOOST_COMPUTE_CORE_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute core headers.
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/event.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/memory_object.hpp>
+#include <boost/compute/platform.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/user_event.hpp>
+#include <boost/compute/version.hpp>
+
+#endif // BOOST_COMPUTE_CORE_HPP
diff --git a/boost/compute/detail/assert_cl_success.hpp b/boost/compute/detail/assert_cl_success.hpp
new file mode 100644
index 0000000000..78acaf6caf
--- /dev/null
+++ b/boost/compute/detail/assert_cl_success.hpp
@@ -0,0 +1,24 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP
+#define BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP
+
+#include <boost/assert.hpp>
+
+// Evaluates \p function, an expression whose result is compared against
+// CL_SUCCESS. With BOOST_DISABLE_ASSERTS or NDEBUG defined the expression is
+// still evaluated but its result is not checked; otherwise the comparison is
+// passed to BOOST_ASSERT.
+#if defined(BOOST_DISABLE_ASSERTS) || defined(NDEBUG)
+#define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \
+ function
+#else
+// the argument is parenthesized so that an expression containing operators
+// of lower precedence than == is compared as a whole
+#define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \
+ BOOST_ASSERT((function) == CL_SUCCESS)
+#endif
+
+#endif // BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP
diff --git a/boost/compute/detail/buffer_value.hpp b/boost/compute/detail/buffer_value.hpp
new file mode 100644
index 0000000000..6a4e78fc19
--- /dev/null
+++ b/boost/compute/detail/buffer_value.hpp
@@ -0,0 +1,178 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP
+#define BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP
+
+#include <boost/compute/context.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/device_ptr.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T>
+class buffer_value
+{
+public:
+ typedef T value_type;
+
+ // creates a host-side value holding a value-initialized T (no buffer).
+ buffer_value()
+ : m_index(0),
+ m_value()
+ {
+ }
+
+ // creates a host-side value holding a copy of value (no buffer).
+ buffer_value(const value_type &value)
+ : m_index(0),
+ m_value(value)
+ {
+ }
+
+ // creates a reference for the value in buffer at index (in bytes).
+ // the buffer handle is wrapped without retaining it.
+ buffer_value(const buffer &buffer, size_t index)
+ : m_buffer(buffer.get(), false),
+ m_index(index),
+ m_value()
+ {
+ }
+
+ // copies other: the (non-retained) buffer handle, the index and also the
+ // host-side value, so that copies of buffer-less values keep their value
+ // (operator++(int) relies on this).
+ buffer_value(const buffer_value<T> &other)
+ : m_buffer(other.m_buffer.get(), false),
+ m_index(other.m_index),
+ m_value(other.m_value)
+ {
+ }
+
+ ~buffer_value()
+ {
+ // set buffer to null so that its reference count will
+ // not be decremented when its destructor is called
+ m_buffer.get() = 0;
+ }
+
+ // Converts to the stored value. When a buffer is attached, the element
+ // is read through a command queue created for this call from the
+ // buffer's context and that context's device; otherwise the host-side
+ // copy in m_value is returned.
+ operator value_type() const
+ {
+ if(m_buffer.get()){
+ const context &context = m_buffer.get_context();
+ const device &device = context.get_device();
+ command_queue queue(context, device);
+
+ // m_index is a byte offset; dividing by sizeof(T) gives the index
+ // handed to read_single_value()
+ return detail::read_single_value<T>(m_buffer, m_index / sizeof(T), queue);
+ }
+ else {
+ return m_value;
+ }
+ }
+
+ buffer_value<T> operator-() const
+ {
+ return -T(*this);
+ }
+
+ bool operator<(const T &value) const
+ {
+ return T(*this) < value;
+ }
+
+ bool operator>(const T &value) const
+ {
+ return T(*this) > value;
+ }
+
+ bool operator<=(const T &value) const
+ {
+ return T(*this) <= value;
+ }
+
+ // returns true if the stored value is greater than or equal to value.
+ bool operator>=(const T &value) const
+ {
+ // must use >= here; <= would duplicate operator<=
+ return T(*this) >= value;
+ }
+
+ bool operator==(const T &value) const
+ {
+ return T(*this) == value;
+ }
+
+ bool operator==(const buffer_value<T> &other) const
+ {
+ if(m_buffer.get() != other.m_buffer.get()){
+ return false;
+ }
+
+ if(m_buffer.get()){
+ return m_index == other.m_index;
+ }
+ else {
+ return m_value == other.m_value;
+ }
+ }
+
+ bool operator!=(const T &value) const
+ {
+ return T(*this) != value;
+ }
+
+ buffer_value<T>& operator=(const T &value)
+ {
+ if(m_buffer.get()){
+ const context &context = m_buffer.get_context();
+ command_queue queue(context, context.get_device());
+
+ detail::write_single_value<T>(value, m_buffer, m_index / sizeof(T), queue);
+
+ return *this;
+ }
+ else {
+ m_value = value;
+ return *this;
+ }
+ }
+
+ buffer_value<T>& operator=(const buffer_value<T> &value)
+ {
+ return operator=(T(value));
+ }
+
+ detail::device_ptr<T> operator&() const
+ {
+ return detail::device_ptr<T>(m_buffer, m_index);
+ }
+
+ buffer_value<T>& operator++()
+ {
+ if(m_buffer.get()){
+ T value = T(*this);
+ value++;
+ *this = value;
+ }
+ else {
+ m_value++;
+ }
+
+ return *this;
+ }
+
+ buffer_value<T> operator++(int)
+ {
+ buffer_value<T> result(*this);
+ ++(*this);
+ return result;
+ }
+
+private:
+ const buffer m_buffer;
+ size_t m_index;
+ value_type m_value;
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP
diff --git a/boost/compute/detail/device_ptr.hpp b/boost/compute/detail/device_ptr.hpp
new file mode 100644
index 0000000000..29ecd13631
--- /dev/null
+++ b/boost/compute/detail/device_ptr.hpp
@@ -0,0 +1,215 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DEVICE_PTR_HPP
+#define BOOST_COMPUTE_DEVICE_PTR_HPP
+
+#include <boost/type_traits.hpp>
+#include <boost/static_assert.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/config.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Proxy returned by device_ptr<T>::operator[]: it stores the buffer, the
+// pointer's index and the subscript expression, and reads the element from
+// the buffer when converted to T.
+template<class T, class IndexExpr>
+struct device_ptr_index_expr
+{
+ typedef T result_type;
+
+ device_ptr_index_expr(const buffer &buffer,
+ uint_ index,
+ const IndexExpr &expr)
+ : m_buffer(buffer),
+ m_index(index),
+ m_expr(expr)
+ {
+ }
+
+ // Reads one T from the buffer using a command queue created for this
+ // call. Only integral index expressions are accepted (checked at
+ // compile time) and the buffer must be non-null (asserted).
+ // NOTE(review): the read uses m_expr only; m_index is stored but not
+ // added to it here -- confirm that is intended.
+ operator T() const
+ {
+ BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value,
+ "Index expression must be integral");
+
+ BOOST_ASSERT(m_buffer.get());
+
+ const context &context = m_buffer.get_context();
+ const device &device = context.get_device();
+ command_queue queue(context, device);
+
+ return detail::read_single_value<T>(m_buffer, m_expr, queue);
+ }
+
+ // held by reference, so the buffer passed to the constructor must
+ // outlive this object
+ const buffer &m_buffer;
+ uint_ m_index;
+ IndexExpr m_expr;
+};
+
+template<class T>
+class device_ptr
+{
+public:
+ typedef T value_type;
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef std::random_access_iterator_tag iterator_category;
+ typedef T* pointer;
+ typedef T& reference;
+
+ device_ptr()
+ : m_index(0)
+ {
+ }
+
+ device_ptr(const buffer &buffer, size_t index = 0)
+ : m_buffer(buffer.get(), false),
+ m_index(index)
+ {
+ }
+
+ device_ptr(const device_ptr<T> &other)
+ : m_buffer(other.m_buffer.get(), false),
+ m_index(other.m_index)
+ {
+ }
+
+ device_ptr<T>& operator=(const device_ptr<T> &other)
+ {
+ if(this != &other){
+ m_buffer.get() = other.m_buffer.get();
+ m_index = other.m_index;
+ }
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ device_ptr(device_ptr<T>&& other) BOOST_NOEXCEPT
+ : m_buffer(other.m_buffer.get(), false),
+ m_index(other.m_index)
+ {
+ other.m_buffer.get() = 0;
+ }
+
+ // Move-assigns from other: takes over its buffer handle and index and
+ // leaves other with a null buffer handle. Assigning an object to itself
+ // is a no-op, matching the copy-assignment operator.
+ device_ptr<T>& operator=(device_ptr<T>&& other) BOOST_NOEXCEPT
+ {
+ // without this guard a self-move would null out our own handle
+ if(this != &other){
+ m_buffer.get() = other.m_buffer.get();
+ m_index = other.m_index;
+
+ other.m_buffer.get() = 0;
+ }
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ ~device_ptr()
+ {
+ // set buffer to null so that its reference count will
+ // not be decremented when its destructor is called
+ m_buffer.get() = 0;
+ }
+
+ size_type get_index() const
+ {
+ return m_index;
+ }
+
+ const buffer& get_buffer() const
+ {
+ return m_buffer;
+ }
+
+ template<class OT>
+ device_ptr<OT> cast() const
+ {
+ return device_ptr<OT>(m_buffer, m_index);
+ }
+
+ device_ptr<T> operator+(difference_type n) const
+ {
+ return device_ptr<T>(m_buffer, m_index + n);
+ }
+
+ device_ptr<T> operator+(const device_ptr<T> &other) const
+ {
+ return device_ptr<T>(m_buffer, m_index + other.m_index);
+ }
+
+ device_ptr<T>& operator+=(difference_type n)
+ {
+ m_index += static_cast<size_t>(n);
+ return *this;
+ }
+
+ difference_type operator-(const device_ptr<T> &other) const
+ {
+ return static_cast<difference_type>(m_index - other.m_index);
+ }
+
+ device_ptr<T>& operator-=(difference_type n)
+ {
+ m_index -= n;
+ return *this;
+ }
+
+ bool operator==(const device_ptr<T> &other) const
+ {
+ return m_buffer.get() == other.m_buffer.get() &&
+ m_index == other.m_index;
+ }
+
+ bool operator!=(const device_ptr<T> &other) const
+ {
+ return !(*this == other);
+ }
+
+ template<class Expr>
+ detail::device_ptr_index_expr<T, Expr>
+ operator[](const Expr &expr) const
+ {
+ BOOST_ASSERT(m_buffer.get());
+
+ return detail::device_ptr_index_expr<T, Expr>(m_buffer,
+ uint_(m_index),
+ expr);
+ }
+
+private:
+ const buffer m_buffer;
+ size_t m_index;
+};
+
+// is_buffer_iterator specialization for device_ptr
+template<class Iterator>
+struct is_buffer_iterator<
+ Iterator,
+ typename boost::enable_if<
+ boost::is_same<
+ device_ptr<typename Iterator::value_type>,
+ typename boost::remove_const<Iterator>::type
+ >
+ >::type
+> : public boost::true_type {};
+
+} // end detail namespace
+
+// is_device_iterator specialization for device_ptr
+template<class T>
+struct is_device_iterator<detail::device_ptr<T> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DEVICE_PTR_HPP
diff --git a/boost/compute/detail/diagnostic.hpp b/boost/compute/detail/diagnostic.hpp
new file mode 100644
index 0000000000..76a69f6570
--- /dev/null
+++ b/boost/compute/detail/diagnostic.hpp
@@ -0,0 +1,112 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP
+#define BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP
+
+// Macros for suppressing warnings for GCC version 4.2 or later (the
+// diagnostic state is pushed/popped only for version 4.6 or later). Usage:
+//
+// BOOST_COMPUTE_GCC_DIAG_OFF(sign-compare);
+// if(a < b){
+// BOOST_COMPUTE_GCC_DIAG_ON(sign-compare);
+//
+// Source: https://svn.boost.org/trac/boost/wiki/Guidelines/WarningsGuidelines
+#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
+#define BOOST_COMPUTE_GCC_DIAG_STR(s) #s
+#define BOOST_COMPUTE_GCC_DIAG_JOINSTR(x,y) BOOST_COMPUTE_GCC_DIAG_STR(x ## y)
+# define BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
+# define BOOST_COMPUTE_GCC_DIAG_PRAGMA(x) BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
+# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
+# define BOOST_COMPUTE_GCC_DIAG_OFF(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(push) \
+ BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x))
+# define BOOST_COMPUTE_GCC_DIAG_ON(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(pop)
+# else
+# define BOOST_COMPUTE_GCC_DIAG_OFF(x) \
+ BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x))
+# define BOOST_COMPUTE_GCC_DIAG_ON(x) \
+ BOOST_COMPUTE_GCC_DIAG_PRAGMA(warning BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x))
+# endif
+#else // Ensure these macros do nothing for other compilers.
+# define BOOST_COMPUTE_GCC_DIAG_OFF(x)
+# define BOOST_COMPUTE_GCC_DIAG_ON(x)
+#endif
+
+// Macros for suppressing warnings for Clang. Usage:
+//
+// BOOST_COMPUTE_CLANG_DIAG_OFF(sign-compare);
+// if(a < b){
+// BOOST_COMPUTE_CLANG_DIAG_ON(sign-compare);
+//
+// Source: https://svn.boost.org/trac/boost/wiki/Guidelines/WarningsGuidelines
+#ifdef __clang__
+# define BOOST_COMPUTE_CLANG_DIAG_STR(s) # s
+// stringize s, for example to "-Wsign-compare"
+# define BOOST_COMPUTE_CLANG_DIAG_JOINSTR(x,y) BOOST_COMPUTE_CLANG_DIAG_STR(x ## y)
+// join -W with sign-compare and stringize the result to "-Wsign-compare"
+# define BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(x) _Pragma (#x)
+// _Pragma is the operator form of the #pragma directive
+# define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x) \
+ BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(clang diagnostic x)
+# define BOOST_COMPUTE_CLANG_DIAG_OFF(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(push) \
+ BOOST_COMPUTE_CLANG_DIAG_PRAGMA(ignored BOOST_COMPUTE_CLANG_DIAG_JOINSTR(-W,x))
+// For example: #pragma clang diagnostic push, followed by
+// #pragma clang diagnostic ignored "-Wsign-compare"
+# define BOOST_COMPUTE_CLANG_DIAG_ON(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(pop)
+// For example: #pragma clang diagnostic pop (the argument x is not used)
+#else // Ensure these macros do nothing for other compilers.
+# define BOOST_COMPUTE_CLANG_DIAG_OFF(x)
+# define BOOST_COMPUTE_CLANG_DIAG_ON(x)
+# define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x)
+#endif
+
+// Macros for suppressing warnings for MSVC. Usage:
+//
+// BOOST_COMPUTE_MSVC_DIAG_OFF(4018); // sign-compare
+// if(a < b){
+// BOOST_COMPUTE_MSVC_DIAG_ON(4018);
+//
+#if defined(_MSC_VER)
+# define BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(x) __pragma(x)
+# define BOOST_COMPUTE_MSVC_DIAG_PRAGMA(x) \
+ BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(warning(x))
+# define BOOST_COMPUTE_MSVC_DIAG_OFF(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(push) \
+ BOOST_COMPUTE_MSVC_DIAG_PRAGMA(disable: x)
+# define BOOST_COMPUTE_MSVC_DIAG_ON(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(pop)
+#else // Ensure these macros do nothing for other compilers.
+# define BOOST_COMPUTE_MSVC_DIAG_OFF(x)
+# define BOOST_COMPUTE_MSVC_DIAG_ON(x)
+#endif
+
+// Macros for suppressing warnings for GCC, Clang and MSVC. Usage:
+//
+// BOOST_COMPUTE_DIAG_OFF(sign-compare, sign-compare, 4018);
+// if(a < b){
+// BOOST_COMPUTE_DIAG_ON(sign-compare, sign-compare, 4018);
+//
+#if defined(_MSC_VER) // MSVC
+# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_MSVC_DIAG_OFF(msvc)
+# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_MSVC_DIAG_ON(msvc)
+#elif defined(__clang__) // Clang
+# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_CLANG_DIAG_OFF(clang)
+# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_CLANG_DIAG_ON(clang)
+#elif defined(__GNUC__) // GCC/G++
+# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_GCC_DIAG_OFF(gcc)
+# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_GCC_DIAG_ON(gcc)
+#else // Ensure these macros do nothing for other compilers.
+# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc)
+# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc)
+#endif
+
+// Convenience wrappers around BOOST_COMPUTE_DIAG_OFF/ON for the
+// deprecated-declarations warning (GCC and Clang) and warning 4996 (MSVC).
+// Neither macro ends in a semicolon; the caller may add one.
+#define BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS() \
+ BOOST_COMPUTE_DIAG_OFF(deprecated-declarations, deprecated-declarations, 4996)
+#define BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS() \
+ BOOST_COMPUTE_DIAG_ON(deprecated-declarations, deprecated-declarations, 4996)
+
+
+#endif /* BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP */
diff --git a/boost/compute/detail/duration.hpp b/boost/compute/detail/duration.hpp
new file mode 100644
index 0000000000..601f12d291
--- /dev/null
+++ b/boost/compute/detail/duration.hpp
@@ -0,0 +1,50 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_DURATION_HPP
+#define BOOST_COMPUTE_DETAIL_DURATION_HPP
+
+#include <boost/config.hpp>
+
+#ifndef BOOST_COMPUTE_NO_HDR_CHRONO
+#include <chrono>
+#endif
+
+#include <boost/chrono/duration.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+#ifndef BOOST_COMPUTE_NO_HDR_CHRONO
+template<class Rep, class Period>
+inline std::chrono::duration<Rep, Period>
+make_duration_from_nanoseconds(std::chrono::duration<Rep, Period>, size_t nanoseconds)
+{
+ return std::chrono::duration_cast<std::chrono::duration<Rep, Period> >(
+ std::chrono::nanoseconds(nanoseconds)
+ );
+}
+#endif // BOOST_COMPUTE_NO_HDR_CHRONO
+
+template<class Rep, class Period>
+inline boost::chrono::duration<Rep, Period>
+make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nanoseconds)
+{
+ return boost::chrono::duration_cast<boost::chrono::duration<Rep, Period> >(
+ boost::chrono::nanoseconds(nanoseconds)
+ );
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_DURATION_HPP
diff --git a/boost/compute/detail/get_object_info.hpp b/boost/compute/detail/get_object_info.hpp
new file mode 100644
index 0000000000..cdc20cbc13
--- /dev/null
+++ b/boost/compute/detail/get_object_info.hpp
@@ -0,0 +1,216 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP
+#define BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP
+
+#include <string>
+#include <vector>
+
+#include <boost/preprocessor/seq/for_each.hpp>
+#include <boost/preprocessor/tuple/elem.hpp>
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class Function, class Object, class AuxInfo>
+struct bound_info_function
+{
+ bound_info_function(Function function, Object object, AuxInfo aux_info)
+ : m_function(function),
+ m_object(object),
+ m_aux_info(aux_info)
+ {
+ }
+
+ template<class Info>
+ cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const
+ {
+ return m_function(m_object, m_aux_info, info, size, value, size_ret);
+ }
+
+ Function m_function;
+ Object m_object;
+ AuxInfo m_aux_info;
+};
+
+template<class Function, class Object>
+struct bound_info_function<Function, Object, void>
+{
+ bound_info_function(Function function, Object object)
+ : m_function(function),
+ m_object(object)
+ {
+ }
+
+ template<class Info>
+ cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const
+ {
+ return m_function(m_object, info, size, value, size_ret);
+ }
+
+ Function m_function;
+ Object m_object;
+};
+
+template<class Function, class Object>
+inline bound_info_function<Function, Object, void>
+bind_info_function(Function f, Object o)
+{
+ return bound_info_function<Function, Object, void>(f, o);
+}
+
+template<class Function, class Object, class AuxInfo>
+inline bound_info_function<Function, Object, AuxInfo>
+bind_info_function(Function f, Object o, AuxInfo j)
+{
+ return bound_info_function<Function, Object, AuxInfo>(f, o, j);
+}
+
+// default implementation
+template<class T>
+struct get_object_info_impl
+{
+ template<class Function, class Info>
+ T operator()(Function function, Info info) const
+ {
+ T value;
+
+ cl_int ret = function(info, sizeof(T), &value, 0);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ return value;
+ }
+};
+
+// specialization for bool
+template<>
+struct get_object_info_impl<bool>
+{
+ template<class Function, class Info>
+ bool operator()(Function function, Info info) const
+ {
+ cl_bool value;
+
+ cl_int ret = function(info, sizeof(cl_bool), &value, 0);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ return value == CL_TRUE;
+ }
+};
+
+// specialization for std::string
+template<>
+struct get_object_info_impl<std::string>
+{
+ // Queries the size of the property, then reads it into a std::string.
+ // Returns an empty string when the reported size is 0 and throws
+ // opencl_error if either call does not return CL_SUCCESS.
+ template<class Function, class Info>
+ std::string operator()(Function function, Info info) const
+ {
+ size_t size = 0;
+
+ // first call: request only the size of the value
+ cl_int ret = function(info, 0, 0, &size);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ if(size == 0){
+ return std::string();
+ }
+
+ // the string is one character shorter than the reported size;
+ // presumably the size counts a trailing NUL terminator -- TODO confirm
+ std::string value(size - 1, 0);
+
+ // second call: fetch the value itself into the string's storage
+ ret = function(info, size, &value[0], 0);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ return value;
+ }
+};
+
+// specialization for std::vector<T>
+template<class T>
+struct get_object_info_impl<std::vector<T> >
+{
+ // Queries the size in bytes of the property, then reads it into a
+ // vector of size / sizeof(T) elements. Returns an empty vector without
+ // issuing the second call when there are no elements to read, and
+ // throws opencl_error if a call does not return CL_SUCCESS.
+ template<class Function, class Info>
+ std::vector<T> operator()(Function function, Info info) const
+ {
+ size_t size = 0;
+
+ // first call: request only the size of the value
+ cl_int ret = function(info, 0, 0, &size);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ std::vector<T> vector(size / sizeof(T));
+
+ // nothing to read; this also avoids evaluating &vector[0] on an
+ // empty vector, which is undefined behaviour
+ if(vector.empty()){
+ return vector;
+ }
+
+ // second call: fetch the elements into the vector's storage
+ ret = function(info, size, &vector[0], 0);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ return vector;
+ }
+};
+
+// returns the value (of type T) from the given clGet*Info() function call.
+template<class T, class Function, class Object, class Info>
+inline T get_object_info(Function f, Object o, Info i)
+{
+ return get_object_info_impl<T>()(bind_info_function(f, o), i);
+}
+
+template<class T, class Function, class Object, class Info, class AuxInfo>
+inline T get_object_info(Function f, Object o, Info i, AuxInfo j)
+{
+ return get_object_info_impl<T>()(bind_info_function(f, o, j), i);
+}
+
+// returns the value type for the clGet*Info() call on Object with Enum.
+template<class Object, int Enum>
+struct get_object_info_type;
+
+// defines the object::get_info<Enum>() specialization
+#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION(object_type, result_type, value) \
+ namespace detail { \
+ template<> struct get_object_info_type<object_type, value> { typedef result_type type; }; \
+ } \
+ template<> inline result_type object_type::get_info<value>() const \
+ { \
+ return get_info<result_type>(value); \
+ }
+
+// used by BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS()
+#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL(r, data, elem) \
+ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION( \
+ data, BOOST_PP_TUPLE_ELEM(2, 0, elem), BOOST_PP_TUPLE_ELEM(2, 1, elem) \
+ )
+
+// defines the object::get_info<Enum>() specialization for each
+// (result_type, value) tuple in seq for object_type.
+#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(object_type, seq) \
+ BOOST_PP_SEQ_FOR_EACH( \
+ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL, object_type, seq \
+ )
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP
diff --git a/boost/compute/detail/getenv.hpp b/boost/compute/detail/getenv.hpp
new file mode 100644
index 0000000000..ceb3605d5a
--- /dev/null
+++ b/boost/compute/detail/getenv.hpp
@@ -0,0 +1,36 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_GETENV_HPP
+#define BOOST_COMPUTE_DETAIL_GETENV_HPP
+
+#include <cstdlib>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Looks up the environment variable env_var with std::getenv() and returns
+// its result unchanged. When building with MSVC, warning 4996 is disabled
+// around the call (presumably the "unsafe function" deprecation warning
+// for getenv -- confirm).
+inline const char* getenv(const char *env_var)
+{
+#ifdef _MSC_VER
+#  pragma warning(push)
+#  pragma warning(disable: 4996)
+#endif
+    const char *value = std::getenv(env_var);
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+    return value;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_GETENV_HPP
diff --git a/boost/compute/detail/global_static.hpp b/boost/compute/detail/global_static.hpp
new file mode 100644
index 0000000000..d8014e4252
--- /dev/null
+++ b/boost/compute/detail/global_static.hpp
@@ -0,0 +1,37 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP
+#define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP
+
+#include <boost/compute/config.hpp>
+
+// BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) declares, inside a
+// function body, an object `name` of `type` constructed with the
+// parenthesized argument list `ctor`. Depending on the configuration the
+// object is a plain function-local static or a per-thread instance.
+#ifdef BOOST_COMPUTE_THREAD_SAFE
+#  ifdef BOOST_COMPUTE_HAVE_THREAD_LOCAL
+     // use c++11 thread local storage
+#    define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \
+         thread_local type name ctor;
+#  else
+     // use thread_specific_ptr from boost.thread
+#    include <boost/preprocessor/cat.hpp>
+#    include <boost/thread/tss.hpp>
+     // only the thread_specific_ptr itself is static. `name` is an ordinary
+     // reference that is re-bound on every execution of the macro, so each
+     // thread sees the object owned by its own slot. (a static reference
+     // would be initialized once, by the first thread to get here, and
+     // every other thread would then share -- and later dangle on -- that
+     // first thread's object.)
+#    define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \
+         static ::boost::thread_specific_ptr< type > BOOST_PP_CAT(name, _tls_ptr_); \
+         if(!BOOST_PP_CAT(name, _tls_ptr_).get()){ \
+             BOOST_PP_CAT(name, _tls_ptr_).reset(new type ctor); \
+         } \
+         type &name = *BOOST_PP_CAT(name, _tls_ptr_);
+#  endif
+#else
+   // no thread-safety, just use static
+#  define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \
+       static type name ctor;
+#endif
+
+#endif // BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP
diff --git a/boost/compute/detail/is_buffer_iterator.hpp b/boost/compute/detail/is_buffer_iterator.hpp
new file mode 100644
index 0000000000..c0caa050d6
--- /dev/null
+++ b/boost/compute/detail/is_buffer_iterator.hpp
@@ -0,0 +1,30 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP
+#define BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP
+
+#include <boost/config.hpp>
+#include <boost/type_traits.hpp>
+#include <boost/utility/enable_if.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// primary template: a type is not a buffer iterator unless a
+// specialization says otherwise. the defaulted Enable parameter is
+// presumably there so that specializations can be constrained with
+// enable_if.
+template<class Iterator, class Enable = void>
+struct is_buffer_iterator : boost::false_type
+{
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP
diff --git a/boost/compute/detail/is_contiguous_iterator.hpp b/boost/compute/detail/is_contiguous_iterator.hpp
new file mode 100644
index 0000000000..d0889b2f9e
--- /dev/null
+++ b/boost/compute/detail/is_contiguous_iterator.hpp
@@ -0,0 +1,118 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP
+#define BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP
+
+#include <vector>
+#include <valarray>
+
+#include <boost/config.hpp>
+#include <boost/type_traits.hpp>
+#include <boost/utility/enable_if.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// default = false
+template<class Iterator, class Enable = void>
+struct _is_contiguous_iterator : public boost::false_type {};
+
+// std::vector<T>::iterator = true
+//
+// std::vector<bool> is excluded: it is a bit-packed specialization whose
+// iterators are proxies, so its elements do not form an array of bool that
+// could be addressed through a pointer.
+template<class Iterator>
+struct _is_contiguous_iterator<
+    Iterator,
+    typename boost::enable_if_c<
+        boost::is_same<
+            Iterator,
+            typename std::vector<typename Iterator::value_type>::iterator
+        >::value &&
+        !boost::is_same<typename Iterator::value_type, bool>::value
+    >::type
+> : public boost::true_type {};
+
+// std::vector<T>::const_iterator = true (again except for T = bool)
+template<class Iterator>
+struct _is_contiguous_iterator<
+    Iterator,
+    typename boost::enable_if_c<
+        boost::is_same<
+            Iterator,
+            typename std::vector<typename Iterator::value_type>::const_iterator
+        >::value &&
+        !boost::is_same<typename Iterator::value_type, bool>::value
+    >::type
+> : public boost::true_type {};
+
+// std::valarray<T>::iterator = true
+//
+// NOTE(review): the C++ standard does not define nested iterator types for
+// std::valarray, so on a conforming library the substitution fails and
+// this specialization (and the const_iterator one below) is never
+// selected. kept for libraries that do provide them -- confirm.
+template<class Iterator>
+struct _is_contiguous_iterator<
+    Iterator,
+    typename boost::enable_if<
+        typename boost::is_same<
+            Iterator,
+            typename std::valarray<typename Iterator::value_type>::iterator
+        >::type
+    >::type
+> : public boost::true_type {};
+
+// std::valarray<T>::const_iterator = true
+template<class Iterator>
+struct _is_contiguous_iterator<
+    Iterator,
+    typename boost::enable_if<
+        typename boost::is_same<
+            Iterator,
+            typename std::valarray<typename Iterator::value_type>::const_iterator
+        >::type
+    >::type
+> : public boost::true_type {};
+
+// T* = true
+template<class Iterator>
+struct _is_contiguous_iterator<
+    Iterator,
+    typename boost::enable_if<
+        boost::is_pointer<Iterator>
+    >::type
+> : public boost::true_type {};
+
+// is_contiguous_iterator<Iterator>::value is true when Iterator refers to
+// a range of contiguous values, e.g. std::vector<>::iterator or float*.
+// it is false for iterators such as std::set<>::iterator and
+// std::insert_iterator<>.
+//
+// the check happens in two steps. iterators whose value_type is void are
+// rejected first by the specialization below, because void is not a valid
+// value_type for many containers (std::vector<void>::iterator cannot even
+// be named). every other iterator is stripped of its cv-qualifiers and
+// handed to _is_contiguous_iterator, which has specializations returning
+// true for all (known) contiguous iterator types and defaults to false.
+template<class Iterator, class Enable = void>
+struct is_contiguous_iterator
+    : public _is_contiguous_iterator<
+          typename boost::remove_cv<Iterator>::type
+      >
+{
+};
+
+// value_type of void = false
+template<class Iterator>
+struct is_contiguous_iterator<
+    Iterator,
+    typename boost::enable_if<
+        boost::is_void<typename Iterator::value_type>
+    >::type
+> : public boost::false_type
+{
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP
diff --git a/boost/compute/detail/iterator_plus_distance.hpp b/boost/compute/detail/iterator_plus_distance.hpp
new file mode 100644
index 0000000000..26e95f16c0
--- /dev/null
+++ b/boost/compute/detail/iterator_plus_distance.hpp
@@ -0,0 +1,53 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP
+#define BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP
+
+#include <iterator>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// fallback for every iterator category other than random access: moves the
+// iterator forward one element at a time. a zero or negative n leaves the
+// iterator unchanged (counting a negative n down with `while(n--)` would
+// keep incrementing far past the end of the range).
+template<class Iterator, class Distance, class Tag>
+inline Iterator iterator_plus_distance(Iterator i, Distance n, Tag)
+{
+    for(; n > 0; --n){
+        ++i;
+    }
+
+    return i;
+}
+
+// random access iterators are advanced in one step with operator+. n is
+// converted to the iterator's difference_type first, so negative distances
+// move backwards.
+template<class Iterator, class Distance>
+inline Iterator iterator_plus_distance(Iterator i,
+                                       Distance n,
+                                       std::random_access_iterator_tag)
+{
+    typedef typename
+        std::iterator_traits<Iterator>::difference_type difference_type;
+
+    return i + static_cast<difference_type>(n);
+}
+
+// similar to std::advance() except returns the advanced iterator and
+// also works with iterators that don't define difference_type. dispatches
+// on the iterator_category of Iterator to one of the overloads above.
+template<class Iterator, class Distance>
+inline Iterator iterator_plus_distance(Iterator i, Distance n)
+{
+    typedef typename std::iterator_traits<Iterator>::iterator_category tag;
+
+    return iterator_plus_distance(i, n, tag());
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP
diff --git a/boost/compute/detail/iterator_range_size.hpp b/boost/compute/detail/iterator_range_size.hpp
new file mode 100644
index 0000000000..67a675f833
--- /dev/null
+++ b/boost/compute/detail/iterator_range_size.hpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H
+#define BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H
+
+#include <cstddef>
+#include <algorithm>
+#include <iterator>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Returns the number of elements in the range [first, last) as an
+// unsigned size_t. Unlike std::distance(), whose result is signed, a
+// negative distance is reported as 0 instead of being passed through.
+template<class Iterator>
+inline size_t iterator_range_size(Iterator first, Iterator last)
+{
+    typedef typename
+        std::iterator_traits<Iterator>::difference_type
+        difference_type;
+
+    const difference_type count = std::distance(first, last);
+    if(count > static_cast<difference_type>(0)){
+        return static_cast<size_t>(count);
+    }
+
+    // empty or reversed range
+    return static_cast<size_t>(0);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H
diff --git a/boost/compute/detail/iterator_traits.hpp b/boost/compute/detail/iterator_traits.hpp
new file mode 100644
index 0000000000..45f0f683e6
--- /dev/null
+++ b/boost/compute/detail/iterator_traits.hpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP
+#define BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP
+
+#include <iterator>
+
+#include <boost/compute/detail/is_contiguous_iterator.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// extends std::iterator_traits<Iterator> with three compile-time flags:
+// whether the iterator is contiguous (is_contiguous_iterator), whether it
+// is a device iterator (is_device_iterator), and the negation of the
+// latter.
+template<class Iterator>
+struct iterator_traits : public std::iterator_traits<Iterator>
+{
+    static const bool is_contiguous =
+        is_contiguous_iterator<Iterator>::value;
+
+    static const bool is_on_device =
+        is_device_iterator<Iterator>::value;
+
+    // every iterator that is not a device iterator counts as a host one
+    static const bool is_on_host =
+        !is_device_iterator<Iterator>::value;
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP
diff --git a/boost/compute/detail/literal.hpp b/boost/compute/detail/literal.hpp
new file mode 100644
index 0000000000..0d23b1d4d2
--- /dev/null
+++ b/boost/compute/detail/literal.hpp
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_LITERAL_HPP
+#define BOOST_COMPUTE_DETAIL_LITERAL_HPP
+
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#include <boost/type_traits/is_same.hpp>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// formats x as source text for a literal: the value is written in
+// scientific notation with the stream precision set to
+// std::numeric_limits<T>::digits10, and an "f" suffix is appended when T
+// is float (or float_).
+//
+// NOTE(review): with std::scientific the precision counts the digits after
+// the decimal point; digits10 may be too few to round-trip every floating
+// point value -- confirm whether more digits are wanted.
+template<class T>
+std::string make_literal(T x)
+{
+    const bool is_single_precision =
+        boost::is_same<T, float>::value || boost::is_same<T, float_>::value;
+
+    std::ostringstream text;
+    text << std::setprecision(std::numeric_limits<T>::digits10);
+    text << std::scientific;
+    text << x;
+
+    if(is_single_precision){
+        text << "f";
+    }
+
+    return text.str();
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_LITERAL_HPP
diff --git a/boost/compute/detail/lru_cache.hpp b/boost/compute/detail/lru_cache.hpp
new file mode 100644
index 0000000000..fe1a56f74b
--- /dev/null
+++ b/boost/compute/detail/lru_cache.hpp
@@ -0,0 +1,139 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP
+#define BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP
+
+#include <map>
+#include <list>
+#include <utility>
+
+#include <boost/optional.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// a cache which evicts the least recently used item when it is full
+//
+// m_list holds the keys ordered from most recently used (front) to least
+// recently used (back). m_map maps each key to its value together with
+// the iterator of the key's node in m_list, so a lookup can move the key
+// to the front without searching the list.
+template<class Key, class Value>
+class lru_cache
+{
+public:
+    typedef Key key_type;
+    typedef Value value_type;
+    typedef std::list<key_type> list_type;
+    typedef std::map<
+        key_type,
+        std::pair<value_type, typename list_type::iterator>
+    > map_type;
+
+    // creates an empty cache holding at most capacity items. a capacity of
+    // zero yields a cache which never stores anything.
+    lru_cache(size_t capacity)
+        : m_capacity(capacity)
+    {
+    }
+
+    ~lru_cache()
+    {
+    }
+
+    // returns the number of items currently stored
+    size_t size() const
+    {
+        return m_map.size();
+    }
+
+    // returns the maximum number of items the cache will hold
+    size_t capacity() const
+    {
+        return m_capacity;
+    }
+
+    // returns true if no items are stored
+    bool empty() const
+    {
+        return m_map.empty();
+    }
+
+    // returns true if key is stored. does not change the usage order.
+    bool contains(const key_type &key) const
+    {
+        return m_map.find(key) != m_map.end();
+    }
+
+    // stores value under key as the most recently used item, evicting the
+    // least recently used item first if the cache is full. if key is
+    // already present the cache is left untouched (neither the value nor
+    // the usage order changes).
+    void insert(const key_type &key, const value_type &value)
+    {
+        if(m_capacity == 0){
+            // nothing can be stored; in particular there is nothing that
+            // could be evicted to make room
+            return;
+        }
+
+        if(m_map.find(key) != m_map.end()){
+            return;
+        }
+
+        // insert item into the cache, but first check if it is full
+        if(size() >= m_capacity){
+            // cache is full, evict the least recently used item
+            evict();
+        }
+
+        // insert the new item. map::insert is used rather than
+        // operator[] so value_type does not need a default constructor.
+        m_list.push_front(key);
+        m_map.insert(
+            typename map_type::value_type(
+                key,
+                typename map_type::mapped_type(value, m_list.begin())
+            )
+        );
+    }
+
+    // returns the value stored under key and marks it as the most
+    // recently used item, or boost::none if key is not stored.
+    boost::optional<value_type> get(const key_type &key)
+    {
+        // lookup value in the cache
+        typename map_type::iterator i = m_map.find(key);
+        if(i == m_map.end()){
+            // value not in cache
+            return boost::none;
+        }
+
+        // move the key's node to the front of the most recently used
+        // list. splice() relinks the node in place, so the iterator
+        // stored in the map stays valid and needs no update. it is a
+        // no-op if the node already is at the front.
+        m_list.splice(m_list.begin(), m_list, i->second.second);
+
+        return i->second.first;
+    }
+
+    // removes all items
+    void clear()
+    {
+        m_map.clear();
+        m_list.clear();
+    }
+
+private:
+    // removes the least recently used item (the back of m_list), if any
+    void evict()
+    {
+        if(m_list.empty()){
+            return;
+        }
+
+        m_map.erase(m_list.back());
+        m_list.pop_back();
+    }
+
+private:
+    map_type m_map;
+    list_type m_list;
+    size_t m_capacity;
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP
diff --git a/boost/compute/detail/meta_kernel.hpp b/boost/compute/detail/meta_kernel.hpp
new file mode 100644
index 0000000000..7be778b025
--- /dev/null
+++ b/boost/compute/detail/meta_kernel.hpp
@@ -0,0 +1,1054 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
+#define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
+
+#include <new>
+#include <set>
+#include <string>
+#include <vector>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+#include <utility>
+
+#include <boost/tuple/tuple.hpp>
+#include <boost/type_traits.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/static_assert.hpp>
+#include <boost/algorithm/string/find.hpp>
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/closure.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/type_traits.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/image/image2d.hpp>
+#include <boost/compute/image/image_sampler.hpp>
+#include <boost/compute/memory_object.hpp>
+#include <boost/compute/detail/device_ptr.hpp>
+#include <boost/compute/detail/sha1.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// a named variable of type T as it appears in kernel source text. only
+// the name is stored; T is exposed as result_type.
+template<class T>
+class meta_kernel_variable
+{
+public:
+    typedef T result_type;
+
+    // creates a variable referring to name
+    meta_kernel_variable(const std::string &name)
+        : m_name(name)
+    {
+    }
+
+    meta_kernel_variable(const meta_kernel_variable &other)
+        : m_name(other.m_name)
+    {
+    }
+
+    meta_kernel_variable& operator=(const meta_kernel_variable &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_name = other.m_name;
+        return *this;
+    }
+
+    ~meta_kernel_variable()
+    {
+    }
+
+    // returns a copy of the variable's name
+    std::string name() const
+    {
+        return m_name;
+    }
+
+private:
+    std::string m_name;
+};
+
+// a literal value of type T to be written into kernel source text. the
+// value is stored by copy; T is exposed as result_type.
+template<class T>
+class meta_kernel_literal
+{
+public:
+    typedef T result_type;
+
+    // creates a literal holding a copy of value
+    meta_kernel_literal(const T &value)
+        : m_value(value)
+    {
+    }
+
+    meta_kernel_literal(const meta_kernel_literal &other)
+        : m_value(other.m_value)
+    {
+    }
+
+    meta_kernel_literal& operator=(const meta_kernel_literal &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_value = other.m_value;
+        return *this;
+    }
+
+    ~meta_kernel_literal()
+    {
+    }
+
+    // returns a reference to the stored value
+    const T& value() const
+    {
+        return m_value;
+    }
+
+private:
+    T m_value;
+};
+
+// owns a byte-wise copy of a kernel argument value.
+//
+// m_value points to a malloc()ed buffer of m_size bytes, or is null if no
+// bytes are held. copies of the object are deep copies.
+struct meta_kernel_stored_arg
+{
+    // creates an empty argument (size 0, no buffer)
+    meta_kernel_stored_arg()
+        : m_size(0),
+          m_value(0)
+    {
+    }
+
+    meta_kernel_stored_arg(const meta_kernel_stored_arg &other)
+        : m_size(0),
+          m_value(0)
+    {
+        set_value(other.m_size, other.m_value);
+    }
+
+    meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other)
+    {
+        if(this != &other){
+            set_value(other.m_size, other.m_value);
+        }
+
+        return *this;
+    }
+
+    // stores the sizeof(T) bytes of value
+    template<class T>
+    meta_kernel_stored_arg(const T &value)
+        : m_size(0),
+          m_value(0)
+    {
+        set_value(value);
+    }
+
+    ~meta_kernel_stored_arg()
+    {
+        // free() accepts a null pointer
+        std::free(m_value);
+    }
+
+    // replaces the held bytes with a copy of the size bytes at value.
+    // if value is null or size is zero no buffer is held afterwards, but
+    // m_size is still set to size.
+    //
+    // throws std::bad_alloc if the buffer cannot be allocated; the object
+    // is left unchanged in that case.
+    void set_value(size_t size, const void *value)
+    {
+        // copy into a fresh buffer before releasing the old one: the
+        // object keeps its state if the allocation fails, and value may
+        // point into the buffer currently held
+        void *copy = 0;
+        if(value && size != 0){
+            copy = std::malloc(size);
+            if(!copy){
+                throw std::bad_alloc();
+            }
+            std::memcpy(copy, value, size);
+        }
+
+        std::free(m_value);
+        m_value = copy;
+        m_size = size;
+    }
+
+    // stores the sizeof(T) bytes of value
+    template<class T>
+    void set_value(const T &value)
+    {
+        set_value(sizeof(T), boost::addressof(value));
+    }
+
+    size_t m_size;
+    void *m_value;
+};
+
+// record describing one buffer bound to a meta_kernel: the cl_mem handle
+// taken from the buffer, the identifier used for it in the kernel source,
+// its address space and the index of its kernel argument.
+struct meta_kernel_buffer_info
+{
+    meta_kernel_buffer_info(const buffer &buffer,
+                            const std::string &id,
+                            memory_object::address_space addr_space,
+                            size_t i)
+        : m_mem(buffer.get()), identifier(id),
+          address_space(addr_space), index(i)
+    {
+    }
+
+    cl_mem m_mem;                               // handle from buffer.get()
+    std::string identifier;                     // name in the kernel source
+    memory_object::address_space address_space; // address space qualifier
+    size_t index;                               // kernel argument index
+};
+
+class meta_kernel;
+
+// function object called with a meta_kernel for a given Type; the primary
+// template does nothing. presumably specialized for types which need
+// extra declarations in the kernel and used by
+// meta_kernel::inject_type<Type>() -- neither is visible here.
+template<class Type>
+struct inject_type_impl
+{
+    void operator()(meta_kernel &)
+    {
+    }
+};
+
+// declares a meta_kernel member operator<< which writes a scalar value of
+// `type` to m_source with the stream's regular formatting.
+#define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \
+ meta_kernel& operator<<(const type &x) \
+ { \
+ m_source << x; \
+ return *this; \
+ }
+
+// declares a meta_kernel member operator<< which writes a vector value of
+// `type` as "(<type name>)(c0,c1,...)": a cast to the type name followed
+// by the comma-separated components, each written as a literal via lit().
+#define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \
+ meta_kernel& operator<<(const type &x) \
+ { \
+ m_source << "(" << type_name<type>() << ")"; \
+ m_source << "("; \
+ for(size_t i = 0; i < vector_size<type>::value; i++){ \
+ *this << lit(x[i]); \
+ \
+ if(i != vector_size<type>::value - 1){ \
+ m_source << ","; \
+ } \
+ } \
+ m_source << ")"; \
+ return *this; \
+ }
+
+// declares the stream operators for the scalar type `type`_ and for its
+// 2-, 4-, 8- and 16-component vector types (`type`2_ ... `type`16_).
+#define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \
+ BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _))
+
+class meta_kernel
+{
+public:
+    // pairs the name of a kernel argument of type T with its index
+    template<class T>
+    class argument
+    {
+    public:
+        argument(const std::string &name, size_t index)
+            : m_name(name), m_index(index)
+        {
+        }
+
+        // name of the argument
+        const std::string &name() const { return m_name; }
+
+        // index of the argument
+        size_t index() const { return m_index; }
+
+    private:
+        std::string m_name;
+        size_t m_index;
+    };
+
+    // creates an empty meta-kernel. name becomes the name of the __kernel
+    // function emitted by source() and is the name compile() looks up.
+    explicit meta_kernel(const std::string &name) : m_name(name)
+    {
+    }
+
+    // copies the kernel body text of other.
+    //
+    // the text is streamed into the (still empty) m_source rather than
+    // installed with m_source.str(text): str() leaves the write position
+    // at the start of the buffer, so anything streamed into the copy
+    // afterwards would overwrite the copied text instead of following it.
+    //
+    // NOTE(review): only m_source is copied. m_name and the other members
+    // used elsewhere in this class (m_args, m_pragmas, m_stored_args, ...)
+    // are left default-constructed -- confirm whether that is intended.
+    meta_kernel(const meta_kernel &other)
+    {
+        m_source << other.m_source.str();
+    }
+
+    // replaces the kernel body text with a copy of other's.
+    //
+    // m_source is emptied first and the text is then streamed in, so the
+    // write position ends up behind the copied text. installing the text
+    // with m_source.str(text) alone would leave the write position at the
+    // start and later output would overwrite the copied text.
+    //
+    // NOTE(review): only m_source is assigned; the name, arguments,
+    // pragmas and stored values of *this are kept as they are -- confirm
+    // whether that is intended.
+    meta_kernel& operator=(const meta_kernel &other)
+    {
+        if(this != &other){
+            const std::string text = other.m_source.str();
+
+            m_source.clear();
+            m_source.str(std::string());
+            m_source << text;
+        }
+
+        return *this;
+    }
+
+    // nothing to release explicitly
+    ~meta_kernel() {}
+
+    // returns a copy of the kernel name given to the constructor
+    std::string name() const { return m_name; }
+
+    // assembles and returns the complete program text: the collected
+    // pragmas (if any), a fixed set of helper macros, the type declaration
+    // source, the external function source and finally the __kernel
+    // function itself, whose parameter list is m_args joined with ", " and
+    // whose body is the text streamed into m_source.
+    std::string source() const
+    {
+        std::stringstream stream;
+
+        // add pragmas
+        if(!m_pragmas.empty()){
+            stream << m_pragmas << "\n";
+        }
+
+        // add macros
+        stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n";
+        // selects a member of a pair by index. the two branches of the
+        // conditional have to be separated by ':'; pasting them together
+        // with '##' expands to an ill-formed expression.
+        stream << "#define boost_pair_get(x, n) (n == 0 ? x.first : x.second)\n";
+        stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n";
+        stream << "#define boost_tuple_get(x, n) (x.v ## n)\n";
+
+        // add type declaration source
+        stream << m_type_declaration_source.str() << "\n";
+
+        // add external function source
+        stream << m_external_function_source.str() << "\n";
+
+        // add kernel source
+        stream << "__kernel void " << m_name
+               << "(" << boost::join(m_args, ", ") << ")\n"
+               << "{\n" << m_source.str() << "\n}\n";
+
+        return stream.str();
+    }
+
+    // builds the kernel for context and returns it with all stored
+    // arguments bound.
+    //
+    // the program is obtained from the global program cache of the
+    // context under a key derived from the sha1 of the generated source,
+    // and is built with options if it is not cached yet.
+    kernel compile(const context &context, const std::string &options = std::string())
+    {
+        // program text and the cache key derived from it
+        std::string source = this->source();
+        std::string cache_key = "__boost_meta_kernel_" +
+            static_cast<std::string>(detail::sha1(source));
+
+        // fetch the program from the cache, building it if necessary
+        boost::shared_ptr<program_cache> cache =
+            program_cache::get_global_cache(context);
+        ::boost::compute::program program =
+            cache->get_or_build(cache_key, options, source, context);
+
+        ::boost::compute::kernel kernel = program.create_kernel(name());
+
+        // bind the values recorded with set_arg(); slots of size zero were
+        // never given a value and are skipped
+        for(size_t slot = 0; slot < m_stored_args.size(); slot++){
+            const detail::meta_kernel_stored_arg &stored = m_stored_args[slot];
+            if(stored.m_size == 0){
+                continue;
+            }
+
+            kernel.set_arg(slot, stored.m_size, stored.m_value);
+        }
+
+        // bind the buffers recorded by get_buffer_identifier()
+        for(size_t n = 0; n < m_stored_buffers.size(); n++){
+            const detail::meta_kernel_buffer_info &info = m_stored_buffers[n];
+
+            kernel.set_arg(info.index, info.m_mem);
+        }
+
+        return kernel;
+    }
+
+    // appends an argument of type T called name to the kernel's parameter
+    // list and returns its index.
+    template<class T>
+    size_t add_arg(const std::string &name)
+    {
+        // "<type> <name>"
+        std::string declaration = type<T>();
+        declaration += " ";
+        declaration += name;
+
+        m_args.push_back(declaration);
+
+        // the new argument is the last one
+        return m_args.size() - 1;
+    }
+
+    // same as above, but the declaration is qualified with the prefix for
+    // address_space (see address_space_prefix() and
+    // add_arg_with_qualifiers(), defined elsewhere in this class).
+    template<class T>
+    size_t add_arg(memory_object::address_space address_space,
+                   const std::string &name)
+    {
+        return add_arg_with_qualifiers<T>(
+            address_space_prefix(address_space), name
+        );
+    }
+
+    // records value for the argument at index; compile() binds the
+    // recorded values to the kernel. the list of stored arguments grows as
+    // needed to make index valid.
+    template<class T>
+    void set_arg(size_t index, const T &value)
+    {
+        const bool out_of_range = index >= m_stored_args.size();
+        if(out_of_range){
+            m_stored_args.resize(index + 1);
+        }
+
+        m_stored_args[index] = detail::meta_kernel_stored_arg(value);
+    }
+
+    // records the cl_mem handle of mem for the argument at index
+    void set_arg(size_t index, const memory_object &mem)
+    {
+        const cl_mem handle = mem.get();
+
+        set_arg<cl_mem>(index, handle);
+    }
+
+    // records the cl_sampler handle of sampler for the argument at index
+    void set_arg(size_t index, const image_sampler &sampler)
+    {
+        const cl_sampler handle = cl_sampler(sampler);
+
+        set_arg<cl_sampler>(index, handle);
+    }
+
+    // convenience for add_arg<T>(name) followed by set_arg<T>() on the
+    // new argument; returns the argument's index.
+    template<class T>
+    size_t add_set_arg(const std::string &name, const T &value)
+    {
+        const size_t position = add_arg<T>(name);
+
+        set_arg<T>(position, value);
+
+        return position;
+    }
+
+    // appends the line "#pragma OPENCL EXTENSION <extension> : <value>"
+    // to the pragmas which source() places at the top of the program.
+    void add_extension_pragma(const std::string &extension,
+                              const std::string &value = "enable")
+    {
+        m_pragmas += "#pragma OPENCL EXTENSION " + extension;
+        m_pragmas += " : " + value;
+        m_pragmas += "\n";
+    }
+
+    // const overload: casts away constness and forwards to the overload
+    // above, so pragmas can be added through a const meta_kernel. note
+    // that value has no default here.
+    void add_extension_pragma(const std::string &extension,
+                              const std::string &value) const
+    {
+        meta_kernel &self = const_cast<meta_kernel &>(*this);
+
+        self.add_extension_pragma(extension, value);
+    }
+
+    // returns the spelling of T for use in kernel source: optional
+    // "const " and "volatile " qualifiers (taken from T itself, not from a
+    // pointee), the type_name of T with pointer and cv-qualifiers removed,
+    // and a trailing "*" if T is a pointer. also calls inject_type<>() for
+    // the underlying type.
+    template<class T>
+    std::string type() const
+    {
+        // underlying type: pointer stripped first, then cv-qualifiers
+        typedef
+            typename boost::remove_cv<
+                typename boost::remove_pointer<T>::type
+            >::type Type;
+
+        std::stringstream spelling;
+
+        if(boost::is_const<T>::value){
+            spelling << "const ";
+        }
+        if(boost::is_volatile<T>::value){
+            spelling << "volatile ";
+        }
+
+        spelling << type_name<Type>();
+
+        if(boost::is_pointer<T>::value){
+            spelling << "*";
+        }
+
+        // inject type pragmas and/or definitions
+        inject_type<Type>();
+
+        return spelling.str();
+    }
+
+    // returns the declaration "<type of T> <name>"
+    template<class T>
+    std::string decl(const std::string &name) const
+    {
+        std::string declaration = type<T>();
+        declaration += " ";
+        declaration += name;
+
+        return declaration;
+    }
+
+    // returns the declaration "<type of T> <name> = <init>". the text is
+    // produced by streaming into a temporary, unnamed meta_kernel, so
+    // *this is not modified.
+    template<class T, class Expr>
+    std::string decl(const std::string &name, const Expr &init) const
+    {
+        meta_kernel scratch((std::string()));
+
+        scratch << scratch.decl<T>(name);
+        scratch << " = ";
+        scratch << init;
+
+        return scratch.m_source.str();
+    }
+
+    // returns a variable of type T called name. type<T>() is called first
+    // for its side effect of injecting what T needs into the kernel.
+    template<class T>
+    detail::meta_kernel_variable<T> var(const std::string &name) const
+    {
+        type<T>();
+
+        return detail::meta_kernel_variable<T>(name);
+    }
+
+    // returns a literal of type T holding value (after calling type<T>())
+    template<class T>
+    detail::meta_kernel_literal<T> lit(const T &value) const
+    {
+        type<T>();
+
+        return make_lit<T>(value);
+    }
+
+    // returns the expression text expr typed as T. expressions share the
+    // representation of variables (after calling type<T>()).
+    template<class T>
+    detail::meta_kernel_variable<T> expr(const std::string &expr) const
+    {
+        type<T>();
+
+        return make_expr<T>(expr);
+    }
+
+ // define stream operators for scalar and vector types
+ // (each line declares operator<< for the scalar type and for its 2-, 4-,
+ // 8- and 16-component vector types)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double)
+
+ // define stream operators for float scalar and vector types
+ // the scalar is written by hand (instead of through the macro) so that
+ // it always shows a decimal point and carries the 'f' suffix.
+ // NOTE(review): std::showpoint is set on m_source and not reset here, so
+ // it also applies to values streamed afterwards -- confirm that this is
+ // intended.
+ meta_kernel& operator<<(const float &x)
+ {
+ m_source << std::showpoint << x << 'f';
+ return *this;
+ }
+
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_)
+ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_)
+
+    // a variable is written as its name
+    template<class T>
+    meta_kernel& operator<<(const meta_kernel_variable<T> &variable)
+    {
+        *this << variable.name();
+
+        return *this;
+    }
+
+    // a literal is written by streaming the value it holds
+    template<class T>
+    meta_kernel& operator<<(const meta_kernel_literal<T> &literal)
+    {
+        *this << literal.value();
+
+        return *this;
+    }
+
+    // boolean literals are written as the words "true" and "false"
+    meta_kernel& operator<<(const meta_kernel_literal<bool> &literal)
+    {
+        if(literal.value()){
+            return *this << "true";
+        }
+
+        return *this << "false";
+    }
+
+    // character literals are written as a quoted character constant, with
+    // control characters and characters which need escaping replaced by
+    // their escape sequences.
+    meta_kernel& operator<<(const meta_kernel_literal<char> &literal)
+    {
+        const char c = literal.value();
+
+        // control characters have a fixed spelling
+        const char *control_escape = 0;
+        switch(c){
+            case '\0': control_escape = "'\\0'"; break;
+            case '\a': control_escape = "'\\a'"; break;
+            case '\b': control_escape = "'\\b'"; break;
+            case '\t': control_escape = "'\\t'"; break;
+            case '\n': control_escape = "'\\n'"; break;
+            case '\v': control_escape = "'\\v'"; break;
+            case '\f': control_escape = "'\\f'"; break;
+            case '\r': control_escape = "'\\r'"; break;
+            default: break;
+        }
+        if(control_escape){
+            return *this << control_escape;
+        }
+
+        // quotes, the question mark and the backslash are written behind
+        // a backslash
+        const bool needs_backslash =
+            c == '\"' || c == '\'' || c == '\?' || c == '\\';
+        if(needs_backslash){
+            return *this << "'\\" << c << "'";
+        }
+
+        // all other characters are written as they are
+        return *this << "'" << c << "'";
+    }
+
+    // signed char literals are written like char literals
+    meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal)
+    {
+        const meta_kernel_literal<char> as_char = lit<char>(literal.value());
+
+        return *this << as_char;
+    }
+
+    // unsigned char literals are written as their numeric value, by way
+    // of a conversion to uint_
+    meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal)
+    {
+        const uint_ number = uint_(literal.value());
+
+        return *this << number;
+    }
+
+    // text is appended to the kernel body as it is: a single character,
+    meta_kernel& operator<<(char ch)
+    {
+        m_source << ch;
+
+        return *this;
+    }
+
+    // a nul-terminated string,
+    meta_kernel& operator<<(const char *string)
+    {
+        m_source << string;
+
+        return *this;
+    }
+
+    // or a std::string
+    meta_kernel& operator<<(const std::string &string)
+    {
+        m_source << string;
+
+        return *this;
+    }
+
+    // static factories: unlike var(), lit() and expr() they do not need a
+    // meta_kernel object and do not call type<T>().
+
+    // returns a variable of type T called name
+    template<class T>
+    static detail::meta_kernel_variable<T> make_var(const std::string &name)
+    {
+        typedef detail::meta_kernel_variable<T> variable_type;
+
+        return variable_type(name);
+    }
+
+    // returns a literal of type T holding value
+    template<class T>
+    static detail::meta_kernel_literal<T> make_lit(const T &value)
+    {
+        typedef detail::meta_kernel_literal<T> literal_type;
+
+        return literal_type(value);
+    }
+
+    // returns the expression text expr typed as T
+    template<class T>
+    static detail::meta_kernel_variable<T> make_expr(const std::string &expr)
+    {
+        typedef detail::meta_kernel_variable<T> variable_type;
+
+        return variable_type(expr);
+    }
+
+    // compiles the kernel and enqueues it on queue as a one-dimensional
+    // range of a single work-item starting at offset 0.
+    event exec(command_queue &queue)
+    {
+        const size_t offset = 0;
+        const size_t work_items = 1;
+
+        return exec_1d(queue, offset, work_items);
+    }
+
+    // compiles the kernel for the context of queue and enqueues it as a
+    // one-dimensional range; 0 is passed as the local work size.
+    event exec_1d(command_queue &queue,
+                  size_t global_work_offset,
+                  size_t global_work_size)
+    {
+        return exec_1d(queue, global_work_offset, global_work_size, 0);
+    }
+
+    // compiles the kernel for the context of queue and enqueues it as a
+    // one-dimensional range with the given local work size. returns the
+    // event of the enqueued command.
+    event exec_1d(command_queue &queue,
+                  size_t global_work_offset,
+                  size_t global_work_size,
+                  size_t local_work_size)
+    {
+        ::boost::compute::kernel kernel = compile(queue.get_context());
+
+        return queue.enqueue_1d_range_kernel(
+            kernel, global_work_offset, global_work_size, local_work_size
+        );
+    }
+
+ // Returns the kernel-argument identifier bound to `buffer` in the given
+ // address space. A buffer that was seen before (same memory object and
+ // same address space) gets its existing identifier back; otherwise a new
+ // "_buf<N>" pointer argument is added and recorded.
+ template<class T>
+ std::string get_buffer_identifier(const buffer &buffer,
+                                   const memory_object::address_space address_space =
+                                       memory_object::global_memory)
+ {
+     typedef std::vector<detail::meta_kernel_buffer_info>::const_iterator info_iterator;
+
+     // look for an existing binding first
+     for(info_iterator known = m_stored_buffers.begin();
+         known != m_stored_buffers.end();
+         ++known){
+         if(known->m_mem == buffer.get() &&
+            known->address_space == address_space){
+             return known->identifier;
+         }
+     }
+
+     // no binding yet: the name is derived from the number of bindings so far
+     std::string identifier =
+         "_buf" + lexical_cast<std::string>(m_stored_buffers.size());
+     const size_t arg_index = add_arg<T *>(address_space, identifier);
+
+     // remember the binding for later lookups
+     m_stored_buffers.push_back(
+         detail::meta_kernel_buffer_info(buffer, identifier, address_space, arg_index));
+
+     return identifier;
+ }
+
+ // Adds an image2d kernel argument carrying the given qualifiers, binds
+ // `image` to it and returns the argument name.
+ // NOTE(review): the name is always "image", so a second image in the same
+ // kernel presumably collides with the first -- confirm against callers.
+ std::string get_image_identifier(const char *qualifiers, const image2d &image)
+ {
+     const std::string arg_name = "image";
+
+     const size_t arg_index = add_arg_with_qualifiers<image2d>(qualifiers, arg_name);
+     set_arg(arg_index, image);
+
+     return arg_name;
+ }
+
+ // Makes sure the generated source declares the sampler object named
+ // "sampler" and returns that name.
+ //
+ // The three arguments are accepted but ignored: the sampler is always
+ // declared with CLK_NORMALIZED_COORDS_FALSE, CLK_ADDRESS_NONE and
+ // CLK_FILTER_NEAREST.
+ //
+ // The declaration is appended to m_pragmas only once. Previously every
+ // call appended another copy, so a kernel that asked for the sampler twice
+ // contained two definitions of `sampler`.
+ std::string get_sampler_identifier(bool normalized_coords,
+                                    cl_addressing_mode addressing_mode,
+                                    cl_filter_mode filter_mode)
+ {
+     (void) normalized_coords;
+     (void) addressing_mode;
+     (void) filter_mode;
+
+     const char *declaration =
+         "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
+         " CLK_ADDRESS_NONE |\n"
+         " CLK_FILTER_NEAREST;\n";
+
+     // skip the append when an earlier call already emitted the declaration
+     if(m_pragmas.find(declaration) == std::string::npos){
+         m_pragmas += declaration;
+     }
+
+     return "sampler";
+ }
+
+ // Renders an expression to a string by streaming it into a scratch
+ // meta_kernel and returning that kernel's accumulated source.
+ template<class Expr>
+ static std::string expr_to_string(const Expr &expr)
+ {
+     meta_kernel scratch((std::string()));
+     scratch << expr;
+     return scratch.m_source.str();
+ }
+
+ // Builds an invoked_function named "if" with an empty source string and
+ // `pred` as its only argument.
+ template<class Predicate>
+ detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const
+ {
+     typedef detail::invoked_function<bool, boost::tuple<Predicate> > statement_type;
+
+     return statement_type("if", std::string(), boost::make_tuple(pred));
+ }
+
+ // Builds an invoked_function named "else if" with an empty source string
+ // and `pred` as its only argument.
+ template<class Predicate>
+ detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const
+ {
+     typedef detail::invoked_function<bool, boost::tuple<Predicate> > statement_type;
+
+     return statement_type("else if", std::string(), boost::make_tuple(pred));
+ }
+
+ // Returns a cl_uint expression whose text is "get_global_id(<dim>)".
+ detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const
+ {
+     const std::string call =
+         "get_global_id(" + lexical_cast<std::string>(dim) + ")";
+
+     return expr<cl_uint>(call);
+ }
+
+ // Registers an external function's source under `name`. A name that was
+ // registered before is ignored, so each function's source is appended at
+ // most once.
+ void add_function(const std::string &name, const std::string &source)
+ {
+     // insert() reports through .second whether the name was new
+     const bool newly_registered = m_external_function_names.insert(name).second;
+
+     if(newly_registered){
+         m_external_function_source << source << "\n";
+     }
+ }
+
+ // Registers an external function whose source is wrapped in preprocessor
+ // definitions: one "#define" per map entry goes before the source and one
+ // matching "#undef" after it. The wrapped text is then registered through
+ // the two-argument overload.
+ void add_function(const std::string &name,
+                   const std::string &source,
+                   const std::map<std::string, std::string> &definitions)
+ {
+     typedef std::map<std::string, std::string>::const_iterator definition_iterator;
+
+     std::stringstream wrapped;
+
+     // opening #define's; an empty value yields a bare "#define NAME"
+     for(definition_iterator def = definitions.begin(); def != definitions.end(); ++def){
+         wrapped << "#define " << def->first;
+         if(!def->second.empty()){
+             wrapped << " " << def->second;
+         }
+         wrapped << "\n";
+     }
+
+     wrapped << source << "\n";
+
+     // closing #undef's so the macros do not reach later code
+     for(definition_iterator def = definitions.begin(); def != definitions.end(); ++def){
+         wrapped << "#undef " << def->first << "\n";
+     }
+
+     add_function(name, wrapped.str());
+ }
+
+ // Appends `declaration` to the type-declaration source unless the name of
+ // Type already occurs in the declarations collected so far.
+ // NOTE(review): the check is a plain substring search, so a type whose
+ // name is contained in an earlier declaration's text is treated as
+ // already declared -- confirm this is acceptable.
+ template<class Type>
+ void add_type_declaration(const std::string &declaration)
+ {
+     const std::string declared_so_far = m_type_declaration_source.str();
+
+     const bool already_declared =
+         declared_so_far.find(type_name<Type>()) != std::string::npos;
+
+     if(!already_declared){
+         m_type_declaration_source << declaration;
+     }
+ }
+
+ // Applies inject_type_impl<Type> to this kernel. The method is const, so
+ // constness is cast away before the functor is invoked.
+ template<class Type>
+ void inject_type() const
+ {
+     meta_kernel &self = const_cast<meta_kernel &>(*this);
+     inject_type_impl<Type>()(self);
+ }
+
+ // the insert_function_call() method inserts a call to a function with
+ // the given name and tuple of argument values.
+ template<class ArgTuple>
+ void insert_function_call(const std::string &name, const ArgTuple &args)
+ {
+     // emits: name ( arg0, arg1, ... )
+     *this << name;
+     *this << '(';
+     insert_function_call_args(args);
+     *this << ')';
+ }
+
+ // the insert_function_call_args() method takes a tuple of argument values
+ // and inserts them into the source string with a comma in-between each.
+ // this is useful for creating function calls given a tuple of values.
+ // overload for the empty tuple: there are no arguments, so nothing is
+ // written to the source.
+ void insert_function_call_args(const boost::tuple<>&)
+ {
+ }
+
+ // expands to an inject_type<Tn>() call for the n-th tuple element type.
+ #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \
+ inject_type<BOOST_PP_CAT(T, n)>();
+
+ // expands to a stream of tuple element (n - 1) followed by ", ". it is
+ // repeated from 1, so the elements streamed this way are 0 .. n-2.
+ #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \
+ << boost::get<BOOST_PP_DEC(n)>(args) << ", "
+
+ // defines the insert_function_call_args() overload for an n-element tuple:
+ // it first calls inject_type<>() for every element type, then streams all
+ // elements but the last with a trailing ", ", and finally streams the last
+ // element (index n - 1) without a separator.
+ #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \
+ template<BOOST_PP_ENUM_PARAMS(n, class T)> \
+ void insert_function_call_args( \
+ const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \
+ ) \
+ { \
+ BOOST_PP_REPEAT_FROM_TO( \
+ 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \
+ ) \
+ *this \
+ BOOST_PP_REPEAT_FROM_TO( \
+ 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \
+ ) \
+ << boost::get<BOOST_PP_DEC(n)>(args); \
+ }
+
+ // instantiate the overloads starting at one tuple element; the upper
+ // bound is BOOST_COMPUTE_MAX_ARITY (presumably exclusive, per the
+ // Boost.Preprocessor REPEAT_FROM_TO contract -- TODO confirm).
+ BOOST_PP_REPEAT_FROM_TO(
+ 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~
+ )
+
+ // the helper macros are only needed to generate the overloads above
+ #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE
+ #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG
+ #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS
+
+ // Maps an address space to its OpenCL qualifier keyword ("__global",
+ // "__local", "__private" or "__constant"). Returns a null pointer for a
+ // value that matches none of the four cases.
+ static const char* address_space_prefix(const memory_object::address_space value)
+ {
+     const char *prefix = 0; // stays null if no case matches
+
+     switch(value){
+         case memory_object::global_memory:   prefix = "__global";   break;
+         case memory_object::local_memory:    prefix = "__local";    break;
+         case memory_object::private_memory:  prefix = "__private";  break;
+         case memory_object::constant_memory: prefix = "__constant"; break;
+     }
+
+     return prefix;
+ }
+
+private:
+ // Adds an argument of type T named `name`, then rewrites its stored
+ // declaration so that `qualifiers` and a space precede it. Returns the
+ // argument's index.
+ template<class T>
+ size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name)
+ {
+     const size_t arg_index = add_arg<T>(name);
+
+     // prepend the qualifiers to the declaration add_arg() produced
+     std::stringstream qualified;
+     qualified << qualifiers << " " << m_args[arg_index];
+     m_args[arg_index] = qualified.str();
+
+     return arg_index;
+ }
+
+private:
+ std::string m_name;
+ std::stringstream m_source;
+ std::stringstream m_external_function_source;
+ std::stringstream m_type_declaration_source;
+ std::set<std::string> m_external_function_names;
+ std::vector<std::string> m_args;
+ std::string m_pragmas;
+ std::vector<detail::meta_kernel_stored_arg> m_stored_args;
+ std::vector<detail::meta_kernel_buffer_info> m_stored_buffers;
+};
+
+// Streams a function invocation: registers the function's source (with its
+// definitions) when it has one, then writes the call itself.
+template<class ResultType, class ArgTuple>
+inline meta_kernel&
+operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr)
+{
+    // an empty source means there is nothing to register
+    const bool has_source = !expr.source().empty();
+    if(has_source){
+        kernel.add_function(expr.name(), expr.source(), expr.definitions());
+    }
+
+    kernel.insert_function_call(expr.name(), expr.args());
+    return kernel;
+}
+
+// Streams a closure invocation: registers the closure's source when it has
+// one, then writes "name(<args>, <captures>)".
+// NOTE(review): the ", " between the two lists is written unconditionally;
+// this presumably relies on neither tuple being empty -- confirm.
+template<class ResultType, class ArgTuple, class CaptureTuple>
+inline meta_kernel&
+operator<<(meta_kernel &kernel,
+           const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr)
+{
+    const bool has_source = !expr.source().empty();
+    if(has_source){
+        kernel.add_function(expr.name(), expr.source(), expr.definitions());
+    }
+
+    // explicit arguments first, captured values second
+    kernel << expr.name();
+    kernel << '(';
+    kernel.insert_function_call_args(expr.args());
+    kernel << ", ";
+    kernel.insert_function_call_args(expr.capture());
+    kernel << ')';
+
+    return kernel;
+}
+
+// Streams a binary operation as "((arg1)op(arg2))", parenthesizing both
+// operands and the whole expression.
+template<class Arg1, class Arg2, class Result>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_binary_operator<Arg1,
+                                                             Arg2,
+                                                             Result> &expr)
+{
+    kernel << "((" << expr.arg1() << ")";
+    kernel << expr.op();
+    kernel << "(" << expr.arg2() << "))";
+
+    return kernel;
+}
+
+// Streams an indexed buffer access: "<buf>[<expr>]" when the base index is
+// zero, "<buf>[<index>+(<expr>)]" otherwise.
+template<class T, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const detail::device_ptr_index_expr<T, IndexExpr> &expr)
+{
+    // resolve the buffer's identifier before anything else is streamed
+    const std::string buffer_name =
+        kernel.get_buffer_identifier<T>(expr.m_buffer);
+
+    kernel << buffer_name << '[';
+
+    if(expr.m_index == 0){
+        kernel << expr.m_expr << ']';
+    }
+    else {
+        kernel << expr.m_index << "+(" << expr.m_expr << ")]";
+    }
+
+    return kernel;
+}
+
+// Overload for buffers of std::pair<T1, T2>; the emitted text has the same
+// two forms as the generic overload.
+template<class T1, class T2, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const detail::device_ptr_index_expr<std::pair<T1, T2>, IndexExpr> &expr)
+{
+    typedef std::pair<T1, T2> T;
+
+    // resolve the buffer's identifier before anything else is streamed
+    const std::string buffer_name =
+        kernel.get_buffer_identifier<T>(expr.m_buffer);
+
+    kernel << buffer_name << '[';
+
+    if(expr.m_index == 0){
+        kernel << expr.m_expr << ']';
+    }
+    else {
+        kernel << expr.m_index << "+(" << expr.m_expr << ")]";
+    }
+
+    return kernel;
+}
+
+// Streams the logical negation of a unary predicate applied to its
+// argument: "!(<pred(expr)>)".
+template<class Predicate, class Arg>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_unary_negate_function<Predicate,
+                                                                   Arg> &expr)
+{
+    kernel << "!(";
+    kernel << expr.pred()(expr.expr());
+    kernel << ')';
+
+    return kernel;
+}
+
+// Streams the logical negation of a binary predicate applied to its two
+// arguments: "!(<pred(expr1, expr2)>)".
+template<class Predicate, class Arg1, class Arg2>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_binary_negate_function<Predicate,
+                                                                    Arg1,
+                                                                    Arg2> &expr)
+{
+    kernel << "!(";
+    kernel << expr.pred()(expr.expr1(), expr.expr2());
+    kernel << ')';
+
+    return kernel;
+}
+
+// get<N>() for vector types
+// streams access to component N of a vector value as "<arg>.s<X>", where
+// <X> is the decimal digit for N in 0..9 and a letter 'a'..'f' for N in
+// 10..15. N is limited to 0..15 at compile time.
+template<size_t N, class Arg, class T>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+ const invoked_get<N, Arg, T> &expr)
+{
+ BOOST_STATIC_ASSERT(N < 16);
+
+ if(N < 10){
+ // single decimal digit suffix
+ return kernel << expr.m_arg << ".s" << uint_(N);
+ }
+ else if(N < 16){
+ // MSVC warning 4307 is disabled around the constant arithmetic below
+ // (presumably raised when this branch is instantiated for N < 10,
+ // where N - 10 wraps around -- TODO confirm)
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4307)
+#endif
+ // letter suffix: 10 -> 'a', 11 -> 'b', ... 15 -> 'f'
+ return kernel << expr.m_arg << ".s" << char('a' + (N - 10));
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+ }
+
+ // not reachable for any N accepted by the static assertion above
+ return kernel;
+}
+
+// Streams a member access as "<arg>.<field>".
+template<class T, class Arg>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_field<T, Arg> &expr)
+{
+    kernel << expr.m_arg;
+    kernel << "." << expr.m_field;
+
+    return kernel;
+}
+
+// Streams a call of the form "as_<type>(<arg>)", where <type> is the
+// string returned by type_name<T>().
+template<class T, class Arg>
+inline meta_kernel& operator<<(meta_kernel &k,
+                               const invoked_as<T, Arg> &expr)
+{
+    k << "as_" << type_name<T>();
+    k << "(" << expr.m_arg << ")";
+
+    return k;
+}
+
+// Streams a call of the form "convert_<type>(<arg>)", where <type> is the
+// string returned by type_name<T>().
+template<class T, class Arg>
+inline meta_kernel& operator<<(meta_kernel &k,
+                               const invoked_convert<T, Arg> &expr)
+{
+    k << "convert_" << type_name<T>();
+    k << "(" << expr.m_arg << ")";
+
+    return k;
+}
+
+// Identity: streams the wrapped argument unchanged.
+template<class T, class Arg>
+inline meta_kernel& operator<<(meta_kernel &k,
+                               const invoked_identity<T, Arg> &expr)
+{
+    k << expr.m_arg;
+    return k;
+}
+
+// Specialization for double_: asks the kernel to add the extension pragma
+// for "cl_khr_fp64" with the behavior "enable".
+template<>
+struct inject_type_impl<double_>
+{
+    void operator()(meta_kernel &kernel)
+    {
+        const char *extension = "cl_khr_fp64";
+        const char *behavior = "enable";
+
+        kernel.add_extension_pragma(extension, behavior);
+    }
+};
+
+// Specialization for vector types: a vector needs whatever its scalar
+// element type needs, so the work is forwarded to the Scalar injector.
+template<class Scalar, size_t N>
+struct inject_type_impl<vector_type<Scalar, N> >
+{
+    void operator()(meta_kernel &kernel)
+    {
+        // the same functor call meta_kernel::inject_type<Scalar>() makes
+        inject_type_impl<Scalar>()(kernel);
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
diff --git a/boost/compute/detail/mpl_vector_to_tuple.hpp b/boost/compute/detail/mpl_vector_to_tuple.hpp
new file mode 100644
index 0000000000..292a6e36e1
--- /dev/null
+++ b/boost/compute/detail/mpl_vector_to_tuple.hpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP
+#define BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP
+
+#include <boost/mpl/copy.hpp>
+#include <boost/mpl/vector.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/fusion/include/mpl.hpp>
+#include <boost/fusion/adapted/boost_tuple.hpp>
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/config.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+namespace mpl = boost::mpl;
+
+ // primary template, declared only; the specializations for each size N
+ // are generated by the macros below.
+ template<class Vector, size_t N>
+ struct mpl_vector_to_tuple_impl;
+
+ // expands to the type of the n-th element of Vector.
+ #define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \
+ typename mpl::at_c<Vector, n>::type
+
+ // defines the specialization for an n-element vector: its `type` is a
+ // boost::tuple of the vector's first n element types, in order.
+ #define BOOST_COMPUTE_VEC2TUP(z, n, unused) \
+ template<class Vector> \
+ struct mpl_vector_to_tuple_impl<Vector, n> \
+ { \
+ typedef typename \
+ boost::tuple< \
+ BOOST_PP_ENUM(n, BOOST_COMPUTE_PRINT_ELEM, ~) \
+ > type; \
+ };
+
+ // generate the specializations starting at size 1; the upper bound is
+ // BOOST_COMPUTE_MAX_ARITY (presumably exclusive, per the
+ // Boost.Preprocessor REPEAT_FROM_TO contract -- TODO confirm).
+ BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_VEC2TUP, ~)
+
+ #undef BOOST_COMPUTE_VEC2TUP
+ #undef BOOST_COMPUTE_PRINT_ELEM
+
+// meta-function which converts a mpl::vector to a boost::tuple
+ template<class Vector>
+ struct mpl_vector_to_tuple
+ {
+     // pick the generated specialization matching the vector's size
+     typedef mpl_vector_to_tuple_impl<
+         Vector,
+         mpl::size<Vector>::value
+     > impl_type;
+
+     // the resulting boost::tuple type
+     typedef typename impl_type::type type;
+ };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP
diff --git a/boost/compute/detail/nvidia_compute_capability.hpp b/boost/compute/detail/nvidia_compute_capability.hpp
new file mode 100644
index 0000000000..3f859562bd
--- /dev/null
+++ b/boost/compute/detail/nvidia_compute_capability.hpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP
+#define BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP
+
+#include <boost/compute/device.hpp>
+
+#ifdef BOOST_COMPUTE_HAVE_HDR_CL_EXT
+ #include <CL/cl_ext.h>
+#endif
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
+ #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
+#else
+ #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
+#endif
+
+#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
+ #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
+#else
+ #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
+#endif
+
+ // Writes the device's compute capability into `major` and `minor`. Both
+ // are set to 0 when the device does not report the
+ // "cl_nv_device_attribute_query" extension.
+ inline void get_nvidia_compute_capability(const device &device, int &major, int &minor)
+ {
+     const bool queryable =
+         device.supports_extension("cl_nv_device_attribute_query");
+
+     if(queryable){
+         major = device.get_info<uint_>(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
+         minor = device.get_info<uint_>(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);
+     }
+     else {
+         major = 0;
+         minor = 0;
+     }
+ }
+
+ // Returns true when the device's compute capability is at least
+ // major.minor: a higher major version always qualifies, an equal major
+ // version qualifies when the minor version is not lower.
+ inline bool check_nvidia_compute_capability(const device &device, int major, int minor)
+ {
+     int found_major = 0;
+     int found_minor = 0;
+     get_nvidia_compute_capability(device, found_major, found_minor);
+
+     // the major version decides unless it ties
+     if(found_major != major){
+         return found_major > major;
+     }
+
+     return found_minor >= minor;
+ }
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP
diff --git a/boost/compute/detail/parameter_cache.hpp b/boost/compute/detail/parameter_cache.hpp
new file mode 100644
index 0000000000..2a856311e1
--- /dev/null
+++ b/boost/compute/detail/parameter_cache.hpp
@@ -0,0 +1,215 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP
+#define BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP
+
+ #include <algorithm>
+ #include <cstdio>
+ #include <map>
+ #include <string>
+ #include <utility>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/make_shared.hpp>
+#include <boost/noncopyable.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/detail/global_static.hpp>
+#include <boost/compute/version.hpp>
+
+#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+#include <boost/algorithm/string/trim.hpp>
+#include <boost/compute/detail/path.hpp>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/json_parser.hpp>
+#endif // BOOST_COMPUTE_USE_OFFLINE_CACHE
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+ // Cache of tuned parameters for one device, keyed by an
+ // (object name, parameter name) pair.
+ //
+ // When BOOST_COMPUTE_USE_OFFLINE_CACHE is defined the cache is loaded from
+ // a JSON file when constructed and written back, if modified, when
+ // destroyed. Disk access is best-effort: a missing, unreadable or partly
+ // corrupted file never raises an error from the constructor or destructor.
+ class parameter_cache : boost::noncopyable
+ {
+ public:
+     // Creates the cache for `device`, loading stored parameters if an
+     // offline cache file exists.
+     parameter_cache(const device &device)
+         : m_dirty(false),
+           m_device_name(device.name())
+     {
+     #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+         // get offline cache file name (e.g. /home/user/.boost_compute/tune/device.json)
+         m_file_name = make_file_name();
+
+         // load parameters from offline cache file (if it exists)
+         if(boost::filesystem::exists(m_file_name)){
+             read_from_disk();
+         }
+     #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE
+     }
+
+     // Writes modified parameters back to disk. Failures are swallowed on
+     // purpose: write_json() throws on I/O errors and an exception must not
+     // escape a destructor.
+     ~parameter_cache()
+     {
+     #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+         try {
+             write_to_disk();
+         }
+         catch(...){
+             // persisting the cache is best-effort
+         }
+     #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE
+     }
+
+     // Stores `value` for (object, parameter) and marks the cache modified.
+     void set(const std::string &object, const std::string &parameter, uint_ value)
+     {
+         m_cache[std::make_pair(object, parameter)] = value;
+
+         // set the dirty flag to true. this will cause the updated parameters
+         // to be stored to disk.
+         m_dirty = true;
+     }
+
+     // Returns the value stored for (object, parameter), or `default_value`
+     // when there is none. Never inserts into the cache.
+     uint_ get(const std::string &object, const std::string &parameter, uint_ default_value) const
+     {
+         std::map<std::pair<std::string, std::string>, uint_>::const_iterator
+             iter = m_cache.find(std::make_pair(object, parameter));
+
+         return (iter != m_cache.end()) ? iter->second : default_value;
+     }
+
+     // Returns the process-wide cache shared by all devices with the same
+     // name as `device`, creating it on first use.
+     static boost::shared_ptr<parameter_cache> get_global_cache(const device &device)
+     {
+         // device name -> parameter cache
+         typedef std::map<std::string, boost::shared_ptr<parameter_cache> > cache_map;
+
+         BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, ((std::less<std::string>())));
+
+         // query the device name once instead of once per use
+         const std::string name = device.name();
+
+         cache_map::iterator iter = caches.find(name);
+         if(iter != caches.end()){
+             return iter->second;
+         }
+
+         boost::shared_ptr<parameter_cache> cache =
+             boost::make_shared<parameter_cache>(device);
+
+         caches.insert(iter, std::make_pair(name, cache));
+
+         return cache;
+     }
+
+ private:
+ #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+     // returns a string containing a canonical device name: surrounding
+     // whitespace is trimmed and ' ', '(' and ')' are replaced by '_'
+     static std::string cannonical_device_name(std::string name)
+     {
+         boost::algorithm::trim(name);
+         std::replace(name.begin(), name.end(), ' ', '_');
+         std::replace(name.begin(), name.end(), '(', '_');
+         std::replace(name.begin(), name.end(), ')', '_');
+         return name;
+     }
+
+     // returns the boost.compute version string ("major.minor.patch")
+     static std::string version_string()
+     {
+         char buf[32];
+         std::snprintf(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR,
+                                                     BOOST_COMPUTE_VERSION_MINOR,
+                                                     BOOST_COMPUTE_VERSION_PATCH);
+         return buf;
+     }
+
+     // returns the file path for the cached parameters
+     std::string make_file_name() const
+     {
+         return detail::parameter_cache_path(true) + cannonical_device_name(m_device_name) + ".json";
+     }
+
+     // store current parameters to disk (only when they were modified).
+     // may throw if the file cannot be written.
+     void write_to_disk()
+     {
+         BOOST_ASSERT(!m_file_name.empty());
+
+         if(!m_dirty){
+             return;
+         }
+
+         // the header identifies the device and library version
+         boost::property_tree::ptree pt;
+         pt.put("header.device", m_device_name);
+         pt.put("header.version", version_string());
+
+         typedef std::map<std::pair<std::string, std::string>, uint_> map_type;
+         for(map_type::const_iterator iter = m_cache.begin(); iter != m_cache.end(); ++iter){
+             const std::pair<std::string, std::string> &key = iter->first;
+             pt.add(key.first + "." + key.second, iter->second);
+         }
+         write_json(m_file_name, pt);
+
+         m_dirty = false;
+     }
+
+     // load stored parameters from disk. an unreadable file or a file
+     // without a header is ignored; a file written for another device or
+     // library version loads nothing; values that are not valid numbers
+     // are skipped.
+     void read_from_disk()
+     {
+         BOOST_ASSERT(!m_file_name.empty());
+
+         m_cache.clear();
+
+         boost::property_tree::ptree pt;
+         try {
+             read_json(m_file_name, pt);
+         }
+         catch(boost::property_tree::json_parser::json_parser_error&){
+             // missing or malformed cache file, ignore
+             return;
+         }
+
+         std::string stored_device;
+         std::string stored_version;
+         try {
+             stored_device = pt.get<std::string>("header.device");
+             stored_version = pt.get<std::string>("header.version");
+         }
+         catch(boost::property_tree::ptree_bad_path&){
+             // no usable header, ignore the file
+             return;
+         }
+
+         if(stored_device == m_device_name && stored_version == version_string()){
+             typedef boost::property_tree::ptree::const_iterator pt_iter;
+             for(pt_iter iter = pt.begin(); iter != pt.end(); ++iter){
+                 if(iter->first == "header"){
+                     // skip header
+                     continue;
+                 }
+
+                 // iterate the child in place instead of copying the subtree
+                 const boost::property_tree::ptree &child_pt = iter->second;
+                 for(pt_iter child_iter = child_pt.begin(); child_iter != child_pt.end(); ++child_iter){
+                     try {
+                         set(iter->first, child_iter->first, boost::lexical_cast<uint_>(child_iter->second.data()));
+                     }
+                     catch(boost::bad_lexical_cast&){
+                         // corrupted value: skip this entry, keep the rest
+                     }
+                 }
+             }
+         }
+
+         // set() marked the cache dirty, but it now matches what is on disk
+         m_dirty = false;
+     }
+ #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE
+
+ private:
+     bool m_dirty;               // true when m_cache has changes not yet written to disk
+     std::string m_device_name;  // name of the device this cache belongs to
+     std::string m_file_name;    // offline cache file path (set only with the offline cache)
+     std::map<std::pair<std::string, std::string>, uint_> m_cache; // (object, parameter) -> value
+ };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP
diff --git a/boost/compute/detail/path.hpp b/boost/compute/detail/path.hpp
new file mode 100644
index 0000000000..ec8760eaf9
--- /dev/null
+++ b/boost/compute/detail/path.hpp
@@ -0,0 +1,73 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_PATH_HPP
+#define BOOST_COMPUTE_DETAIL_PATH_HPP
+
+#include <boost/filesystem/path.hpp>
+#include <boost/filesystem/operations.hpp>
+#include <boost/compute/detail/getenv.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Path delimiter symbol for the current OS.
+ // Returns the preferred path separator for the current platform, as
+ // produced by boost::filesystem's make_preferred() on "/".
+ //
+ // Declared inline, like the other functions in this header, so that all
+ // translation units share one function and one cached string; with
+ // `static` each translation unit got its own copy.
+ inline const std::string& path_delim()
+ {
+     static const std::string delim =
+         boost::filesystem::path("/").make_preferred().string();
+     return delim;
+ }
+
+// Path to appdata folder.
+ // Returns the directory Boost.Compute uses for its cached data:
+ // "<APPDATA>/boost_compute" on Windows and "<HOME>/.boost_compute"
+ // elsewhere, joined with the platform's preferred separator. The value is
+ // computed on the first call and reused afterwards.
+ inline const std::string& appdata_path()
+ {
+     // _WIN32 is the macro compilers predefine for Windows targets; plain
+     // WIN32 is only defined by some headers and build systems, so it is
+     // still accepted for builds that relied on it.
+     #if defined(_WIN32) || defined(WIN32)
+     static const std::string appdata = detail::getenv("APPDATA")
+         + path_delim() + "boost_compute";
+     #else
+     static const std::string appdata = detail::getenv("HOME")
+         + path_delim() + ".boost_compute";
+     #endif
+     return appdata;
+ }
+
+// Path to cached binaries.
+ // Returns the directory (with a trailing separator) for the cached binary
+ // identified by `hash`: "<appdata>/<first two chars>/<remaining chars>/".
+ // When `create` is true the directory is created if it does not exist.
+ // substr(2) throws std::out_of_range for a hash shorter than two
+ // characters.
+ inline std::string program_binary_path(const std::string &hash, bool create = false)
+ {
+     const std::string &separator = path_delim();
+
+     const std::string directory = detail::appdata_path() + separator
+                                 + hash.substr(0, 2) + separator
+                                 + hash.substr(2);
+
+     if(create){
+         if(!boost::filesystem::exists(directory)){
+             boost::filesystem::create_directories(directory);
+         }
+     }
+
+     return directory + separator;
+ }
+
+// Path to parameter caches.
+ // Returns the directory (with a trailing separator) holding the parameter
+ // cache files: "<appdata>/tune/". When `create` is true the directory is
+ // created if it does not exist.
+ inline std::string parameter_cache_path(bool create = false)
+ {
+     // computed on the first call, reused afterwards
+     const static std::string tune_dir = appdata_path() + path_delim() + "tune";
+
+     if(create){
+         if(!boost::filesystem::exists(tune_dir)){
+             boost::filesystem::create_directories(tune_dir);
+         }
+     }
+
+     return tune_dir + path_delim();
+ }
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_PATH_HPP
diff --git a/boost/compute/detail/print_range.hpp b/boost/compute/detail/print_range.hpp
new file mode 100644
index 0000000000..bfe02f6828
--- /dev/null
+++ b/boost/compute/detail/print_range.hpp
@@ -0,0 +1,82 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP
+#define BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP
+
+#include <vector>
+#include <iostream>
+#include <iterator>
+
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+ // Prints the values in [first, last) to std::cout as "[ a, b, c ]"
+ // followed by a newline. Overload selected for buffer iterators: the
+ // values are first copied into a host vector using `queue`.
+ template<class InputIterator>
+ inline void print_range(InputIterator first,
+                         InputIterator last,
+                         command_queue &queue,
+                         typename boost::enable_if<
+                             is_buffer_iterator<InputIterator>
+                         >::type* = 0)
+ {
+     typedef typename
+         std::iterator_traits<InputIterator>::value_type
+         element_type;
+
+     const size_t count = iterator_range_size(first, last);
+
+     // bring the values into host memory so they can be streamed
+     std::vector<element_type> host_copy(count);
+     ::boost::compute::copy(first, last, host_copy.begin(), queue);
+
+     // separator goes before every element except the first
+     std::cout << "[ ";
+     for(size_t pos = 0; pos < count; ++pos){
+         if(pos != 0){
+             std::cout << ", ";
+         }
+         std::cout << host_copy[pos];
+     }
+     std::cout << " ]" << std::endl;
+ }
+
+ // Prints the values in [first, last) to std::cout. Overload selected for
+ // iterators that are not buffer iterators: the values are copied into a
+ // temporary boost::compute::vector in the queue's context, which is then
+ // printed through the buffer-iterator overload.
+ template<class InputIterator>
+ inline void print_range(InputIterator first,
+                         InputIterator last,
+                         command_queue &queue,
+                         typename boost::enable_if_c<
+                             !is_buffer_iterator<InputIterator>::value
+                         >::type* = 0)
+ {
+     typedef typename
+         std::iterator_traits<InputIterator>::value_type
+         element_type;
+     typedef ::boost::compute::vector<element_type> device_vector;
+
+     const context &context = queue.get_context();
+     const size_t count = iterator_range_size(first, last);
+
+     // stage the values in a device vector
+     device_vector staged(count, context);
+     ::boost::compute::copy(first, last, staged.begin(), queue);
+
+     print_range(staged.begin(), staged.end(), queue);
+ }
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP
diff --git a/boost/compute/detail/read_write_single_value.hpp b/boost/compute/detail/read_write_single_value.hpp
new file mode 100644
index 0000000000..fde40d946c
--- /dev/null
+++ b/boost/compute/detail/read_write_single_value.hpp
@@ -0,0 +1,77 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP
+#define BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/command_queue.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// reads and returns a single value at index in the buffer
+ template<class T>
+ inline T read_single_value(const buffer &buffer,
+                            size_t index,
+                            command_queue &queue)
+ {
+     // the index must address a whole T inside the buffer, and the buffer
+     // and queue must share a context
+     BOOST_ASSERT(index < buffer.size() / sizeof(T));
+     BOOST_ASSERT(buffer.get_context() == queue.get_context());
+
+     const size_t byte_offset = sizeof(T) * index;
+
+     T result;
+     queue.enqueue_read_buffer(buffer, byte_offset, sizeof(T), &result);
+
+     return result;
+ }
+
+ // reads and returns the first value in the buffer
+ template<class T>
+ inline T read_single_value(const buffer &buffer, command_queue &queue)
+ {
+     // forwards to the indexed overload with index 0
+     const size_t first_index = 0;
+     return read_single_value<T>(buffer, first_index, queue);
+ }
+
+// writes a single value at index to the buffer
+ template<class T>
+ inline void write_single_value(const T &value,
+                                const buffer &buffer,
+                                size_t index,
+                                command_queue &queue)
+ {
+     // the index must address a whole T inside the buffer, and the buffer
+     // and queue must share a context
+     BOOST_ASSERT(index < buffer.size() / sizeof(T));
+     BOOST_ASSERT(buffer.get_context() == queue.get_context());
+
+     const size_t byte_offset = index * sizeof(T);
+
+     queue.enqueue_write_buffer(buffer, byte_offset, sizeof(T), &value);
+ }
+
+// writes value to the first location in buffer
+ template<class T>
+ inline void write_single_value(const T &value,
+                                const buffer &buffer,
+                                command_queue &queue)
+ {
+     // forwards to the indexed overload with index 0
+     const size_t first_index = 0;
+     write_single_value<T>(value, buffer, first_index, queue);
+ }
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP
diff --git a/boost/compute/detail/sha1.hpp b/boost/compute/detail/sha1.hpp
new file mode 100644
index 0000000000..5685fa4407
--- /dev/null
+++ b/boost/compute/detail/sha1.hpp
@@ -0,0 +1,53 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_SHA1_HPP
+#define BOOST_COMPUTE_DETAIL_SHA1_HPP
+
+#include <sstream>
+#include <iomanip>
+#include <boost/uuid/sha1.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Accumulates SHA1 hash of the passed strings.
+ class sha1 {
+     public:
+         // Starts a hash; a non-empty `s` is processed immediately.
+         sha1(const std::string &s = "") {
+             if (!s.empty()) this->process(s);
+         }
+
+         // Feeds the bytes of `s` into the hash. Returns *this so calls
+         // can be chained.
+         sha1& process(const std::string &s) {
+             h.process_bytes(s.c_str(), s.size());
+             return *this;
+         }
+
+         // Returns the digest of everything processed so far as 40
+         // lowercase hexadecimal characters.
+         //
+         // The digest is taken from a copy of the accumulator because
+         // get_digest() finalizes the object it is called on. With the
+         // copy, converting twice gives the same string and process() can
+         // still be called afterwards; it also lets the conversion be const.
+         operator std::string() const {
+             boost::uuids::detail::sha1 snapshot = h;
+
+             // NOTE(review): assumes the Boost.Uuid digest type is an array
+             // of five 32-bit words, as in the original code -- confirm
+             // against the Boost version in use.
+             unsigned int digest[5];
+             snapshot.get_digest(digest);
+
+             // hex and the fill character persist on the stream; only the
+             // width has to be set again for every word
+             std::ostringstream buf;
+             buf << std::hex << std::setfill('0');
+             for(int i = 0; i < 5; ++i)
+                 buf << std::setw(8) << digest[i];
+
+             return buf.str();
+         }
+     private:
+         boost::uuids::detail::sha1 h;
+ };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+
+#endif // BOOST_COMPUTE_DETAIL_SHA1_HPP
diff --git a/boost/compute/detail/variadic_macros.hpp b/boost/compute/detail/variadic_macros.hpp
new file mode 100644
index 0000000000..60f44bd1a8
--- /dev/null
+++ b/boost/compute/detail/variadic_macros.hpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
+#define BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
+
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/config/config.hpp>
+#include <boost/preprocessor/tuple/to_seq.hpp>
+
+// pull in Boost.Preprocessor's variadic size macro when variadics are enabled
+#if BOOST_PP_VARIADICS == 1
+# include <boost/preprocessor/variadic/size.hpp>
+#endif
+
+// BOOST_COMPUTE_PP_VARIADIC_SIZE(...) expands to the number of arguments.
+// it forwards to BOOST_PP_VARIADIC_SIZE when that is available; otherwise
+// the fallback appends the numbers 64 down to 1 to the arguments and picks
+// the 65th entry of the combined list, which is the argument count.
+#ifdef BOOST_PP_VARIADIC_SIZE
+# define BOOST_COMPUTE_PP_VARIADIC_SIZE BOOST_PP_VARIADIC_SIZE
+#else
+# define BOOST_COMPUTE_PP_VARIADIC_SIZE(...) BOOST_COMPUTE_PP_VARIADIC_SIZE_I(__VA_ARGS__, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,)
+# define BOOST_COMPUTE_PP_VARIADIC_SIZE_I(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63, size, ...) size
+#endif
+
+// number of elements in a parenthesized tuple: the tuple's own parentheses
+// serve as the macro's argument list.
+#define BOOST_COMPUTE_PP_TUPLE_SIZE(tuple) \
+ BOOST_COMPUTE_PP_VARIADIC_SIZE tuple
+
+// converts a tuple to a Boost.Preprocessor sequence, supplying the tuple
+// size computed above.
+#define BOOST_COMPUTE_PP_TUPLE_TO_SEQ(tuple) \
+ BOOST_PP_TUPLE_TO_SEQ(BOOST_COMPUTE_PP_TUPLE_SIZE(tuple), tuple)
+
+#endif // BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
diff --git a/boost/compute/detail/vendor.hpp b/boost/compute/detail/vendor.hpp
new file mode 100644
index 0000000000..0aa9c9c0d4
--- /dev/null
+++ b/boost/compute/detail/vendor.hpp
@@ -0,0 +1,38 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_VENDOR_HPP
+#define BOOST_COMPUTE_DETAIL_VENDOR_HPP
+
+#include <boost/compute/device.hpp>
+#include <boost/compute/platform.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// reports whether the device's vendor string begins with "NVIDIA"
+inline bool is_nvidia_device(const device &device)
+{
+ const std::string vendor_name = device.vendor();
+
+ // rfind() anchored at index 0 can only match a prefix
+ return vendor_name.rfind("NVIDIA", 0) == 0;
+}
+
+// reports whether the vendor string of the device's platform is exactly
+// "Advanced Micro Devices, Inc."
+inline bool is_amd_device(const device &device)
+{
+ const char *const amd_vendor = "Advanced Micro Devices, Inc.";
+
+ return device.platform().vendor() == amd_vendor;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_VENDOR_HPP
diff --git a/boost/compute/detail/work_size.hpp b/boost/compute/detail/work_size.hpp
new file mode 100644
index 0000000000..552d797b8b
--- /dev/null
+++ b/boost/compute/detail/work_size.hpp
@@ -0,0 +1,37 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
+#define BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
+
+#include <cmath>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Given a total number of values (count), a number of values to
+// process per thread (vpt), and a number of threads to execute per
+// block (tpb), this function returns the global work size to be
+// passed to clEnqueueNDRangeKernel() for a 1D algorithm.
+//
+// The result is ceil(count / vpt) rounded up to the next multiple of
+// tpb. Both vpt and tpb must be non-zero.
+inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb)
+{
+ // integer ceil-division: a float cannot represent every count above
+ // 2^24 exactly, so rounding through float could return too few threads
+ size_t work_size = count / vpt + (count % vpt != 0 ? 1 : 0);
+
+ // pad up to a whole number of blocks
+ if(work_size % tpb != 0){
+ work_size += tpb - work_size % tpb;
+ }
+ return work_size;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
diff --git a/boost/compute/device.hpp b/boost/compute/device.hpp
new file mode 100644
index 0000000000..5cf2e8c931
--- /dev/null
+++ b/boost/compute/device.hpp
@@ -0,0 +1,584 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_DEVICE_HPP
+#define BOOST_COMPUTE_DEVICE_HPP
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+
+namespace boost {
+namespace compute {
+
+class platform;
+
+/// \class device
+/// \brief A compute device.
+///
+/// Typical compute devices include GPUs and multi-core CPUs. A list
+/// of all compute devices available on a platform can be obtained
+/// via the platform::devices() method.
+///
+/// The default compute device for the system can be obtained with
+/// the system::default_device() method. For example:
+///
+/// \snippet test/test_device.cpp default_gpu
+///
+/// Only sub-devices are reference counted: the constructors, assignment
+/// operators and destructor call clRetainDevice()/clReleaseDevice() only
+/// when is_subdevice() returns \c true.
+///
+/// \see platform, context, command_queue
+class device
+{
+public:
+ /// Device categories, mirroring the \c CL_DEVICE_TYPE_* constants.
+ enum type {
+ cpu = CL_DEVICE_TYPE_CPU,
+ gpu = CL_DEVICE_TYPE_GPU,
+ accelerator = CL_DEVICE_TYPE_ACCELERATOR
+ };
+
+ /// Creates a null device object.
+ device()
+ : m_id(0)
+ {
+ }
+
+ /// Creates a new device object for \p id. If \p retain is \c true,
+ /// the reference count for the device will be incremented.
+ explicit device(cl_device_id id, bool retain = true)
+ : m_id(id)
+ {
+ #ifdef CL_VERSION_1_2
+ if(m_id && retain && is_subdevice()){
+ clRetainDevice(m_id);
+ }
+ #else
+ (void) retain;
+ #endif
+ }
+
+ /// Creates a new device object as a copy of \p other.
+ device(const device &other)
+ : m_id(other.m_id)
+ {
+ #ifdef CL_VERSION_1_2
+ if(m_id && is_subdevice()){
+ clRetainDevice(m_id);
+ }
+ #endif
+ }
+
+ /// Copies the device from \p other to \c *this.
+ device& operator=(const device &other)
+ {
+ if(this != &other){
+ #ifdef CL_VERSION_1_2
+ if(m_id && is_subdevice()){
+ clReleaseDevice(m_id);
+ }
+ #endif
+
+ m_id = other.m_id;
+
+ #ifdef CL_VERSION_1_2
+ if(m_id && is_subdevice()){
+ clRetainDevice(m_id);
+ }
+ #endif
+ }
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new device object from \p other.
+ device(device&& other) BOOST_NOEXCEPT
+ : m_id(other.m_id)
+ {
+ other.m_id = 0;
+ }
+
+ /// Move-assigns the device from \p other to \c *this.
+ device& operator=(device&& other) BOOST_NOEXCEPT
+ {
+ // self-move must not release and then null out our own id
+ if(this != &other){
+ #ifdef CL_VERSION_1_2
+ if(m_id && is_subdevice()){
+ clReleaseDevice(m_id);
+ }
+ #endif
+
+ m_id = other.m_id;
+ other.m_id = 0;
+ }
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the device object.
+ ~device()
+ {
+ #ifdef CL_VERSION_1_2
+ if(m_id && is_subdevice()){
+ BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+ clReleaseDevice(m_id)
+ );
+ }
+ #endif
+ }
+
+ /// Returns the ID of the device.
+ cl_device_id id() const
+ {
+ return m_id;
+ }
+
+ /// Returns a reference to the underlying OpenCL device id.
+ cl_device_id& get() const
+ {
+ return const_cast<cl_device_id&>(m_id);
+ }
+
+ /// Returns the type of the device.
+ cl_device_type type() const
+ {
+ return get_info<cl_device_type>(CL_DEVICE_TYPE);
+ }
+
+ #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
+ /// Returns the platform for the device.
+ platform platform() const;
+ #else
+ boost::compute::platform platform() const;
+ #endif
+
+ /// Returns the name of the device.
+ std::string name() const
+ {
+ return get_info<std::string>(CL_DEVICE_NAME);
+ }
+
+ /// Returns the name of the vendor for the device.
+ std::string vendor() const
+ {
+ return get_info<std::string>(CL_DEVICE_VENDOR);
+ }
+
+ /// Returns the device profile string.
+ std::string profile() const
+ {
+ return get_info<std::string>(CL_DEVICE_PROFILE);
+ }
+
+ /// Returns the device version string.
+ std::string version() const
+ {
+ return get_info<std::string>(CL_DEVICE_VERSION);
+ }
+
+ /// Returns the driver version string.
+ std::string driver_version() const
+ {
+ return get_info<std::string>(CL_DRIVER_VERSION);
+ }
+
+ /// Returns a list of extensions supported by the device.
+ ///
+ /// The \c CL_DEVICE_EXTENSIONS string is split on spaces and tabs;
+ /// empty tokens are not included in the result.
+ std::vector<std::string> extensions() const
+ {
+ std::string extensions_string =
+ get_info<std::string>(CL_DEVICE_EXTENSIONS);
+ std::vector<std::string> extensions_vector;
+ boost::split(extensions_vector,
+ extensions_string,
+ boost::is_any_of("\t "),
+ boost::token_compress_on);
+
+ // split() still yields an empty token for an empty string and for
+ // leading/trailing separators; drop those so that every element
+ // is an extension name
+ extensions_vector.erase(
+ std::remove(extensions_vector.begin(),
+ extensions_vector.end(),
+ std::string()),
+ extensions_vector.end());
+
+ return extensions_vector;
+ }
+
+ /// Returns \c true if the device supports the extension with
+ /// \p name.
+ bool supports_extension(const std::string &name) const
+ {
+ const std::vector<std::string> extensions = this->extensions();
+
+ return std::find(
+ extensions.begin(), extensions.end(), name) != extensions.end();
+ }
+
+ /// Returns the number of address bits.
+ uint_ address_bits() const
+ {
+ return get_info<uint_>(CL_DEVICE_ADDRESS_BITS);
+ }
+
+ /// Returns the global memory size in bytes.
+ ulong_ global_memory_size() const
+ {
+ return get_info<ulong_>(CL_DEVICE_GLOBAL_MEM_SIZE);
+ }
+
+ /// Returns the local memory size in bytes.
+ ulong_ local_memory_size() const
+ {
+ return get_info<ulong_>(CL_DEVICE_LOCAL_MEM_SIZE);
+ }
+
+ /// Returns the clock frequency for the device's compute units.
+ uint_ clock_frequency() const
+ {
+ return get_info<uint_>(CL_DEVICE_MAX_CLOCK_FREQUENCY);
+ }
+
+ /// Returns the number of compute units in the device.
+ uint_ compute_units() const
+ {
+ return get_info<uint_>(CL_DEVICE_MAX_COMPUTE_UNITS);
+ }
+
+ /// \internal_
+ ulong_ max_memory_alloc_size() const
+ {
+ return get_info<ulong_>(CL_DEVICE_MAX_MEM_ALLOC_SIZE);
+ }
+
+ /// \internal_
+ size_t max_work_group_size() const
+ {
+ return get_info<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
+ }
+
+ /// \internal_
+ uint_ max_work_item_dimensions() const
+ {
+ return get_info<uint_>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
+ }
+
+ /// Returns the preferred vector width for type \c T.
+ ///
+ /// This primary template returns \c 0; types with a corresponding
+ /// device query are handled by explicit specializations.
+ template<class T>
+ uint_ preferred_vector_width() const
+ {
+ return 0;
+ }
+
+ /// Returns the profiling timer resolution in nanoseconds.
+ size_t profiling_timer_resolution() const
+ {
+ return get_info<size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION);
+ }
+
+ /// Returns \c true if the device is a sub-device.
+ bool is_subdevice() const
+ {
+ #if defined(CL_VERSION_1_2)
+ try {
+ return get_info<cl_device_id>(CL_DEVICE_PARENT_DEVICE) != 0;
+ }
+ catch(opencl_error&){
+ // the get_info() call above will throw if the device's opencl version
+ // is less than 1.2 (in which case it can't be a sub-device).
+ return false;
+ }
+ #else
+ return false;
+ #endif
+ }
+
+ /// Returns information about the device.
+ ///
+ /// For example, to get the number of compute units:
+ /// \code
+ /// device.get_info<cl_uint>(CL_DEVICE_MAX_COMPUTE_UNITS);
+ /// \endcode
+ ///
+ /// Alternatively, the template-specialized version can be used which
+ /// automatically determines the result type:
+ /// \code
+ /// device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>();
+ /// \endcode
+ ///
+ /// \see_opencl_ref{clGetDeviceInfo}
+ template<class T>
+ T get_info(cl_device_info info) const
+ {
+ return detail::get_object_info<T>(clGetDeviceInfo, m_id, info);
+ }
+
+ /// \overload
+ template<int Enum>
+ typename detail::get_object_info_type<device, Enum>::type
+ get_info() const;
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Partitions the device into multiple sub-devices according to
+ /// \p properties.
+ ///
+ /// Throws opencl_error if either clCreateSubDevices() call fails.
+ /// Returns an empty vector if the runtime reports zero sub-devices.
+ ///
+ /// \opencl_version_warning{1,2}
+ std::vector<device>
+ partition(const cl_device_partition_property *properties) const
+ {
+ // get sub-device count
+ uint_ count = 0;
+ int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ // nothing to create; also avoids taking &ids[0] of an empty vector
+ if(count == 0){
+ return std::vector<device>();
+ }
+
+ // get sub-device ids
+ std::vector<cl_device_id> ids(count);
+ ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ // convert ids to device objects (retain == false: the wrapper is
+ // constructed without an additional clRetainDevice() call)
+ std::vector<device> devices(count);
+ for(size_t i = 0; i < count; i++){
+ devices[i] = device(ids[i], false);
+ }
+
+ return devices;
+ }
+
+ /// Partitions the device using \c CL_DEVICE_PARTITION_EQUALLY with
+ /// \p count as its argument.
+ ///
+ /// \opencl_version_warning{1,2}
+ std::vector<device> partition_equally(size_t count) const
+ {
+ cl_device_partition_property properties[] = {
+ CL_DEVICE_PARTITION_EQUALLY,
+ static_cast<cl_device_partition_property>(count),
+ 0
+ };
+
+ return partition(properties);
+ }
+
+ /// Partitions the device using \c CL_DEVICE_PARTITION_BY_COUNTS with
+ /// the values in \p counts.
+ ///
+ /// \opencl_version_warning{1,2}
+ std::vector<device>
+ partition_by_counts(const std::vector<size_t> &counts) const
+ {
+ std::vector<cl_device_partition_property> properties;
+
+ properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS);
+ for(size_t i = 0; i < counts.size(); i++){
+ properties.push_back(
+ static_cast<cl_device_partition_property>(counts[i]));
+ }
+ properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS_LIST_END);
+ properties.push_back(0);
+
+ return partition(&properties[0]);
+ }
+
+ /// Partitions the device using
+ /// \c CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN with \p domain.
+ ///
+ /// \opencl_version_warning{1,2}
+ std::vector<device>
+ partition_by_affinity_domain(cl_device_affinity_domain domain) const
+ {
+ cl_device_partition_property properties[] = {
+ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN,
+ static_cast<cl_device_partition_property>(domain),
+ 0
+ };
+
+ return partition(properties);
+ }
+ #endif // CL_VERSION_1_2
+
+ /// Returns \c true if the device is the same as \p other.
+ bool operator==(const device &other) const
+ {
+ return m_id == other.m_id;
+ }
+
+ /// Returns \c true if the device is different from \p other.
+ bool operator!=(const device &other) const
+ {
+ return m_id != other.m_id;
+ }
+
+ /// \internal_
+ /// Returns \c true if the version parsed from version() is at least
+ /// \p major.\p minor. The first seven characters of the string are
+ /// skipped as the 'OpenCL ' prefix; a number that cannot be parsed is
+ /// treated as \c 0.
+ bool check_version(int major, int minor) const
+ {
+ std::stringstream stream;
+ stream << version();
+
+ // zero-initialized: once an extraction fails the remaining reads are
+ // skipped, which would otherwise leave these values indeterminate
+ int actual_major = 0;
+ int actual_minor = 0;
+ stream.ignore(7); // 'OpenCL '
+ stream >> actual_major;
+ stream.ignore(1); // '.'
+ stream >> actual_minor;
+
+ return actual_major > major ||
+ (actual_major == major && actual_minor >= minor);
+ }
+
+private:
+ cl_device_id m_id;
+};
+
+// Explicit specializations of device::preferred_vector_width<T>(). Each one
+// queries the CL_DEVICE_PREFERRED_VECTOR_WIDTH_* value for its type; any type
+// without a specialization uses the primary template, which returns 0.
+
+/// \internal_
+template<>
+inline uint_ device::preferred_vector_width<char_>() const
+{
+ return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR);
+}
+
+/// \internal_
+template<>
+inline uint_ device::preferred_vector_width<short_>() const
+{
+ return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT);
+}
+
+/// \internal_
+template<>
+inline uint_ device::preferred_vector_width<int_>() const
+{
+ return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT);
+}
+
+/// \internal_
+template<>
+inline uint_ device::preferred_vector_width<long_>() const
+{
+ return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG);
+}
+
+/// \internal_
+template<>
+inline uint_ device::preferred_vector_width<float_>() const
+{
+ return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT);
+}
+
+/// \internal_
+template<>
+inline uint_ device::preferred_vector_width<double_>() const
+{
+ return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE);
+}
+
+/// \internal_ define get_info() specializations for device
+//
+// Each (type, enum) pair gives the result type of device::get_info<enum>().
+// The type should match the type the OpenCL specification lists for that
+// clGetDeviceInfo() query. CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE is a cl_uint
+// in the specification and is mapped to cl_uint here.
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
+ ((cl_uint, CL_DEVICE_ADDRESS_BITS))
+ ((bool, CL_DEVICE_AVAILABLE))
+ ((bool, CL_DEVICE_COMPILER_AVAILABLE))
+ ((bool, CL_DEVICE_ENDIAN_LITTLE))
+ ((bool, CL_DEVICE_ERROR_CORRECTION_SUPPORT))
+ ((cl_device_exec_capabilities, CL_DEVICE_EXECUTION_CAPABILITIES))
+ ((std::string, CL_DEVICE_EXTENSIONS))
+ ((cl_ulong, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE))
+ ((cl_device_mem_cache_type, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE))
+ ((cl_uint, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE))
+ ((cl_ulong, CL_DEVICE_GLOBAL_MEM_SIZE))
+ ((bool, CL_DEVICE_IMAGE_SUPPORT))
+ ((size_t, CL_DEVICE_IMAGE2D_MAX_HEIGHT))
+ ((size_t, CL_DEVICE_IMAGE2D_MAX_WIDTH))
+ ((size_t, CL_DEVICE_IMAGE3D_MAX_DEPTH))
+ ((size_t, CL_DEVICE_IMAGE3D_MAX_HEIGHT))
+ ((size_t, CL_DEVICE_IMAGE3D_MAX_WIDTH))
+ ((cl_ulong, CL_DEVICE_LOCAL_MEM_SIZE))
+ ((cl_device_local_mem_type, CL_DEVICE_LOCAL_MEM_TYPE))
+ ((cl_uint, CL_DEVICE_MAX_CLOCK_FREQUENCY))
+ ((cl_uint, CL_DEVICE_MAX_COMPUTE_UNITS))
+ ((cl_uint, CL_DEVICE_MAX_CONSTANT_ARGS))
+ ((cl_ulong, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE))
+ ((cl_ulong, CL_DEVICE_MAX_MEM_ALLOC_SIZE))
+ ((size_t, CL_DEVICE_MAX_PARAMETER_SIZE))
+ ((cl_uint, CL_DEVICE_MAX_READ_IMAGE_ARGS))
+ ((cl_uint, CL_DEVICE_MAX_SAMPLERS))
+ ((size_t, CL_DEVICE_MAX_WORK_GROUP_SIZE))
+ ((cl_uint, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS))
+ ((std::vector<size_t>, CL_DEVICE_MAX_WORK_ITEM_SIZES))
+ ((cl_uint, CL_DEVICE_MAX_WRITE_IMAGE_ARGS))
+ ((cl_uint, CL_DEVICE_MEM_BASE_ADDR_ALIGN))
+ ((cl_uint, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE))
+ ((std::string, CL_DEVICE_NAME))
+ ((cl_platform_id, CL_DEVICE_PLATFORM))
+ ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR))
+ ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT))
+ ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT))
+ ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG))
+ ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT))
+ ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE))
+ ((std::string, CL_DEVICE_PROFILE))
+ ((size_t, CL_DEVICE_PROFILING_TIMER_RESOLUTION))
+ ((cl_command_queue_properties, CL_DEVICE_QUEUE_PROPERTIES))
+ ((cl_device_fp_config, CL_DEVICE_SINGLE_FP_CONFIG))
+ ((cl_device_type, CL_DEVICE_TYPE))
+ ((std::string, CL_DEVICE_VENDOR))
+ ((cl_uint, CL_DEVICE_VENDOR_ID))
+ ((std::string, CL_DEVICE_VERSION))
+ ((std::string, CL_DRIVER_VERSION))
+)
+
+// The double- and half-precision floating-point config queries are only
+// given get_info() specializations when the corresponding macro is defined.
+#ifdef CL_DEVICE_DOUBLE_FP_CONFIG
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
+ ((cl_device_fp_config, CL_DEVICE_DOUBLE_FP_CONFIG))
+)
+#endif
+
+#ifdef CL_DEVICE_HALF_FP_CONFIG
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
+ ((cl_device_fp_config, CL_DEVICE_HALF_FP_CONFIG))
+)
+#endif
+
+// get_info() specializations that are only defined when CL_VERSION_1_1 is
+// defined.
+#ifdef CL_VERSION_1_1
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
+ ((bool, CL_DEVICE_HOST_UNIFIED_MEMORY))
+ ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR))
+ ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT))
+ ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT))
+ ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG))
+ ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT))
+ ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE))
+ ((std::string, CL_DEVICE_OPENCL_C_VERSION))
+)
+#endif // CL_VERSION_1_1
+
+// get_info() specializations that are only defined when CL_VERSION_1_2 is
+// defined.
+//
+// NOTE(review): the OpenCL specification describes
+// CL_DEVICE_PARTITION_PROPERTIES and CL_DEVICE_PARTITION_TYPE as arrays of
+// cl_device_partition_property, while both are mapped to a single value
+// here -- confirm whether these entries should use a std::vector result
+// (as CL_DEVICE_MAX_WORK_ITEM_SIZES does).
+#ifdef CL_VERSION_1_2
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
+ ((std::string, CL_DEVICE_BUILT_IN_KERNELS))
+ ((bool, CL_DEVICE_LINKER_AVAILABLE))
+ ((cl_device_id, CL_DEVICE_PARENT_DEVICE))
+ ((cl_uint, CL_DEVICE_PARTITION_MAX_SUB_DEVICES))
+ ((cl_device_partition_property, CL_DEVICE_PARTITION_PROPERTIES))
+ ((cl_device_affinity_domain, CL_DEVICE_PARTITION_AFFINITY_DOMAIN))
+ ((cl_device_partition_property, CL_DEVICE_PARTITION_TYPE))
+ ((size_t, CL_DEVICE_PRINTF_BUFFER_SIZE))
+ ((bool, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC))
+ ((cl_uint, CL_DEVICE_REFERENCE_COUNT))
+)
+#endif // CL_VERSION_1_2
+
+// get_info() specializations that are only defined when CL_VERSION_2_0 is
+// defined.
+#ifdef CL_VERSION_2_0
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
+ ((size_t, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE))
+ ((size_t, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE))
+ ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_EVENTS))
+ ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_QUEUES))
+ ((cl_uint, CL_DEVICE_MAX_PIPE_ARGS))
+ ((cl_uint, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS))
+ ((cl_uint, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS))
+ ((cl_uint, CL_DEVICE_PIPE_MAX_PACKET_SIZE))
+ ((cl_uint, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT))
+ ((cl_uint, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT))
+ ((cl_uint, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT))
+ ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE))
+ ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE))
+ ((cl_command_queue_properties, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES))
+ ((cl_device_svm_capabilities, CL_DEVICE_SVM_CAPABILITIES))
+ ((cl_uint, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT))
+ ((cl_uint, CL_DEVICE_IMAGE_PITCH_ALIGNMENT))
+)
+#endif // CL_VERSION_2_0
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_DEVICE_HPP
diff --git a/boost/compute/event.hpp b/boost/compute/event.hpp
new file mode 100644
index 0000000000..2f53d87650
--- /dev/null
+++ b/boost/compute/event.hpp
@@ -0,0 +1,338 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EVENT_HPP
+#define BOOST_COMPUTE_EVENT_HPP
+
+#include <boost/function.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/detail/duration.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class event
+/// \brief An event corresponding to an operation on a compute device
+///
+/// Event objects are used to track operations running on the device (such as
+/// kernel executions and memory transfers). Event objects are returned by the
+/// various \c enqueue_* methods of the command_queue class.
+///
+/// Events can be used to synchronize operations between the host and the
+/// device. The \c wait() method will block execution on the host until the
+/// operation corresponding to the event on the device has completed. The
+/// status of the operation can also be polled with the \c status() method.
+///
+/// Event objects can also be used for performance profiling. In order to use
+/// events for profiling, the command queue must be constructed with the
+/// \c CL_QUEUE_PROFILING_ENABLE flag. Then the \c duration() method can be
+/// used to retrieve the total duration of the operation on the device:
+/// \code
+/// std::cout << "time = " << e.duration<std::chrono::milliseconds>().count() << "ms\n";
+/// \endcode
+///
+/// \see \ref future "future<T>", wait_list
+class event
+{
+public:
+ /// \internal_
+ enum execution_status {
+ complete = CL_COMPLETE,
+ running = CL_RUNNING,
+ submitted = CL_SUBMITTED,
+ queued = CL_QUEUED
+ };
+
+ /// \internal_
+ enum command_type {
+ ndrange_kernel = CL_COMMAND_NDRANGE_KERNEL,
+ task = CL_COMMAND_TASK,
+ native_kernel = CL_COMMAND_NATIVE_KERNEL,
+ read_buffer = CL_COMMAND_READ_BUFFER,
+ write_buffer = CL_COMMAND_WRITE_BUFFER,
+ copy_buffer = CL_COMMAND_COPY_BUFFER,
+ read_image = CL_COMMAND_READ_IMAGE,
+ write_image = CL_COMMAND_WRITE_IMAGE,
+ copy_image = CL_COMMAND_COPY_IMAGE,
+ copy_image_to_buffer = CL_COMMAND_COPY_IMAGE_TO_BUFFER,
+ copy_buffer_to_image = CL_COMMAND_COPY_BUFFER_TO_IMAGE,
+ map_buffer = CL_COMMAND_MAP_BUFFER,
+ map_image = CL_COMMAND_MAP_IMAGE,
+ unmap_mem_object = CL_COMMAND_UNMAP_MEM_OBJECT,
+ marker = CL_COMMAND_MARKER,
+ aquire_gl_objects = CL_COMMAND_ACQUIRE_GL_OBJECTS,
+ release_gl_object = CL_COMMAND_RELEASE_GL_OBJECTS
+ #if defined(CL_VERSION_1_1)
+ ,
+ read_buffer_rect = CL_COMMAND_READ_BUFFER_RECT,
+ write_buffer_rect = CL_COMMAND_WRITE_BUFFER_RECT,
+ copy_buffer_rect = CL_COMMAND_COPY_BUFFER_RECT
+ #endif
+ };
+
+ /// \internal_
+ enum profiling_info {
+ profiling_command_queued = CL_PROFILING_COMMAND_QUEUED,
+ profiling_command_submit = CL_PROFILING_COMMAND_SUBMIT,
+ profiling_command_start = CL_PROFILING_COMMAND_START,
+ profiling_command_end = CL_PROFILING_COMMAND_END
+ };
+
+ /// Creates a null event object.
+ event()
+ : m_event(0)
+ {
+ }
+
+ /// Creates a new event object for \p event. If \p retain is \c true,
+ /// the reference count for the event will be incremented.
+ explicit event(cl_event event, bool retain = true)
+ : m_event(event)
+ {
+ if(m_event && retain){
+ clRetainEvent(event);
+ }
+ }
+
+ /// Makes a new event as a copy of \p other.
+ event(const event &other)
+ : m_event(other.m_event)
+ {
+ if(m_event){
+ clRetainEvent(m_event);
+ }
+ }
+
+ /// Copies the event object from \p other to \c *this.
+ event& operator=(const event &other)
+ {
+ if(this != &other){
+ if(m_event){
+ clReleaseEvent(m_event);
+ }
+
+ m_event = other.m_event;
+
+ if(m_event){
+ clRetainEvent(m_event);
+ }
+ }
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new event object from \p other.
+ event(event&& other) BOOST_NOEXCEPT
+ : m_event(other.m_event)
+ {
+ other.m_event = 0;
+ }
+
+ /// Move-assigns the event from \p other to \c *this.
+ event& operator=(event&& other) BOOST_NOEXCEPT
+ {
+ // self-move must not release and then null out our own handle
+ if(this != &other){
+ if(m_event){
+ clReleaseEvent(m_event);
+ }
+
+ m_event = other.m_event;
+ other.m_event = 0;
+ }
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the event object.
+ ~event()
+ {
+ if(m_event){
+ BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+ clReleaseEvent(m_event)
+ );
+ }
+ }
+
+ /// Returns a reference to the underlying OpenCL event object.
+ cl_event& get() const
+ {
+ return const_cast<cl_event &>(m_event);
+ }
+
+ /// Returns the status of the event.
+ cl_int status() const
+ {
+ return get_info<cl_int>(CL_EVENT_COMMAND_EXECUTION_STATUS);
+ }
+
+ /// Returns the command type for the event.
+ cl_command_type get_command_type() const
+ {
+ return get_info<cl_command_type>(CL_EVENT_COMMAND_TYPE);
+ }
+
+ /// Returns information about the event.
+ ///
+ /// \see_opencl_ref{clGetEventInfo}
+ template<class T>
+ T get_info(cl_event_info info) const
+ {
+ return detail::get_object_info<T>(clGetEventInfo, m_event, info);
+ }
+
+ /// \overload
+ template<int Enum>
+ typename detail::get_object_info_type<event, Enum>::type
+ get_info() const;
+
+ /// Returns profiling information for the event.
+ ///
+ /// \see event::duration()
+ ///
+ /// \see_opencl_ref{clGetEventProfilingInfo}
+ template<class T>
+ T get_profiling_info(cl_profiling_info info) const
+ {
+ return detail::get_object_info<T>(clGetEventProfilingInfo,
+ m_event,
+ info);
+ }
+
+ /// Blocks until the actions corresponding to the event have
+ /// completed. Throws opencl_error if clWaitForEvents() fails.
+ void wait() const
+ {
+ cl_int ret = clWaitForEvents(1, &m_event);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+ }
+
+ #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Registers a function to be called when the event status changes to
+ /// \p status (by default CL_COMPLETE). The callback is passed the OpenCL
+ /// event object, the event status, and a pointer to arbitrary user data.
+ ///
+ /// Throws opencl_error if clSetEventCallback() fails.
+ ///
+ /// \see_opencl_ref{clSetEventCallback}
+ ///
+ /// \opencl_version_warning{1,1}
+ void set_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)(
+ cl_event event, cl_int status, void *user_data
+ ),
+ cl_int status = CL_COMPLETE,
+ void *user_data = 0)
+ {
+ cl_int ret = clSetEventCallback(m_event, status, callback, user_data);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+ }
+
+ /// Registers a generic function to be called when the event status
+ /// changes to \p status (by default \c CL_COMPLETE).
+ ///
+ /// The function specified by \p callback must be invokable with zero
+ /// arguments (e.g. \c callback()).
+ ///
+ /// Throws opencl_error if clSetEventCallback() fails; the internal
+ /// copy of \p callback is destroyed in that case.
+ ///
+ /// \opencl_version_warning{1,1}
+ template<class Function>
+ void set_callback(Function callback, cl_int status = CL_COMPLETE)
+ {
+ // heap copy passed as user data; event_callback_invoker deletes it
+ // after invoking it
+ boost::function<void()> *heap_callback =
+ new boost::function<void()>(callback);
+
+ cl_int ret = clSetEventCallback(
+ m_event, status, event_callback_invoker, heap_callback
+ );
+ if(ret != CL_SUCCESS){
+ // registration failed, so the invoker will never run: destroy the
+ // copy here instead of leaking it
+ delete heap_callback;
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+ }
+ #endif // CL_VERSION_1_1
+
+ /// Returns the total duration of the event from \p start to \p end.
+ ///
+ /// For example, to print the number of milliseconds the event took to
+ /// execute:
+ /// \code
+ /// std::cout << event.duration<std::chrono::milliseconds>().count() << " ms" << std::endl;
+ /// \endcode
+ ///
+ /// \see event::get_profiling_info()
+ template<class Duration>
+ Duration duration(cl_profiling_info start = CL_PROFILING_COMMAND_START,
+ cl_profiling_info end = CL_PROFILING_COMMAND_END) const
+ {
+ const ulong_ nanoseconds =
+ get_profiling_info<ulong_>(end) - get_profiling_info<ulong_>(start);
+
+ return detail::make_duration_from_nanoseconds(Duration(), nanoseconds);
+ }
+
+ /// Returns \c true if the event is the same as \p other.
+ bool operator==(const event &other) const
+ {
+ return m_event == other.m_event;
+ }
+
+ /// Returns \c true if the event is different from \p other.
+ bool operator!=(const event &other) const
+ {
+ return m_event != other.m_event;
+ }
+
+ /// \internal_
+ operator cl_event() const
+ {
+ return m_event;
+ }
+
+ /// \internal_ (deprecated)
+ cl_int get_status() const
+ {
+ return status();
+ }
+
+private:
+ #ifdef CL_VERSION_1_1
+ /// \internal_
+ /// Trampoline registered by the generic set_callback() overload:
+ /// \p user_data is the heap-allocated boost::function, which is invoked
+ /// once and then deleted.
+ static void BOOST_COMPUTE_CL_CALLBACK
+ event_callback_invoker(cl_event, cl_int, void *user_data)
+ {
+ boost::function<void()> *callback =
+ static_cast<boost::function<void()> *>(user_data);
+
+ (*callback)();
+
+ delete callback;
+ }
+ #endif // CL_VERSION_1_1
+
+protected:
+ cl_event m_event;
+};
+
+/// \internal_ define get_info() specializations for event
+//
+// Each (type, enum) pair gives the result type of event::get_info<enum>().
+// CL_EVENT_CONTEXT is only given a specialization when CL_VERSION_1_1 is
+// defined.
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event,
+ ((cl_command_queue, CL_EVENT_COMMAND_QUEUE))
+ ((cl_command_type, CL_EVENT_COMMAND_TYPE))
+ ((cl_int, CL_EVENT_COMMAND_EXECUTION_STATUS))
+ ((cl_uint, CL_EVENT_REFERENCE_COUNT))
+)
+
+#ifdef CL_VERSION_1_1
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event,
+ ((cl_context, CL_EVENT_CONTEXT))
+)
+#endif
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EVENT_HPP
diff --git a/boost/compute/exception.hpp b/boost/compute/exception.hpp
new file mode 100644
index 0000000000..10a271de03
--- /dev/null
+++ b/boost/compute/exception.hpp
@@ -0,0 +1,23 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXCEPTION_HPP
+#define BOOST_COMPUTE_EXCEPTION_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute exception headers.
+
+#include <boost/compute/exception/context_error.hpp>
+#include <boost/compute/exception/no_device_found.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/exception/unsupported_extension_error.hpp>
+
+#endif // BOOST_COMPUTE_EXCEPTION_HPP
diff --git a/boost/compute/exception/context_error.hpp b/boost/compute/exception/context_error.hpp
new file mode 100644
index 0000000000..eeb387d884
--- /dev/null
+++ b/boost/compute/exception/context_error.hpp
@@ -0,0 +1,88 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP
+#define BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP
+
+#include <exception>
+
+namespace boost {
+namespace compute {
+
+class context;
+
+/// \class context_error
+/// \brief A run-time OpenCL context error.
+///
+/// The context_error exception is thrown when the OpenCL context encounters
+/// an error condition. Boost.Compute is notified of these error conditions by
+/// registering an error handler when creating context objects (via the
+/// \c pfn_notify argument to the \c clCreateContext() function).
+///
+/// This exception is different from the opencl_error exception which is
+/// thrown as a result of an error caused when calling a single OpenCL API
+/// function.
+///
+/// The pointers passed to the constructor are stored as-is; nothing they
+/// point to is copied. They are only safe to dereference while the memory
+/// they refer to is still alive.
+///
+/// \see opencl_error
+class context_error : public std::exception
+{
+public:
+ /// Creates a new context error exception object.
+ ///
+ /// \param context the context which reported the error
+ /// \param errinfo the error description returned by what()
+ /// \param private_info pointer to the private info memory block
+ /// \param private_info_size size of the private info memory block
+ context_error(const context *context,
+ const char *errinfo,
+ const void *private_info,
+ size_t private_info_size) throw()
+ : m_context(context),
+ m_errinfo(errinfo),
+ m_private_info(private_info),
+ m_private_info_size(private_info_size)
+ {
+ }
+
+ /// Destroys the context error object.
+ ~context_error() throw()
+ {
+ }
+
+ /// Returns a string with a description of the error.
+ ///
+ /// This is the \c errinfo pointer given to the constructor.
+ const char* what() const throw()
+ {
+ return m_errinfo;
+ }
+
+ /// Returns a pointer to the context object which generated the error
+ /// notification.
+ const context* get_context_ptr() const throw()
+ {
+ return m_context;
+ }
+
+ /// Returns a pointer to the private info memory block.
+ const void* get_private_info_ptr() const throw()
+ {
+ return m_private_info;
+ }
+
+ /// Returns the size of the private info memory block.
+ // (returned by value; a top-level const on the return type has no
+ // effect and only triggers compiler warnings)
+ size_t get_private_info_size() const throw()
+ {
+ return m_private_info_size;
+ }
+
+private:
+ const context *m_context;
+ const char *m_errinfo;
+ const void *m_private_info;
+ size_t m_private_info_size;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP
diff --git a/boost/compute/exception/no_device_found.hpp b/boost/compute/exception/no_device_found.hpp
new file mode 100644
index 0000000000..fb73942e0c
--- /dev/null
+++ b/boost/compute/exception/no_device_found.hpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP
+#define BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP
+
+#include <exception>
+
+namespace boost {
+namespace compute {
+
+/// \class no_device_found
+/// \brief Exception thrown when no OpenCL device is found
+///
+/// This exception is thrown when no valid OpenCL device can be found.
+///
+/// \see opencl_error
+class no_device_found : public std::exception
+{
+public:
+    /// Constructs the exception. It holds no state of its own.
+    no_device_found() throw() {}
+
+    /// Releases the no_device_found exception object.
+    ~no_device_found() throw() {}
+
+    /// Describes the failure.
+    ///
+    /// \return a fixed, human-readable message; the pointer refers to a
+    ///         string literal and therefore never needs to be freed.
+    const char* what() const throw()
+    {
+        const char *message = "No OpenCL device found";
+        return message;
+    }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP
diff --git a/boost/compute/exception/opencl_error.hpp b/boost/compute/exception/opencl_error.hpp
new file mode 100644
index 0000000000..29a3a9d258
--- /dev/null
+++ b/boost/compute/exception/opencl_error.hpp
@@ -0,0 +1,158 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP
+#define BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP
+
+#include <exception>
+#include <string>
+#include <sstream>
+
+#include <boost/compute/cl.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class opencl_error
+/// \brief A run-time OpenCL error.
+///
+/// The opencl_error class represents an error returned from an OpenCL
+/// function.
+///
+/// \see context_error
+class opencl_error : public std::exception
+{
+public:
+    /// Creates a new opencl_error exception object for \p error.
+    explicit opencl_error(cl_int error) throw()
+        : m_error(error),
+          m_error_string(to_string(error))
+    {
+    }
+
+    /// Destroys the opencl_error object.
+    ~opencl_error() throw()
+    {
+    }
+
+    /// Returns the numeric error code.
+    cl_int error_code() const throw()
+    {
+        return m_error;
+    }
+
+    /// Returns a string description of the error.
+    std::string error_string() const throw()
+    {
+        return m_error_string;
+    }
+
+    /// Returns a C-string description of the error.
+    const char* what() const throw()
+    {
+        return m_error_string.c_str();
+    }
+
+    /// Static function which converts the numeric OpenCL error code \p error
+    /// to a human-readable string. For example:
+    /// \code
+    /// std::cout << opencl_error::to_string(CL_INVALID_KERNEL_ARGS) << std::endl;
+    /// \endcode
+    /// will print "Invalid Kernel Arguments".
+    ///
+    /// Unknown codes (e.g. not a valid OpenCL error) yield the string
+    /// "Unknown OpenCL Error" followed by the error number in parentheses.
+    static std::string to_string(cl_int error)
+    {
+        switch(error){
+        case CL_SUCCESS: return "Success";
+        case CL_DEVICE_NOT_FOUND: return "Device Not Found";
+        case CL_DEVICE_NOT_AVAILABLE: return "Device Not Available";
+        case CL_COMPILER_NOT_AVAILABLE: return "Compiler Not Available";
+        case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory Object Allocation Failure";
+        case CL_OUT_OF_RESOURCES: return "Out of Resources";
+        case CL_OUT_OF_HOST_MEMORY: return "Out of Host Memory";
+        case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling Information Not Available";
+        case CL_MEM_COPY_OVERLAP: return "Memory Copy Overlap";
+        case CL_IMAGE_FORMAT_MISMATCH: return "Image Format Mismatch";
+        case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image Format Not Supported";
+        case CL_BUILD_PROGRAM_FAILURE: return "Build Program Failure";
+        case CL_MAP_FAILURE: return "Map Failure";
+        case CL_INVALID_VALUE: return "Invalid Value";
+        case CL_INVALID_DEVICE_TYPE: return "Invalid Device Type";
+        case CL_INVALID_PLATFORM: return "Invalid Platform";
+        case CL_INVALID_DEVICE: return "Invalid Device";
+        case CL_INVALID_CONTEXT: return "Invalid Context";
+        case CL_INVALID_QUEUE_PROPERTIES: return "Invalid Queue Properties";
+        case CL_INVALID_COMMAND_QUEUE: return "Invalid Command Queue";
+        case CL_INVALID_HOST_PTR: return "Invalid Host Pointer";
+        case CL_INVALID_MEM_OBJECT: return "Invalid Memory Object";
+        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid Image Format Descriptor";
+        case CL_INVALID_IMAGE_SIZE: return "Invalid Image Size";
+        case CL_INVALID_SAMPLER: return "Invalid Sampler";
+        case CL_INVALID_BINARY: return "Invalid Binary";
+        case CL_INVALID_BUILD_OPTIONS: return "Invalid Build Options";
+        case CL_INVALID_PROGRAM: return "Invalid Program";
+        case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid Program Executable";
+        case CL_INVALID_KERNEL_NAME: return "Invalid Kernel Name";
+        case CL_INVALID_KERNEL_DEFINITION: return "Invalid Kernel Definition";
+        case CL_INVALID_KERNEL: return "Invalid Kernel";
+        case CL_INVALID_ARG_INDEX: return "Invalid Argument Index";
+        case CL_INVALID_ARG_VALUE: return "Invalid Argument Value";
+        case CL_INVALID_ARG_SIZE: return "Invalid Argument Size";
+        case CL_INVALID_KERNEL_ARGS: return "Invalid Kernel Arguments";
+        case CL_INVALID_WORK_DIMENSION: return "Invalid Work Dimension";
+        case CL_INVALID_WORK_GROUP_SIZE: return "Invalid Work Group Size";
+        case CL_INVALID_WORK_ITEM_SIZE: return "Invalid Work Item Size";
+        case CL_INVALID_GLOBAL_OFFSET: return "Invalid Global Offset";
+        case CL_INVALID_EVENT_WAIT_LIST: return "Invalid Event Wait List";
+        case CL_INVALID_EVENT: return "Invalid Event";
+        case CL_INVALID_OPERATION: return "Invalid Operation";
+        case CL_INVALID_GL_OBJECT: return "Invalid GL Object";
+        case CL_INVALID_BUFFER_SIZE: return "Invalid Buffer Size";
+        case CL_INVALID_MIP_LEVEL: return "Invalid MIP Level";
+        case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid Global Work Size";
+        #ifdef CL_VERSION_1_1
+        case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "Misaligned Sub-Buffer Offset";
+        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "Execution Status Error for Events in Wait List";
+        #endif // CL_VERSION_1_1
+        #ifdef CL_VERSION_1_2
+        case CL_COMPILE_PROGRAM_FAILURE: return "Compile Program Failure";
+        case CL_LINKER_NOT_AVAILABLE: return "Linker Not Available";
+        case CL_LINK_PROGRAM_FAILURE: return "Link Program Failure";
+        case CL_DEVICE_PARTITION_FAILED: return "Device Partition Failed";
+        case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "Kernel Argument Info Not Available";
+        case CL_INVALID_PROPERTY: return "Invalid Property";
+        case CL_INVALID_IMAGE_DESCRIPTOR: return "Invalid Image Descriptor";
+        case CL_INVALID_COMPILER_OPTIONS: return "Invalid Compiler Options";
+        case CL_INVALID_LINKER_OPTIONS: return "Invalid Linker Options";
+        case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid Device Partition Count";
+        #endif // CL_VERSION_1_2
+        #ifdef CL_VERSION_2_0
+        case CL_INVALID_PIPE_SIZE: return "Invalid Pipe Size";
+        case CL_INVALID_DEVICE_QUEUE: return "Invalid Device Queue";
+        #endif
+        default: {
+            std::stringstream s;
+            s << "Unknown OpenCL Error (" << error << ")";
+            return s.str();
+        }
+        }
+    }
+
+private:
+    cl_int m_error;
+    std::string m_error_string;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP
diff --git a/boost/compute/exception/unsupported_extension_error.hpp b/boost/compute/exception/unsupported_extension_error.hpp
new file mode 100644
index 0000000000..c6f4de6c33
--- /dev/null
+++ b/boost/compute/exception/unsupported_extension_error.hpp
@@ -0,0 +1,71 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP
+#define BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP
+
+#include <exception>
+#include <sstream>
+#include <string>
+
+namespace boost {
+namespace compute {
+
+/// \class unsupported_extension_error
+/// \brief Exception thrown when attempting to use an unsupported
+/// OpenCL extension.
+///
+/// This exception is thrown when the user attempts to use an OpenCL
+/// extension which is not supported on the platform and/or device.
+///
+/// An example of this is attempting to use CL-GL sharing on a non-GPU
+/// device.
+///
+/// \see opencl_error
+class unsupported_extension_error : public std::exception
+{
+public:
+    /// Creates a new unsupported extension error exception object indicating
+    /// that \p extension is not supported by the OpenCL platform or device.
+    ///
+    /// The text returned by what() is assembled here from the stored name.
+    explicit unsupported_extension_error(const char *extension) throw()
+        : m_extension(extension),
+          m_error_string("OpenCL extension " + m_extension + " not supported")
+    {
+    }
+
+    /// Destroys the unsupported extension error object.
+    ~unsupported_extension_error() throw()
+    {
+    }
+
+    /// Returns a copy of the name of the unsupported extension.
+    std::string extension_name() const throw()
+    {
+        return m_extension;
+    }
+
+    /// Returns a string containing a human-readable error message containing
+    /// the name of the unsupported extension.
+    const char* what() const throw()
+    {
+        return m_error_string.c_str();
+    }
+
+private:
+    std::string m_extension; // must stay first: m_error_string is built from it
+    std::string m_error_string;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP
diff --git a/boost/compute/experimental/clamp_range.hpp b/boost/compute/experimental/clamp_range.hpp
new file mode 100644
index 0000000000..0c2260498f
--- /dev/null
+++ b/boost/compute/experimental/clamp_range.hpp
@@ -0,0 +1,49 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP
+#define BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP
+
+#include <iterator>
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+
+namespace boost {
+namespace compute {
+namespace experimental {
+
+/// Applies lambda::clamp(_1, lo, hi) to every element of the range
+/// [\p first, \p last) through transform(), writing the results to the
+/// range starting at \p result using \p queue.
+///
+/// \return the iterator returned by the underlying transform() call
+template<class InputIterator, class OutputIterator>
+inline OutputIterator
+clamp_range(InputIterator first,
+            InputIterator last,
+            OutputIterator result,
+            typename std::iterator_traits<InputIterator>::value_type lo,
+            typename std::iterator_traits<InputIterator>::value_type hi,
+            command_queue &queue)
+{
+    using ::boost::compute::lambda::_1;
+    using ::boost::compute::lambda::clamp;
+
+    return ::boost::compute::transform(
+        first, last, result, clamp(_1, lo, hi), queue
+    );
+}
+
+} // end experimental namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP
diff --git a/boost/compute/experimental/malloc.hpp b/boost/compute/experimental/malloc.hpp
new file mode 100644
index 0000000000..ad96888743
--- /dev/null
+++ b/boost/compute/experimental/malloc.hpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP
+#define BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/detail/device_ptr.hpp>
+
+namespace boost {
+namespace compute {
+namespace experimental {
+
+// bring device_ptr into the experimental namespace
+using detail::device_ptr;
+
+template<class T>
+inline device_ptr<T>
+malloc(std::size_t size, const context &context = system::default_context()) // size counts elements of T, not bytes
+{
+ buffer buf(context, size * sizeof(T)); // byte size = element count * sizeof(T)
+ clRetainMemObject(buf.get()); // manual extra reference; presumably keeps the memory alive once buf goes out of scope (free() issues the matching release)
+ return device_ptr<T>(buf);
+}
+
+inline device_ptr<char>
+malloc(std::size_t size, const context &context = system::default_context()) // non-template convenience overload
+{
+ return malloc<char>(size, context); // forwards with T = char, so size is a byte count here
+}
+
+template<class T>
+inline void free(device_ptr<T> &ptr)
+{
+ clReleaseMemObject(ptr.get_buffer().get()); // releases one reference on the underlying memory object; ptr is not reset here
+}
+
+} // end experimental namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP
diff --git a/boost/compute/experimental/sort_by_transform.hpp b/boost/compute/experimental/sort_by_transform.hpp
new file mode 100644
index 0000000000..3d84ba9810
--- /dev/null
+++ b/boost/compute/experimental/sort_by_transform.hpp
@@ -0,0 +1,66 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP
+#define BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/sort_by_key.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+namespace experimental {
+
+/// Sorts the range [\p first, \p last) by the keys that \p transform produces
+/// for its elements, ordering those keys with \p compare.
+///
+/// A temporary vector of keys is filled with transform() and then handed to
+/// sort_by_key() together with the original values. Ranges with fewer than
+/// two elements are left untouched.
+template<class Iterator, class Transform, class Compare>
+inline void sort_by_transform(Iterator first,
+                              Iterator last,
+                              Transform transform,
+                              Compare compare,
+                              command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<Iterator>::value_type element_type;
+    typedef typename
+        boost::compute::result_of<Transform(element_type)>::type transformed_type;
+
+    const size_t count = detail::iterator_range_size(first, last);
+    if(count >= 2){
+        // one key per element, allocated in the queue's context
+        ::boost::compute::vector<transformed_type> keys(
+            count, queue.get_context()
+        );
+
+        // fill the keys from the elements
+        ::boost::compute::transform(
+            first, last, keys.begin(), transform, queue
+        );
+
+        // sort the keys, passing the original range along as the values
+        ::boost::compute::sort_by_key(
+            keys.begin(), keys.end(), first, compare, queue
+        );
+    }
+}
+
+} // end experimental namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP
diff --git a/boost/compute/experimental/tabulate.hpp b/boost/compute/experimental/tabulate.hpp
new file mode 100644
index 0000000000..4f607e7961
--- /dev/null
+++ b/boost/compute/experimental/tabulate.hpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP
+#define BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP
+
+#include <iterator>
+
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/iterator/counting_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace experimental {
+
+/// Writes \p function applied to the indices 0, 1, ..., n-1 (as \c int) to
+/// the range [\p first, \p last), where n is the size of that range.
+template<class Iterator, class UnaryFunction>
+inline void tabulate(Iterator first,
+                     Iterator last,
+                     UnaryFunction function,
+                     command_queue &queue)
+{
+    const size_t count = detail::iterator_range_size(first, last);
+
+    ::boost::compute::transform(
+        ::boost::compute::make_counting_iterator<int>(0),
+        ::boost::compute::make_counting_iterator<int>(static_cast<int>(count)),
+        first, function, queue
+    );
+}
+
+} // end experimental namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP
diff --git a/boost/compute/function.hpp b/boost/compute/function.hpp
new file mode 100644
index 0000000000..e83f16808a
--- /dev/null
+++ b/boost/compute/function.hpp
@@ -0,0 +1,454 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTION_HPP
+#define BOOST_COMPUTE_FUNCTION_HPP
+
+#include <map>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include <boost/assert.hpp>
+#include <boost/config.hpp>
+#include <boost/function_types/parameter_types.hpp>
+#include <boost/preprocessor/repetition.hpp>
+#include <boost/mpl/for_each.hpp>
+#include <boost/mpl/size.hpp>
+#include <boost/mpl/transform.hpp>
+#include <boost/static_assert.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/type_traits/add_pointer.hpp>
+#include <boost/type_traits/function_traits.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/config.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// Captures a single invocation of a compute function: the function's
+/// name and source, a map of definitions, and the tuple of arguments that
+/// the call was made with. Pieces which are not supplied to a constructor
+/// are default-constructed.
+template<class ResultType, class ArgTuple>
+class invoked_function
+{
+public:
+    typedef ResultType result_type;
+
+    // number of arguments held in ArgTuple
+    BOOST_STATIC_CONSTANT(
+        size_t, arity = boost::tuples::length<ArgTuple>::value
+    );
+
+    // invocation with neither definitions nor arguments
+    invoked_function(const std::string &name,
+                     const std::string &source)
+        : m_name(name), m_source(source)
+    {
+    }
+
+    // invocation carrying definitions but no arguments
+    invoked_function(const std::string &name,
+                     const std::string &source,
+                     const std::map<std::string, std::string> &definitions)
+        : m_name(name), m_source(source), m_definitions(definitions)
+    {
+    }
+
+    // invocation carrying arguments but no definitions
+    invoked_function(const std::string &name,
+                     const std::string &source,
+                     const ArgTuple &args)
+        : m_name(name), m_source(source), m_args(args)
+    {
+    }
+
+    // invocation carrying both definitions and arguments
+    invoked_function(const std::string &name,
+                     const std::string &source,
+                     const std::map<std::string, std::string> &definitions,
+                     const ArgTuple &args)
+        : m_name(name),
+          m_source(source),
+          m_definitions(definitions),
+          m_args(args)
+    {
+    }
+
+    // the strings are returned by value, the containers by const reference
+    std::string name() const { return m_name; }
+
+    std::string source() const { return m_source; }
+
+    const std::map<std::string, std::string>& definitions() const
+    {
+        return m_definitions;
+    }
+
+    const ArgTuple& args() const
+    {
+        return m_args;
+    }
+
+private:
+    // declared in the order the constructors initialize them
+    std::string m_name;
+    std::string m_source;
+    std::map<std::string, std::string> m_definitions;
+    ArgTuple m_args;
+};
+
+} // end detail namespace
+
+/// \class function
+/// \brief A function object.
+template<class Signature>
+class function
+{
+public:
+    /// \internal_
+    typedef typename
+        boost::function_traits<Signature>::result_type result_type;
+
+    /// \internal_
+    BOOST_STATIC_CONSTANT(
+        size_t, arity = boost::function_traits<Signature>::arity
+    );
+
+    /// \internal_
+    typedef Signature signature;
+
+    /// Creates a new function object with \p name. The source stays empty
+    function(const std::string &name) // until set_source() is called.
+        : m_name(name)
+    {
+    }
+
+    /// Destroys the function object.
+    ~function()
+    {
+    }
+
+    /// \internal_
+    std::string name() const
+    {
+        return m_name;
+    }
+
+    /// \internal_
+    void set_source(const std::string &source)
+    {
+        m_source = source;
+    }
+
+    /// \internal_
+    std::string source() const
+    {
+        return m_source;
+    }
+
+    /// \internal_
+    void define(std::string name, std::string value = std::string())
+    {
+        m_definitions[name] = value; // a repeated name overwrites its value
+    }
+
+    /// \internal_
+    detail::invoked_function<result_type, boost::tuple<> >
+    operator()() const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            arity == 0,
+            "Non-nullary function invoked with zero arguments"
+        );
+
+        return detail::invoked_function<result_type, boost::tuple<> >(
+            m_name, m_source, m_definitions
+        );
+    }
+
+    /// \internal_
+    template<class Arg1>
+    detail::invoked_function<result_type, boost::tuple<Arg1> >
+    operator()(const Arg1 &arg1) const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            arity == 1,
+            "Non-unary function invoked with one argument"
+        );
+
+        return detail::invoked_function<result_type, boost::tuple<Arg1> >(
+            m_name, m_source, m_definitions, boost::make_tuple(arg1)
+        );
+    }
+
+    /// \internal_
+    template<class Arg1, class Arg2>
+    detail::invoked_function<result_type, boost::tuple<Arg1, Arg2> >
+    operator()(const Arg1 &arg1, const Arg2 &arg2) const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            arity == 2,
+            "Non-binary function invoked with two arguments"
+        );
+
+        return detail::invoked_function<result_type, boost::tuple<Arg1, Arg2> >(
+            m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2)
+        );
+    }
+
+    /// \internal_
+    template<class Arg1, class Arg2, class Arg3>
+    detail::invoked_function<result_type, boost::tuple<Arg1, Arg2, Arg3> >
+    operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const
+    {
+        BOOST_STATIC_ASSERT_MSG(
+            arity == 3,
+            "Non-ternary function invoked with three arguments"
+        );
+
+        return detail::invoked_function<result_type, boost::tuple<Arg1, Arg2, Arg3> >(
+            m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3)
+        );
+    }
+
+private:
+    std::string m_name;
+    std::string m_source;
+    std::map<std::string, std::string> m_definitions;
+};
+
+/// Creates a function object given its \p name and \p source.
+///
+/// \param name The function name.
+/// \param source The function source code, stored as given via set_source().
+/// \return A function<Signature> named \p name whose source is \p source.
+/// \see BOOST_COMPUTE_FUNCTION()
+template<class Signature>
+inline function<Signature>
+make_function_from_source(const std::string &name, const std::string &source)
+{
+ function<Signature> f(name);
+ f.set_source(source);
+ return f;
+}
+
+namespace detail {
+
+// given a string containing the arguments declaration for a function
+// like: "(int a, const float b)", returns a vector containing the name
+// of each argument (e.g. ["a", "b"]); anything shorter than "()" gives [].
+inline std::vector<std::string> parse_argument_names(const char *arguments)
+{
+    const size_t length = std::strlen(arguments); // measured only once
+
+    BOOST_ASSERT_MSG(
+        length >= 2 && arguments[0] == '(' && arguments[length-1] == ')',
+        "Arguments should start and end with parentheses"
+    );
+
+    std::vector<std::string> args;
+    if(length < 2){
+        return args; // "length - 2" below would wrap around
+    }
+
+    size_t last_space = 0;
+    size_t skip_comma = 0; // nesting depth of '<' ... '>'
+    for(size_t i = 1; i < length - 2; i++){
+        const char c = arguments[i];
+
+        if(c == ' '){
+            last_space = i;
+        }
+        else if(c == ',' && !skip_comma){
+            args.push_back(std::string(arguments + last_space + 1, i - last_space - 1));
+        }
+        else if(c == '<'){
+            skip_comma++;
+        }
+        else if(c == '>' && skip_comma){
+            skip_comma--; // a stray '>' must not wrap the depth counter
+        }
+    }
+
+    // the last name runs from the final space to the closing parenthesis
+    args.push_back(std::string(arguments + last_space + 1, length - last_space - 2));
+
+    return args;
+}
+
+// mpl::for_each functor which writes "<type> <name>" for each parameter of
+// a signature to a stream, separating consecutive entries with ", ".
+struct signature_argument_inserter
+{
+    signature_argument_inserter(std::stringstream &s_, const char *arguments, size_t last)
+        : n(0),
+          m_last(last),
+          s(s_),
+          m_argument_names(parse_argument_names(arguments))
+    {
+        BOOST_ASSERT_MSG(
+            m_argument_names.size() == last,
+            "Wrong number of arguments"
+        );
+    }
+
+    template<class T>
+    void operator()(const T*)
+    {
+        s << type_name<T>() << " " << m_argument_names[n];
+        n++;
+        if(n < m_last){
+            s << ", ";
+        }
+    }
+
+    size_t n; // index of the next parameter to write
+    size_t m_last; // total number of parameters
+    std::stringstream &s;
+    std::vector<std::string> m_argument_names;
+};
+
+// builds the text "inline <result type> <name>(<type> <arg>, ...)" for
+// Signature, taking the argument names from the string in arguments.
+template<class Signature>
+inline std::string make_function_declaration(const char *name, const char *arguments)
+{
+    typedef typename boost::function_traits<Signature>::result_type return_t;
+    typedef typename
+        boost::function_types::parameter_types<Signature>::type params_t;
+    typedef typename mpl::size<params_t>::type count_t;
+    typedef typename
+        mpl::transform<params_t, boost::add_pointer<mpl::_1> >::type pointers_t;
+
+    std::stringstream out;
+    out << "inline " << type_name<return_t>() << " " << name << "(";
+
+    // nullary signatures have no names to parse, so the inserter is skipped
+    if(count_t::value > 0){
+        signature_argument_inserter inserter(out, arguments, count_t::value);
+        mpl::for_each<pointers_t>(inserter);
+    }
+
+    out << ")";
+    return out.str();
+}
+
+// mpl::for_each functor which writes "<type> <letter>" for each parameter,
+// naming them with consecutive characters starting at the given one.
+struct argument_list_inserter
+{
+    argument_list_inserter(std::stringstream &s_, const char first, size_t last)
+        : n(0), m_last(last), m_name(first), s(s_)
+    {
+    }
+
+    template<class T>
+    void operator()(const T*)
+    {
+        s << type_name<T>() << " " << m_name;
+        m_name++;
+        n++;
+        if(n < m_last){
+            s << ", ";
+        }
+    }
+
+    size_t n; // number of parameters written so far
+    size_t m_last; // total number of parameters
+    char m_name; // character used to name the next parameter
+    std::stringstream &s;
+};
+
+// builds "(<type> a, <type> b, ...)" for Signature, lettering from first.
+template<class Signature>
+inline std::string generate_argument_list(const char first = 'a')
+{
+    typedef typename
+        boost::function_types::parameter_types<Signature>::type params_t;
+    typedef typename mpl::size<params_t>::type count_t;
+    typedef typename
+        mpl::transform<params_t, boost::add_pointer<mpl::_1> >::type pointers_t;
+
+    std::stringstream out;
+    out << '(';
+
+    if(count_t::value > 0){
+        argument_list_inserter inserter(out, first, count_t::value);
+        mpl::for_each<pointers_t>(inserter);
+    }
+
+    out << ')';
+    return out.str();
+}
+
+// used by the BOOST_COMPUTE_FUNCTION() macro to create a function
+// with the given signature, name, arguments, and source.
+template<class Signature>
+inline function<Signature>
+make_function_impl(const char *name, const char *arguments, const char *source)
+{
+ std::stringstream s;
+ s << make_function_declaration<Signature>(name, arguments); // generated "inline <type> <name>(...)" header
+ s << source; // the body text is appended as given, directly after the header
+
+ return make_function_from_source<Signature>(name, s.str());
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+/// Creates a function object with \p name and \p source.
+///
+/// \param return_type The return type for the function.
+/// \param name The name of the function.
+/// \param arguments A list of arguments for the function.
+/// \param source The OpenCL C source code for the function.
+///
+/// The function declaration and signature are automatically created using
+/// the \p return_type, \p name, and \p arguments macro parameters; the macro
+/// expands to the definition of a function<> variable called \p name.
+/// The source (taken variadically, so it may contain commas) is stringified,
+/// interpreted as OpenCL C99 source code, and passed to the OpenCL compiler
+/// when the function is invoked.
+///
+/// For example, to create a function which squares a number:
+/// \code
+/// BOOST_COMPUTE_FUNCTION(float, square, (float x),
+/// {
+/// return x * x;
+/// });
+/// \endcode
+///
+/// And to create a function which sums two numbers:
+/// \code
+/// BOOST_COMPUTE_FUNCTION(int, sum_two, (int x, int y),
+/// {
+/// return x + y;
+/// });
+/// \endcode
+///
+/// \see BOOST_COMPUTE_CLOSURE()
+#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
+#define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, source)
+#else
+#define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, ...) \
+ ::boost::compute::function<return_type arguments> name = \
+ ::boost::compute::detail::make_function_impl<return_type arguments>( \
+ #name, #arguments, #__VA_ARGS__ \
+ )
+#endif
+
+#endif // BOOST_COMPUTE_FUNCTION_HPP
diff --git a/boost/compute/functional.hpp b/boost/compute/functional.hpp
new file mode 100644
index 0000000000..d2065216f4
--- /dev/null
+++ b/boost/compute/functional.hpp
@@ -0,0 +1,34 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute functional headers.
+
+#include <boost/compute/functional/as.hpp>
+#include <boost/compute/functional/atomic.hpp>
+#include <boost/compute/functional/common.hpp>
+#include <boost/compute/functional/convert.hpp>
+#include <boost/compute/functional/field.hpp>
+#include <boost/compute/functional/geometry.hpp>
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/functional/hash.hpp>
+#include <boost/compute/functional/identity.hpp>
+#include <boost/compute/functional/integer.hpp>
+#include <boost/compute/functional/logical.hpp>
+#include <boost/compute/functional/math.hpp>
+#include <boost/compute/functional/operator.hpp>
+#include <boost/compute/functional/popcount.hpp>
+#include <boost/compute/functional/relational.hpp>
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_HPP
diff --git a/boost/compute/functional/as.hpp b/boost/compute/functional/as.hpp
new file mode 100644
index 0000000000..584bd0f38a
--- /dev/null
+++ b/boost/compute/functional/as.hpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_AS_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_AS_HPP
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T, class Arg>
+struct invoked_as // result of calling as<T> on an argument; T is carried only in the type
+{
+ invoked_as(const Arg &arg)
+ : m_arg(arg)
+ {
+ }
+
+ Arg m_arg; // copy of the argument the call was made with
+};
+
+} // end detail namespace
+
+/// The \ref as function converts its argument to type \c T (similar to
+/// reinterpret_cast<T>).
+/// Calling it returns a detail::invoked_as<T, Arg> that holds the argument.
+/// \see \ref convert "convert<T>"
+template<class T>
+struct as
+{
+ typedef T result_type;
+
+ /// \internal_
+ template<class Arg>
+ detail::invoked_as<T, Arg> operator()(const Arg &arg) const
+ {
+ return detail::invoked_as<T, Arg>(arg);
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_AS_HPP
diff --git a/boost/compute/functional/atomic.hpp b/boost/compute/functional/atomic.hpp
new file mode 100644
index 0000000000..2701561bc3
--- /dev/null
+++ b/boost/compute/functional/atomic.hpp
@@ -0,0 +1,141 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/function.hpp>
+
+#ifndef BOOST_COMPUTE_DOXYGEN_INVOKED
+#ifdef CL_VERSION_1_1 // selects the name prefix used by the atomic_* classes in this header
+ #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atomic_"
+#else
+ #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atom_"
+#endif
+#endif // BOOST_COMPUTE_DOXYGEN_INVOKED
+
+namespace boost {
+namespace compute {
+
+template<class T>
+class atomic_add : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_add() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "add") // "atomic_add" or "atom_add"
+ {
+ }
+};
+
+template<class T>
+class atomic_sub : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_sub() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "sub") // "atomic_sub" or "atom_sub"
+ {
+ }
+};
+
+template<class T>
+class atomic_xchg : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_xchg() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xchg") // "atomic_xchg" or "atom_xchg"
+ {
+ }
+};
+
+template<class T>
+class atomic_inc : public function<T (T*)> // function<> with signature T (T*)
+{
+public:
+ atomic_inc() // forwards the prefixed name to the function<> base
+ : function<T (T*)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "inc") // "atomic_inc" or "atom_inc"
+ {
+ }
+};
+
+template<class T>
+class atomic_dec : public function<T (T*)> // function<> with signature T (T*)
+{
+public:
+ atomic_dec() // forwards the prefixed name to the function<> base
+ : function<T (T*)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "dec") // "atomic_dec" or "atom_dec"
+ {
+ }
+};
+
+template<class T>
+class atomic_cmpxchg : public function<T (T*, T, T)> // function<> with signature T (T*, T, T)
+{
+public:
+ atomic_cmpxchg() // forwards the prefixed name to the function<> base
+ : function<T (T*, T, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "cmpxchg") // "atomic_cmpxchg" or "atom_cmpxchg"
+ {
+ }
+};
+
+template<class T>
+class atomic_max : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_max() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "max") // "atomic_max" or "atom_max"
+ {
+ }
+};
+
+template<class T>
+class atomic_min : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_min() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "min") // "atomic_min" or "atom_min"
+ {
+ }
+};
+
+template<class T>
+class atomic_and : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_and() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "and") // "atomic_and" or "atom_and"
+ {
+ }
+};
+
+template<class T>
+class atomic_or : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_or() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "or") // "atomic_or" or "atom_or"
+ {
+ }
+};
+
+template<class T>
+class atomic_xor : public function<T (T*, T)> // function<> with signature T (T*, T)
+{
+public:
+ atomic_xor() // forwards the prefixed name to the function<> base
+ : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xor") // "atomic_xor" or "atom_xor"
+ {
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP
diff --git a/boost/compute/functional/bind.hpp b/boost/compute/functional/bind.hpp
new file mode 100644
index 0000000000..0c5929f3b9
--- /dev/null
+++ b/boost/compute/functional/bind.hpp
@@ -0,0 +1,261 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_BIND_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_BIND_HPP
+
+#include <boost/mpl/int.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/type_traits/conditional.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace placeholders {
+
+/// \internal_
+template<int I>
+struct placeholder : boost::integral_constant<int, I> // ::value is the index I
+{
+ placeholder() { } // user-provided default constructor; presumably needed for the const objects _1/_2
+};
+
+placeholder<0> const _1; // index 0: replaced by the first invocation argument
+placeholder<1> const _2; // index 1: replaced by the second invocation argument
+
+} // end placeholders namespace
+
+/// Meta-function returning \c true if \c T is a placeholder type.
+template<class T>
+struct is_placeholder : boost::false_type // primary template: T is not a placeholder
+{
+};
+
+/// \internal_
+template<int I>
+struct is_placeholder<placeholders::placeholder<I> > : boost::true_type // matches any placeholder<I>
+{
+};
+
+namespace detail {
+
+template<class Function, class BoundArgs, class Args> // BoundArgs/Args are used as boost tuples
+struct invoked_bound_function // a bound function together with the arguments of one invocation
+{
+ invoked_bound_function(Function f, BoundArgs bound_args, Args args) // stores copies of all three
+ : m_function(f),
+ m_bound_args(bound_args),
+ m_args(args)
+ {
+ }
+
+ // meta-function returning true if the N'th argument is a placeholder
+ template<int N>
+ struct is_placeholder_arg
+ {
+ typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg; // N'th bound type
+
+ typedef typename is_placeholder<nth_bound_arg>::type type;
+ static const bool value = is_placeholder<nth_bound_arg>::value;
+ };
+
+ template<class Arg>
+ struct get_arg_type // primary template: a non-placeholder bound argument keeps its own type
+ {
+ typedef Arg type;
+ };
+
+ template<int I>
+ struct get_arg_type<placeholders::placeholder<I> > // placeholder<I>: type of the I'th element of Args
+ {
+ typedef typename boost::tuples::element<I, Args>::type type;
+ };
+
+ // meta-function returning the type of the N'th argument when invoked
+ template<int N>
+ struct get_nth_arg_type
+ {
+ typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg; // N'th bound type
+
+ typedef typename get_arg_type<nth_bound_arg>::type type;
+ };
+
+ template<int N> // overload enabled when the N'th bound argument is a placeholder
+ typename get_nth_arg_type<N>::type get_nth_arg(
+ typename boost::enable_if_c<is_placeholder_arg<N>::value>::type* = 0
+ ) const
+ {
+ typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;
+
+ return boost::get<nth_bound_arg::value>(m_args); // placeholder index selects the invocation argument
+ }
+
+ template<int N> // overload enabled when the N'th bound argument is not a placeholder
+ typename get_nth_arg_type<N>::type get_nth_arg(
+ typename boost::disable_if_c<is_placeholder_arg<N>::value>::type* = 0
+ ) const
+ {
+ return boost::get<N>(m_bound_args); // the bound value itself
+ }
+
+ Function m_function; // function passed to the constructor
+ BoundArgs m_bound_args; // arguments fixed at bind time (may contain placeholders)
+ Args m_args; // arguments supplied at invocation time
+};
+
+template<class Function, class BoundArgs, class Args> // overload for exactly one bound argument
+inline meta_kernel& apply_invoked_bound_function(
+ meta_kernel &k,
+ const invoked_bound_function<Function, BoundArgs, Args> &expr,
+ typename boost::enable_if_c<
+ boost::tuples::length<BoundArgs>::value == 1
+ >::type* = 0
+)
+{
+ return k << expr.m_function(expr.template get_nth_arg<0>()); // streams the call result into k
+}
+
+template<class Function, class BoundArgs, class Args> // overload for exactly two bound arguments
+inline meta_kernel& apply_invoked_bound_function(
+ meta_kernel &k,
+ const invoked_bound_function<Function, BoundArgs, Args> &expr,
+ typename boost::enable_if_c<
+ boost::tuples::length<BoundArgs>::value == 2
+ >::type* = 0
+)
+{
+ return k << expr.m_function(expr.template get_nth_arg<0>(), // streams the call result into k
+ expr.template get_nth_arg<1>());
+}
+
+template<class Function, class BoundArgs, class Args> // overload for exactly three bound arguments
+inline meta_kernel& apply_invoked_bound_function(
+ meta_kernel &k,
+ const invoked_bound_function<Function, BoundArgs, Args> &expr,
+ typename boost::enable_if_c<
+ boost::tuples::length<BoundArgs>::value == 3
+ >::type* = 0
+)
+{
+ return k << expr.m_function(expr.template get_nth_arg<0>(), // streams the call result into k
+ expr.template get_nth_arg<1>(),
+ expr.template get_nth_arg<2>());
+}
+
+template<class Function, class BoundArgs, class Args>
+inline meta_kernel& operator<<( // stream insertion for an invoked bound function
+ meta_kernel &k,
+ const invoked_bound_function<Function, BoundArgs, Args> &expr
+)
+{
+ return apply_invoked_bound_function(k, expr); // overload chosen by the length of BoundArgs
+}
+
+template<class Function, class BoundArgs> // BoundArgs: boost tuple of the bound arguments
+struct bound_function // function object returned by bind()
+{
+ typedef int result_type; // NOTE(review): fixed to int regardless of Function - confirm intended
+
+ bound_function(Function f, BoundArgs args) // stores copies of f and args
+ : m_function(f),
+ m_args(args)
+ {
+ }
+
+ template<class Arg1> // invocation with one argument
+ detail::invoked_bound_function<
+ Function,
+ BoundArgs,
+ boost::tuple<Arg1>
+ >
+ operator()(const Arg1 &arg1) const
+ {
+ return detail::invoked_bound_function<
+ Function,
+ BoundArgs,
+ boost::tuple<Arg1>
+ >(m_function, m_args, boost::make_tuple(arg1)); // invocation argument packed into a tuple
+ }
+
+ template<class Arg1, class Arg2> // invocation with two arguments
+ detail::invoked_bound_function<
+ Function,
+ BoundArgs,
+ boost::tuple<Arg1, Arg2>
+ >
+ operator()(const Arg1 &arg1, const Arg2 &arg2) const
+ {
+ return detail::invoked_bound_function<
+ Function,
+ BoundArgs,
+ boost::tuple<Arg1, Arg2>
+ >(m_function, m_args, boost::make_tuple(arg1, arg2)); // invocation arguments packed into a tuple
+ }
+
+ Function m_function; // the function given to bind()
+ BoundArgs m_args; // the bound arguments (values and/or placeholders)
+};
+
+} // end detail namespace
+
+#if !defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+/// Returns a function wrapper which invokes \p f with \p args when called.
+///
+/// For example, to generate a unary function object which returns \c true
+/// when its argument is less than \c 7:
+/// \code
+/// using boost::compute::less;
+/// using boost::compute::placeholders::_1;
+///
+/// auto less_than_seven = boost::compute::bind(less<int>(), _1, 7);
+/// \endcode
+template<class F, class... Args>
+inline detail::bound_function<F, boost::tuple<Args...> >
+bind(F f, Args... args) // f and args are taken by value
+{
+ typedef typename boost::tuple<Args...> ArgsTuple;
+
+ return detail::bound_function<F, ArgsTuple>(f, boost::make_tuple(args...));
+}
+#else // fallback without variadic templates: fixed overloads for one, two and three bound arguments
+template<class F, class A1>
+inline detail::bound_function<F, boost::tuple<A1> >
+bind(F f, A1 a1) // one bound argument
+{
+ typedef typename boost::tuple<A1> Args;
+
+ return detail::bound_function<F, Args>(f, boost::make_tuple(a1));
+}
+
+template<class F, class A1, class A2>
+inline detail::bound_function<F, boost::tuple<A1, A2> >
+bind(F f, A1 a1, A2 a2) // two bound arguments
+{
+ typedef typename boost::tuple<A1, A2> Args;
+
+ return detail::bound_function<F, Args>(f, boost::make_tuple(a1, a2));
+}
+
+template<class F, class A1, class A2, class A3>
+inline detail::bound_function<F, boost::tuple<A1, A2, A3> >
+bind(F f, A1 a1, A2 a2, A3 a3) // three bound arguments
+{
+ typedef typename boost::tuple<A1, A2, A3> Args;
+
+ return detail::bound_function<F, Args>(f, boost::make_tuple(a1, a2, a3));
+}
+#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_BIND_HPP
diff --git a/boost/compute/functional/common.hpp b/boost/compute/functional/common.hpp
new file mode 100644
index 0000000000..9ad8b43502
--- /dev/null
+++ b/boost/compute/functional/common.hpp
@@ -0,0 +1,29 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP
+
+#include <boost/compute/functional/detail/macros.hpp>
+
+namespace boost {
+namespace compute {
+
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(clamp, T (T, T, T), class T) // class clamp<T>, named "clamp"
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(degrees, T (T), class T) // class degrees<T>, named "degrees"
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(radians, T (T), class T) // class radians<T>, named "radians"
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sign, T (T), class T) // class sign<T>, named "sign"
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(smoothstep, T (T, T, T), class T) // class smoothstep<T>, named "smoothstep"
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(step, T (T, T), class T) // class step<T>, named "step"
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP
diff --git a/boost/compute/functional/convert.hpp b/boost/compute/functional/convert.hpp
new file mode 100644
index 0000000000..f182e8ec72
--- /dev/null
+++ b/boost/compute/functional/convert.hpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T, class Arg> // T: target type named by convert<T>; Arg: type of the stored argument
+struct invoked_convert // value returned by convert<T>::operator(); it only stores the argument
+{
+ invoked_convert(const Arg &arg) // copies arg into m_arg
+ : m_arg(arg)
+ {
+ }
+
+ Arg m_arg; // copy of the argument given to the constructor
+};
+
+} // end detail namespace
+
+/// The \ref convert function object converts its argument to type \c T
+/// (similar to static_cast<T>).
+///
+/// \see \ref as "as<T>"
+template<class T>
+struct convert
+{
+ typedef T result_type; // type the argument is converted to
+
+ /// \internal_
+ template<class Arg>
+ detail::invoked_convert<T, Arg> operator()(const Arg &arg) const // wraps a copy of arg
+ {
+ return detail::invoked_convert<T, Arg>(arg);
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP
diff --git a/boost/compute/functional/detail/macros.hpp b/boost/compute/functional/detail/macros.hpp
new file mode 100644
index 0000000000..71ae3722e5
--- /dev/null
+++ b/boost/compute/functional/detail/macros.hpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP
+
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/stringize.hpp>
+
+#include <boost/compute/function.hpp>
+
+#define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(name, signature, template_args) \
+ template<template_args> \
+ class name : public function<signature> \
+ { \
+ public: \
+ (name)() : function<signature>(BOOST_PP_STRINGIZE(name)) { } \
+ }; // declares class template `name` deriving from function<signature>, constructed with "name"
+
+#define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(name, signature, template_args) \
+ template<template_args> \
+ class BOOST_PP_CAT(name, _) : public function<signature> \
+ { \
+ public: \
+ BOOST_PP_CAT(name, _)() : function<signature>(BOOST_PP_STRINGIZE(name)) { } \
+ }; // as above, but the class is called `name_` while the string passed to function<> stays "name"
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP
diff --git a/boost/compute/functional/detail/nvidia_ballot.hpp b/boost/compute/functional/detail/nvidia_ballot.hpp
new file mode 100644
index 0000000000..cf66828f1c
--- /dev/null
+++ b/boost/compute/functional/detail/nvidia_ballot.hpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP
+
+#include <boost/compute/function.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T>
+class nvidia_ballot : public function<uint_(T)> // takes a T, returns uint_
+{
+public:
+ nvidia_ballot()
+ : function<uint_(T)>("nvidia_ballot") // function name matches the definition in the source below
+ {
+ this->set_source( // source text uses inline asm; NOTE(review): its parameter is uint regardless of T
+ "inline uint nvidia_ballot(const uint x)\n"
+ "{\n"
+ " uint result;\n"
+ " asm volatile(\n"
+ " \"setp.ne.u32 %%p1, %1, 0;\"\n"
+ " \"vote.ballot.b32 %0, %%p1;\"\n"
+ " : \"=r\"(result)\n"
+ " : \"r\"(x)\n"
+ " );\n"
+ " return result;\n"
+ "}\n"
+ );
+ }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP
diff --git a/boost/compute/functional/detail/nvidia_popcount.hpp b/boost/compute/functional/detail/nvidia_popcount.hpp
new file mode 100644
index 0000000000..b042ea4ba9
--- /dev/null
+++ b/boost/compute/functional/detail/nvidia_popcount.hpp
@@ -0,0 +1,42 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP
+
+#include <boost/compute/function.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T>
+class nvidia_popcount : public function<T(T)> // takes a T, returns a T
+{
+public:
+ nvidia_popcount()
+ : function<T(T)>("nvidia_popcount") // function name matches the definition in the source below
+ {
+ this->set_source( // source text uses inline asm; NOTE(review): it is written for uint regardless of T
+ "inline uint nvidia_popcount(const uint x)\n"
+ "{\n"
+ " uint count;\n"
+ " asm(\"popc.b32 %0, %1;\" : \"=r\"(count) : \"r\"(x));\n"
+ " return count;\n"
+ "}\n"
+ );
+ }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP
diff --git a/boost/compute/functional/detail/unpack.hpp b/boost/compute/functional/detail/unpack.hpp
new file mode 100644
index 0000000000..e64672f142
--- /dev/null
+++ b/boost/compute/functional/detail/unpack.hpp
@@ -0,0 +1,143 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP
+
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/type_traits/is_vector_type.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class Function, class Arg, size_t Arity> // Arity: number of elements to unpack from Arg
+struct invoked_unpacked // value returned by unpacked<Function>::operator()
+{
+ invoked_unpacked(const Function &f, const Arg &arg) // stores copies of f and arg
+ : m_function(f),
+ m_arg(arg)
+ {
+ }
+
+ Function m_function; // function to call with the unpacked elements
+ Arg m_arg; // aggregate argument whose elements are extracted with get<N>
+};
+
+template<class Function, class Arg, size_t Arity> // declaration only; overloads for Arity 1, 2 and 3 follow
+inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, Arity> &expr);
+
+template<class Function, class Arg> // Arity 1: call with element 0 only
+inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 1> &expr)
+{
+ return k << expr.m_function(get<0>()(expr.m_arg));
+}
+
+template<class Function, class Arg> // Arity 2: call with elements 0 and 1
+inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 2> &expr)
+{
+ return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg));
+}
+
+template<class Function, class Arg> // Arity 3: call with elements 0, 1 and 2
+inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 3> &expr)
+{
+ return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg), get<2>()(expr.m_arg));
+}
+
+template<class Function>
+struct unpacked // adapts Function so that it is called with the elements of one aggregate argument
+{
+ template<class T, class Enable = void>
+ struct aggregate_length // primary template: element count taken from boost::tuples::length<T>
+ {
+ BOOST_STATIC_CONSTANT(size_t, value = boost::tuples::length<T>::value);
+ };
+
+ template<class T>
+ struct aggregate_length<T, typename enable_if<is_vector_type<T> >::type> // vector types: vector_size<T>
+ {
+ BOOST_STATIC_CONSTANT(size_t, value = vector_size<T>::value);
+ };
+
+ template<class TupleArg, size_t TupleSize>
+ struct result_impl {}; // primary template is empty; only sizes 1, 2 and 3 are specialized
+
+ template<class TupleArg>
+ struct result_impl<TupleArg, 1> // result of Function called with one element
+ {
+ typedef typename detail::get_result_type<0, TupleArg>::type T1;
+
+ typedef typename boost::compute::result_of<Function(T1)>::type type;
+ };
+
+ template<class TupleArg>
+ struct result_impl<TupleArg, 2> // result of Function called with two elements
+ {
+ typedef typename detail::get_result_type<0, TupleArg>::type T1;
+ typedef typename detail::get_result_type<1, TupleArg>::type T2;
+
+ typedef typename boost::compute::result_of<Function(T1, T2)>::type type;
+ };
+
+ template<class TupleArg>
+ struct result_impl<TupleArg, 3> // result of Function called with three elements
+ {
+ typedef typename detail::get_result_type<0, TupleArg>::type T1;
+ typedef typename detail::get_result_type<1, TupleArg>::type T2;
+ typedef typename detail::get_result_type<2, TupleArg>::type T3;
+
+ typedef typename boost::compute::result_of<Function(T1, T2, T3)>::type type;
+ };
+
+ template<class Signature>
+ struct result {}; // primary template is empty; see the This(Arg) specialization
+
+ template<class This, class Arg>
+ struct result<This(Arg)> // picks result_impl by the aggregate length of Arg
+ {
+ typedef typename result_impl<Arg, aggregate_length<Arg>::value>::type type;
+ };
+
+ unpacked(const Function &f) // stores a copy of f
+ : m_function(f)
+ {
+ }
+
+ template<class Arg> // Arg must provide a nested result_type; its aggregate length becomes the arity
+ detail::invoked_unpacked<
+ Function, Arg, aggregate_length<typename Arg::result_type>::value
+ >
+ operator()(const Arg &arg) const
+ {
+ return detail::invoked_unpacked<
+ Function,
+ Arg,
+ aggregate_length<typename Arg::result_type>::value
+ >(m_function, arg);
+ }
+
+ Function m_function; // the wrapped function
+};
+
+template<class Function> // Function is deduced from f
+inline unpacked<Function> unpack(const Function &f) // convenience factory for unpacked<Function>
+{
+ return unpacked<Function>(f);
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP
diff --git a/boost/compute/functional/field.hpp b/boost/compute/functional/field.hpp
new file mode 100644
index 0000000000..cd9d81541c
--- /dev/null
+++ b/boost/compute/functional/field.hpp
@@ -0,0 +1,86 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP
+
+#include <string>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T, class Arg> // T: declared type of the field; Arg: type of the stored argument
+struct invoked_field // value returned by field<T>::operator()
+{
+ typedef T result_type; // type of the accessed field
+
+ invoked_field(const Arg &arg, const std::string &field) // stores copies of both
+ : m_arg(arg),
+ m_field(field)
+ {
+ }
+
+ Arg m_arg; // value the field is read from
+ std::string m_field; // name of the field
+};
+
+} // end detail namespace
+
+/// Returns the named field from a value.
+///
+/// The template-type \c T specifies the field's value type. Note
+/// that the value type must match the actual type of the field;
+/// otherwise runtime compilation or logic errors may occur.
+///
+/// For example, to access the \c second field in a
+/// \c std::pair<int, float> object:
+/// \code
+/// field<float>("second");
+/// \endcode
+///
+/// This can also be used with vector types to access individual
+/// components as well as perform swizzle operations.
+///
+/// For example, to access the first and third components of an
+/// \c int vector type (e.g. \c int4):
+/// \code
+/// field<int2_>("xz");
+/// \endcode
+///
+/// \see \ref get "get<N>"
+template<class T>
+class field
+{
+public:
+ /// Result type.
+ typedef T result_type;
+
+ /// Creates a new field functor with \p field.
+ field(const std::string &field) // not explicit: a std::string converts implicitly
+ : m_field(field)
+ {
+ }
+
+ /// \internal_
+ template<class Arg>
+ detail::invoked_field<T, Arg> operator()(const Arg &arg) const // pairs arg with the stored field name
+ {
+ return detail::invoked_field<T, Arg>(arg, m_field);
+ }
+
+private:
+ std::string m_field; // field name given to the constructor
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP
diff --git a/boost/compute/functional/geometry.hpp b/boost/compute/functional/geometry.hpp
new file mode 100644
index 0000000000..ea37c02bfe
--- /dev/null
+++ b/boost/compute/functional/geometry.hpp
@@ -0,0 +1,32 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP
+
+#include <boost/compute/type_traits.hpp>
+#include <boost/compute/functional/detail/macros.hpp>
+
+namespace boost {
+namespace compute {
+
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cross, T (T, T), class T) // returns T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(dot, typename scalar_type<T>::type (T, T), class T) // returns the scalar type of T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(distance, typename scalar_type<T>::type (T, T), class T) // returns the scalar type of T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_distance, typename scalar_type<T>::type (T, T), class T) // returns the scalar type of T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(length, typename scalar_type<T>::type (T), class T) // returns the scalar type of T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_length, typename scalar_type<T>::type (T), class T) // returns the scalar type of T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(normalize, T (T), class T) // returns T
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_normalize, T (T), class T) // returns T
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP
diff --git a/boost/compute/functional/get.hpp b/boost/compute/functional/get.hpp
new file mode 100644
index 0000000000..2d3b7a489c
--- /dev/null
+++ b/boost/compute/functional/get.hpp
@@ -0,0 +1,76 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_GET_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_GET_HPP
+
+#include <cstddef>
+
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/type_traits/scalar_type.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// meta-function returning the result type for get<N>()
+template<size_t N, class Arg>
+struct get_result_type // primary template: N is not used, the result is the scalar type of Arg
+{
+ typedef typename scalar_type<Arg>::type type;
+};
+
+template<size_t N, class Arg, class T> // N: element index; Arg: stored argument type; T: aggregate type
+struct invoked_get // value returned by get<N>::operator()
+{
+ typedef typename get_result_type<N, T>::type result_type; // computed from T, not from Arg
+
+ invoked_get(const Arg &arg) // copies arg into m_arg
+ : m_arg(arg)
+ {
+ }
+
+ Arg m_arg; // copy of the argument given to the constructor
+};
+
+} // end detail namespace
+
+/// Returns the \c N'th element of an aggregate type (e.g. scalarN,
+/// pair, tuple, etc.).
+///
+/// \see \ref field "field<T>"
+template<size_t N>
+struct get
+{
+ /// \internal_
+ template<class> struct result;
+
+ /// \internal_
+ template<class F, class Arg>
+ struct result<F(Arg)>
+ {
+ typedef typename detail::get_result_type<N, Arg>::type type;
+ };
+
+ template<class Arg> // Arg must provide a nested result_type
+ detail::invoked_get<
+ N, Arg, typename boost::remove_cv<typename Arg::result_type>::type
+ > operator()(const Arg &arg) const
+ {
+ typedef typename boost::remove_cv<typename Arg::result_type>::type T; // cv-stripped aggregate type
+
+ return detail::invoked_get<N, Arg, T>(arg);
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_GET_HPP
diff --git a/boost/compute/functional/hash.hpp b/boost/compute/functional/hash.hpp
new file mode 100644
index 0000000000..830c422fdb
--- /dev/null
+++ b/boost/compute/functional/hash.hpp
@@ -0,0 +1,91 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_HASH_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_HASH_HPP
+
+#include <boost/compute/function.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class Key> // Key: type whose type_name<Key>() is appended to the function name
+std::string make_hash_function_name() // returns "boost_hash_" followed by type_name<Key>()
+{
+ return std::string("boost_hash_") + type_name<Key>();
+}
+
+template<class Key> // Key: parameter type of the generated function
+inline std::string make_hash_function_source() // returns the source text of the hash function for Key
+{
+ std::stringstream source;
+ source << "inline ulong " << make_hash_function_name<Key>()
+ << "(const " << type_name<Key>() << " x)\n"
+ << "{\n"
+ // note we reinterpret the argument as a 32-bit uint and
+ // then promote it to a 64-bit ulong for the result type
+ << " ulong a = as_uint(x);\n"
+ << " a = (a ^ 61) ^ (a >> 16);\n"
+ << " a = a + (a << 3);\n"
+ << " a = a ^ (a >> 4);\n"
+ << " a = a * 0x27d4eb2d;\n"
+ << " a = a ^ (a >> 15);\n"
+ << " return a;\n"
+ << "}\n";
+ return source.str();
+}
+
+template<class Key>
+struct hash_impl // shared implementation that the hash<Key> specializations derive from
+{
+ typedef Key argument_type;
+ typedef ulong_ result_type;
+
+ hash_impl()
+ : m_function("") // constructed with an empty name, then replaced in the constructor body
+ {
+ m_function = make_function_from_source<result_type(argument_type)>(
+ make_hash_function_name<argument_type>(),
+ make_hash_function_source<argument_type>()
+ );
+ }
+
+ template<class Arg>
+ invoked_function<result_type, boost::tuple<Arg> >
+ operator()(const Arg &arg) const // forwards arg to the generated function object
+ {
+ return m_function(arg);
+ }
+
+ function<result_type(argument_type)> m_function; // function built from the generated source
+};
+
+} // end detail namespace
+
+/// The hash function returns a hash value for the input value.
+///
+/// The return type is \c ulong_ (the OpenCL unsigned long type).
+template<class Key> struct hash; // declared only; usable through the specializations that follow
+
+/// \internal_
+template<> struct hash<int_> : detail::hash_impl<int_> { };
+
+/// \internal_
+template<> struct hash<uint_> : detail::hash_impl<uint_> { };
+
+/// \internal_
+template<> struct hash<float_> : detail::hash_impl<float_> { };
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_HASH_HPP
diff --git a/boost/compute/functional/identity.hpp b/boost/compute/functional/identity.hpp
new file mode 100644
index 0000000000..72740d9788
--- /dev/null
+++ b/boost/compute/functional/identity.hpp
@@ -0,0 +1,64 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T, class Arg> // T: result type named by identity<T>; Arg: type of the stored argument
+struct invoked_identity // value returned by identity<T>::operator()
+{
+ typedef T result_type; // same as identity<T>::result_type
+
+ invoked_identity(const Arg &arg) // copies arg into m_arg
+ : m_arg(arg)
+ {
+ }
+
+ Arg m_arg; // copy of the argument given to the constructor
+};
+
+} // end detail namespace
+
+/// Identity function which simply returns its input.
+///
+/// For example, to directly copy values using the transform() algorithm:
+/// \code
+/// transform(input.begin(), input.end(), output.begin(), identity<int>(), queue);
+/// \endcode
+///
+/// \see \ref as "as<T>", \ref convert "convert<T>"
+template<class T>
+class identity
+{
+public:
+ /// Identity function result type.
+ typedef T result_type;
+
+ /// Creates a new identity function.
+ identity() // no state to initialize
+ {
+ }
+
+ /// \internal_
+ template<class Arg>
+ detail::invoked_identity<T, Arg> operator()(const Arg &arg) const // wraps a copy of arg
+ {
+ return detail::invoked_identity<T, Arg>(arg);
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP
diff --git a/boost/compute/functional/integer.hpp b/boost/compute/functional/integer.hpp
new file mode 100644
index 0000000000..8ff6c2bd4a
--- /dev/null
+++ b/boost/compute/functional/integer.hpp
@@ -0,0 +1,30 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP
+
+#include <boost/compute/functional/detail/macros.hpp>
+
+namespace boost {
+namespace compute {
+// Integer function objects declared via the macro from functional/detail/macros.hpp (definition not shown here); presumably each wraps the OpenCL built-in of the same name.
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs_diff, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(add_sat, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hadd, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rhadd, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(max, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(min, T (T, T), class T)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP
diff --git a/boost/compute/functional/logical.hpp b/boost/compute/functional/logical.hpp
new file mode 100644
index 0000000000..2e2c7518b5
--- /dev/null
+++ b/boost/compute/functional/logical.hpp
@@ -0,0 +1,208 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_ Expression node for a negated unary predicate call.
+/// Holds the predicate together with the expression it is applied to.
+template<class Predicate, class Expr>
+class invoked_unary_negate_function
+{
+public:
+ /// A negated predicate evaluates to an int.
+ typedef int result_type;
+
+ /// Stores copies of \p predicate and of its \p operand expression.
+ invoked_unary_negate_function(const Predicate &predicate,
+ const Expr &operand)
+ : m_pred(predicate), m_expr(operand)
+ {
+ }
+
+ /// Returns a copy of the predicate being negated.
+ Predicate pred() const { return m_pred; }
+
+ /// Returns a copy of the expression the predicate is applied to.
+ Expr expr() const { return m_expr; }
+
+private:
+ // declaration order matches the constructor's initializer list
+ Predicate m_pred;
+ Expr m_expr;
+};
+
+/// \internal_ Expression node for a negated binary predicate call.
+/// Holds the predicate together with both expressions it is applied to.
+/// \see binary_negate
+template<class Predicate, class Expr1, class Expr2>
+class invoked_binary_negate_function
+{
+public:
+ /// A negated predicate evaluates to an int.
+ typedef int result_type;
+
+ /// Stores copies of \p predicate and of the \p lhs / \p rhs expressions.
+ invoked_binary_negate_function(const Predicate &predicate,
+ const Expr1 &lhs,
+ const Expr2 &rhs)
+ : m_pred(predicate),
+ m_expr1(lhs),
+ m_expr2(rhs)
+ {
+ }
+
+ /// Returns a copy of the predicate being negated.
+ Predicate pred() const { return m_pred; }
+
+ /// Returns a copy of the first argument expression.
+ Expr1 expr1() const { return m_expr1; }
+
+ /// Returns a copy of the second argument expression.
+ Expr2 expr2() const { return m_expr2; }
+
+private:
+ // declaration order matches the constructor's initializer list
+ Predicate m_pred;
+ Expr1 m_expr1;
+ Expr2 m_expr2;
+};
+
+} // end detail namespace
+
+/// \internal_ Supplies the argument_type and result_type typedefs for unary function objects.
+template<class Arg, class Result>
+struct unary_function
+{
+ typedef Result result_type;
+ typedef Arg argument_type;
+};
+
+/// \internal_ Supplies the first_argument_type, second_argument_type and result_type typedefs for binary function objects.
+template<class Arg1, class Arg2, class Result>
+struct binary_function
+{
+ typedef Result result_type;
+ typedef Arg1 first_argument_type;
+ typedef Arg2 second_argument_type;
+};
+
+/// \internal_ Supplies the first/second/third_argument_type and result_type typedefs for ternary function objects.
+template<class Arg1, class Arg2, class Arg3, class Result>
+struct ternary_function
+{
+ typedef Result result_type;
+ typedef Arg1 first_argument_type;
+ typedef Arg2 second_argument_type;
+ typedef Arg3 third_argument_type;
+};
+
+/// Function adaptor that wraps a unary predicate and negates its result.
+///
+/// \see not1()
+template<class Predicate>
+class unary_negate : public unary_function<void, int>
+{
+public:
+ /// Wraps a copy of \p pred.
+ explicit unary_negate(Predicate pred) : m_pred(pred) { }
+
+ /// \internal_ Builds the expression node negating m_pred applied to \p arg.
+ template<class Arg>
+ detail::invoked_unary_negate_function<Predicate, Arg>
+ operator()(const Arg &arg) const
+ {
+ typedef detail::invoked_unary_negate_function<
+ Predicate,
+ Arg
+ > negated_call;
+
+ return negated_call(m_pred, arg);
+ }
+
+private:
+ Predicate m_pred; ///< the wrapped predicate
+};
+
+/// Function adaptor that wraps a binary predicate and negates its result.
+///
+/// \see not2()
+template<class Predicate>
+class binary_negate : public binary_function<void, void, int>
+{
+public:
+ /// Wraps a copy of \p pred.
+ explicit binary_negate(Predicate pred) : m_pred(pred) { }
+
+ /// \internal_ Builds the expression node negating m_pred applied to both arguments.
+ template<class Arg1, class Arg2>
+ detail::invoked_binary_negate_function<Predicate, Arg1, Arg2>
+ operator()(const Arg1 &arg1, const Arg2 &arg2) const
+ {
+ typedef detail::invoked_binary_negate_function<
+ Predicate,
+ Arg1,
+ Arg2
+ > negated_call;
+
+ return negated_call(m_pred, arg1, arg2);
+ }
+
+private:
+ Predicate m_pred; ///< the wrapped predicate
+};
+
+/// Convenience helper that wraps \p predicate in a unary_negate adaptor.
+///
+/// \param predicate the unary function to negate
+/// \return a unary_negate<Predicate> holding a copy of \p predicate
+template<class Predicate>
+inline unary_negate<Predicate> not1(const Predicate &predicate)
+{
+ unary_negate<Predicate> negated(predicate);
+ return negated;
+}
+
+/// Convenience helper that wraps \p predicate in a binary_negate adaptor.
+///
+/// \param predicate the binary function to negate
+/// \return a binary_negate<Predicate> holding a copy of \p predicate
+template<class Predicate>
+inline binary_negate<Predicate> not2(const Predicate &predicate)
+{
+ binary_negate<Predicate> negated(predicate);
+ return negated;
+}
+
+/// Function object applying the logical-not operator (\c !) to its argument.
+///
+/// \see not1(), not2()
+template<class T>
+struct logical_not : public unary_function<T, int>
+{
+ /// \internal_ Builds an int-valued invoked_function from "!", an empty string and a one-element tuple holding \p expr.
+ template<class Expr>
+ detail::invoked_function<int, boost::tuple<Expr> >
+ operator()(const Expr &expr) const
+ {
+ typedef detail::invoked_function<int, boost::tuple<Expr> > call_type;
+
+ return call_type("!", std::string(), boost::make_tuple(expr));
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP
diff --git a/boost/compute/functional/math.hpp b/boost/compute/functional/math.hpp
new file mode 100644
index 0000000000..6dea05f6f2
--- /dev/null
+++ b/boost/compute/functional/math.hpp
@@ -0,0 +1,80 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_MATH_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_MATH_HPP
+
+#include <boost/compute/functional/detail/macros.hpp>
+
+namespace boost {
+namespace compute {
+// Function objects for the OpenCL math built-ins; each declaration gives the call signature (argument count and result type) of the function object.
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acos, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acosh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acospi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asin, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2pi, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cbrt, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ceil, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(copysign, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cos, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cosh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cospi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erf, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erfc, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp2, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp10, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(expm1, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fabs, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fdim, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(floor, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fma, T (T, T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmax, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmin, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmod, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hypot, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ilogb, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(lgamma, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log2, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log10, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log1p, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(logb, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(mad, T (T, T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(nextafter, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pow, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pown, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(powr, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(remainder, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rint, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rootn, T (T, int), class T) // OpenCL rootn(x, n) is binary: value plus an integer root
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(round, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rsqrt, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sin, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sqrt, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tan, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tgamma, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(trunc, T (T), class T)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_MATH_HPP
diff --git a/boost/compute/functional/operator.hpp b/boost/compute/functional/operator.hpp
new file mode 100644
index 0000000000..908372a326
--- /dev/null
+++ b/boost/compute/functional/operator.hpp
@@ -0,0 +1,100 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP
+
+#include <string>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+/// \internal_ Expression node for a binary operator applied to two expressions.
+///
+/// Holds the operator string together with copies of both operands.
+template<class Expr1, class Expr2, class Result>
+struct invoked_binary_operator
+{
+ /// Type the operator expression evaluates to.
+ typedef Result result_type;
+
+ /// Stores copies of the operator string \p symbol and of both operands.
+ invoked_binary_operator(const std::string &symbol,
+ const Expr1 &first,
+ const Expr2 &second)
+ : m_op(symbol),
+ m_expr1(first),
+ m_expr2(second)
+ {
+ }
+
+ /// Returns a copy of the operator string.
+ std::string op() const { return m_op; }
+
+ /// Returns a copy of the first operand.
+ Expr1 arg1() const { return m_expr1; }
+
+ /// Returns a copy of the second operand.
+ Expr2 arg2() const { return m_expr2; }
+
+ // public data members, declared in initialization order
+ std::string m_op;
+ Expr1 m_expr1;
+ Expr2 m_expr2;
+};
+
+} // end detail namespace
+
+/// \internal_ Declares class template \c name<arg_type>, a function<return_type (arg_type, arg_type)> whose call operator builds an invoked_binary_operator node carrying \c op and typed \c return_type.
+#define BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(name, op, return_type, arg_type) \
+ template<class arg_type> \
+ class name : public function<return_type (arg_type, arg_type)> \
+ { \
+ public: \
+ name() : function<return_type (arg_type, arg_type)>(BOOST_PP_STRINGIZE(name)) { } \
+ \
+ template<class Arg1, class Arg2> \
+ detail::invoked_binary_operator<Arg1, Arg2, return_type> \
+ operator()(const Arg1 &x, const Arg2 &y) const \
+ { \
+ return detail::invoked_binary_operator<Arg1, Arg2, return_type>(op, x, y); \
+ } \
+ };
+
+// arithmetic operations: each class carries the quoted operator string; argument and result type are both T
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(plus, "+", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(minus, "-", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(multiplies, "*", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(divides, "/", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(modulus, "%", T, T)
+
+// comparisons (note: declared with result type T, like every other operator in this list)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(equal_to, "==", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(not_equal_to, "!=", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater, ">", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less, "<", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater_equal, ">=", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less_equal, "<=", T, T)
+
+// logical operators (binary only; logical_not is not declared in this list)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_and, "&&", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_or, "||", T, T)
+
+// bitwise and shift operations
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_and, "&", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_or, "|", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_xor, "^", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_left, "<<", T, T)
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_right, ">>", T, T)
+
+} // end compute namespace
+} // end boost namespace
+#endif // BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP
diff --git a/boost/compute/functional/popcount.hpp b/boost/compute/functional/popcount.hpp
new file mode 100644
index 0000000000..7326e7022f
--- /dev/null
+++ b/boost/compute/functional/popcount.hpp
@@ -0,0 +1,55 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP
+
+#include <boost/compute/function.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Function object returning the number of non-zero bits in \p x.
+///
+/// \see_opencl_ref{popcount}
+template<class T>
+class popcount : public function<T(T)>
+{
+public:
+ /// Generates the OpenCL helper "boost_popcount" for \c T and installs it as this function's source.
+ popcount() : function<T(T)>("boost_popcount")
+ {
+ std::stringstream source;
+ source << "inline " << type_name<T>() << " boost_popcount";
+ source << "(const " << type_name<T>() << " x)\n";
+ source << "{\n";
+ // OpenCL 1.2 and later ship a native popcount()
+ source << "#if __OPENCL_VERSION__ >= 120\n";
+ source << " return popcount(x);\n";
+ // older versions count the set bits one position at a time
+ source << "#else\n";
+ source << " " << type_name<T>() << " count = 0;\n";
+ source << " for(" << type_name<T>() << " i = 0; i < sizeof(i) * CHAR_BIT; i++){\n";
+ source << " if(x & (" << type_name<T>() << ") 1 << i){\n";
+ source << " count++;\n";
+ source << " }\n";
+ source << " }\n";
+ source << " return count;\n";
+ source << "#endif\n";
+ source << "}\n";
+ this->set_source(source.str());
+ }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP
diff --git a/boost/compute/functional/relational.hpp b/boost/compute/functional/relational.hpp
new file mode 100644
index 0000000000..1a88052c05
--- /dev/null
+++ b/boost/compute/functional/relational.hpp
@@ -0,0 +1,39 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP
+
+#include <boost/compute/functional/detail/macros.hpp>
+
+namespace boost {
+namespace compute {
+// Function objects for the OpenCL relational built-ins; the _UNDERSCORE macro variant is defined in functional/detail/macros.hpp (not shown here).
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isequal, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnotequal, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreater, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreaterequal, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isless, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessequal, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessgreater, int (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isfinite, int (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isinf, int (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnan, int (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnormal, int (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isordered, int (T, T), class T) // OpenCL isordered(x, y) is binary
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isunordered, int (T, T), class T) // OpenCL isunordered(x, y) is binary
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(signbit, int (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(any, int (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(all, int (T), class T)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP
diff --git a/boost/compute/image.hpp b/boost/compute/image.hpp
new file mode 100644
index 0000000000..1a7b9ca061
--- /dev/null
+++ b/boost/compute/image.hpp
@@ -0,0 +1,25 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_HPP
+#define BOOST_COMPUTE_IMAGE_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute image headers.
+
+#include <boost/compute/image/image1d.hpp>
+#include <boost/compute/image/image2d.hpp>
+#include <boost/compute/image/image3d.hpp>
+#include <boost/compute/image/image_format.hpp>
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/image/image_sampler.hpp>
+
+#endif // BOOST_COMPUTE_IMAGE_HPP
diff --git a/boost/compute/image/image1d.hpp b/boost/compute/image/image1d.hpp
new file mode 100644
index 0000000000..2d71934ab4
--- /dev/null
+++ b/boost/compute/image/image1d.hpp
@@ -0,0 +1,204 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_IMAGE1D_HPP
+#define BOOST_COMPUTE_IMAGE_IMAGE1D_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/image/image_format.hpp>
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declarations
+class command_queue;
+
+/// \class image1d
+/// \brief An OpenCL 1D image object (derives from image_object)
+///
+/// \opencl_version_warning{1,2}
+///
+/// \see image_format, image2d
+class image1d : public image_object
+{
+public:
+ /// Creates a null image1d object (default-constructs the image_object base).
+ image1d()
+ : image_object()
+ {
+ }
+
+ /// Creates a new image1d of width \p image_width in \p context; height and depth are fixed to 1.
+ /// Throws opencl_error if clCreateImage() fails; when built without CL_VERSION_1_2 it always throws CL_IMAGE_FORMAT_NOT_SUPPORTED.
+ /// \see_opencl_ref{clCreateImage}
+ image1d(const context &context,
+ size_t image_width,
+ const image_format &format,
+ cl_mem_flags flags = read_write,
+ void *host_ptr = 0)
+ {
+ #ifdef CL_VERSION_1_2
+ cl_image_desc desc;
+ desc.image_type = CL_MEM_OBJECT_IMAGE1D;
+ desc.image_width = image_width;
+ desc.image_height = 1;
+ desc.image_depth = 1;
+ desc.image_array_size = 0;
+ desc.image_row_pitch = 0;
+ desc.image_slice_pitch = 0;
+ desc.num_mip_levels = 0;
+ desc.num_samples = 0;
+ #ifdef CL_VERSION_2_0
+ desc.mem_object = 0;
+ #else
+ desc.buffer = 0;
+ #endif
+ // error receives the status code written by clCreateImage()
+ cl_int error = 0;
+
+ m_mem = clCreateImage(
+ context, flags, format.get_format_ptr(), &desc, host_ptr, &error
+ );
+ // a null handle means creation failed: report the status code as an exception
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ #else
+ // image1d objects are only supported in OpenCL 1.2 and later
+ BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED));
+ #endif
+ }
+
+ /// Creates a new image1d as a copy of \p other (delegates to the image_object copy constructor).
+ image1d(const image1d &other)
+ : image_object(other)
+ {
+ }
+
+ /// Copies the image1d from \p other (delegates to image_object::operator=) and returns \c *this.
+ image1d& operator=(const image1d &other)
+ {
+ image_object::operator=(other);
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new image object from \p other.
+ image1d(image1d&& other) BOOST_NOEXCEPT
+ : image_object(std::move(other))
+ {
+ }
+
+ /// Move-assigns the image from \p other to \c *this.
+ image1d& operator=(image1d&& other) BOOST_NOEXCEPT
+ {
+ image_object::operator=(std::move(other));
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the image1d object (no work of its own; cleanup is left to the base class).
+ ~image1d()
+ {
+ }
+
+ /// Returns the image width, queried with CL_IMAGE_WIDTH, as a one-element extents.
+ extents<1> size() const
+ {
+ extents<1> size;
+ size[0] = get_info<size_t>(CL_IMAGE_WIDTH);
+ return size;
+ }
+
+ /// Returns the origin of the image (\c 0) as a default-constructed extents<1>.
+ extents<1> origin() const
+ {
+ return extents<1>();
+ }
+
+ /// Returns the image property \p info as type \c T (forwards to get_image_info()).
+ ///
+ /// \see_opencl_ref{clGetImageInfo}
+ template<class T>
+ T get_info(cl_image_info info) const
+ {
+ return get_image_info<T>(info);
+ }
+
+ /// \overload
+ template<int Enum>
+ typename detail::get_object_info_type<image1d, Enum>::type
+ get_info() const;
+
+ /// Returns the 1D image formats supported by \p context for \p flags; an empty vector when built without CL_VERSION_1_2.
+ ///
+ /// \see_opencl_ref{clGetSupportedImageFormats}
+ static std::vector<image_format>
+ get_supported_formats(const context &context, cl_mem_flags flags = read_write)
+ {
+ #ifdef CL_VERSION_1_2
+ return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags);
+ #else
+ return std::vector<image_format>();
+ #endif
+ }
+
+ /// Returns \c true if \p format is a supported 1D image format for \p context
+ /// with \p flags; always \c false when built without CL_VERSION_1_2.
+ static bool is_supported_format(const image_format &format,
+ const context &context,
+ cl_mem_flags flags = read_write)
+ {
+ #ifdef CL_VERSION_1_2
+ return image_object::is_supported_format(
+ format, context, CL_MEM_OBJECT_IMAGE1D, flags
+ );
+ #else
+ return false;
+ #endif
+ }
+
+ /// Creates a new image with a copy of the data in \c *this. Uses \p queue
+ /// to perform the copy operation. Only declared here; the definition is not in this header.
+ image1d clone(command_queue &queue) const;
+};
+
+/// \internal_ define get_info<Enum>() specializations for image1d: each pair below is (result type, cl_image_info enum)
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image1d,
+ ((cl_image_format, CL_IMAGE_FORMAT))
+ ((size_t, CL_IMAGE_ELEMENT_SIZE))
+ ((size_t, CL_IMAGE_ROW_PITCH))
+ ((size_t, CL_IMAGE_SLICE_PITCH))
+ ((size_t, CL_IMAGE_WIDTH))
+ ((size_t, CL_IMAGE_HEIGHT))
+ ((size_t, CL_IMAGE_DEPTH))
+)
+
+namespace detail {
+
+// set_kernel_arg() specialization for image1d: inherits the image_object specialization unchanged
+template<>
+struct set_kernel_arg<image1d> : public set_kernel_arg<image_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+// presumably registers "image1d_t" as the OpenCL-side type name for image1d (macro definition not shown here)
+BOOST_COMPUTE_TYPE_NAME(boost::compute::image1d, image1d_t)
+
+#endif // BOOST_COMPUTE_IMAGE_IMAGE1D_HPP
diff --git a/boost/compute/image/image2d.hpp b/boost/compute/image/image2d.hpp
new file mode 100644
index 0000000000..c203a9417f
--- /dev/null
+++ b/boost/compute/image/image2d.hpp
@@ -0,0 +1,262 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_IMAGE2D_HPP
+#define BOOST_COMPUTE_IMAGE_IMAGE2D_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/image/image_format.hpp>
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declarations
+class command_queue;
+
+/// \class image2d
+/// \brief An OpenCL 2D image object (derives from image_object)
+///
+/// For example, to create a 640x480 8-bit RGBA image:
+///
+/// \snippet test/test_image2d.cpp create_image
+///
+/// \see image_format, image3d
+class image2d : public image_object
+{
+public:
+ /// Creates a null image2d object (default-constructs the image_object base).
+ image2d()
+ : image_object()
+ {
+ }
+
+ /// Creates a new \p image_width x \p image_height image2d in \p context; throws opencl_error if creation fails.
+ /// Uses clCreateImage() when built with CL_VERSION_1_2 and clCreateImage2D() otherwise.
+ /// \see_opencl_ref{clCreateImage}
+ image2d(const context &context,
+ size_t image_width,
+ size_t image_height,
+ const image_format &format,
+ cl_mem_flags flags = read_write,
+ void *host_ptr = 0,
+ size_t image_row_pitch = 0)
+ {
+ cl_int error = 0;
+
+ #ifdef CL_VERSION_1_2
+ cl_image_desc desc;
+ desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ desc.image_width = image_width;
+ desc.image_height = image_height;
+ desc.image_depth = 1;
+ desc.image_array_size = 0;
+ desc.image_row_pitch = image_row_pitch;
+ desc.image_slice_pitch = 0;
+ desc.num_mip_levels = 0;
+ desc.num_samples = 0;
+ #ifdef CL_VERSION_2_0
+ desc.mem_object = 0;
+ #else
+ desc.buffer = 0;
+ #endif
+ // OpenCL 1.2+ path: creation is driven by the descriptor filled in above
+ m_mem = clCreateImage(context,
+ flags,
+ format.get_format_ptr(),
+ &desc,
+ host_ptr,
+ &error);
+ #else
+ m_mem = clCreateImage2D(context,
+ flags,
+ format.get_format_ptr(),
+ image_width,
+ image_height,
+ image_row_pitch,
+ host_ptr,
+ &error);
+ #endif
+ // a null handle means creation failed: report the status code as an exception
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// \internal_ (deprecated) Same creation logic as the constructor above, with a different argument order (flags and format first, host_ptr last).
+ image2d(const context &context,
+ cl_mem_flags flags,
+ const image_format &format,
+ size_t image_width,
+ size_t image_height,
+ size_t image_row_pitch = 0,
+ void *host_ptr = 0)
+ {
+ cl_int error = 0;
+
+ #ifdef CL_VERSION_1_2
+ cl_image_desc desc;
+ desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ desc.image_width = image_width;
+ desc.image_height = image_height;
+ desc.image_depth = 1;
+ desc.image_array_size = 0;
+ desc.image_row_pitch = image_row_pitch;
+ desc.image_slice_pitch = 0;
+ desc.num_mip_levels = 0;
+ desc.num_samples = 0;
+ #ifdef CL_VERSION_2_0
+ desc.mem_object = 0;
+ #else
+ desc.buffer = 0;
+ #endif
+ // OpenCL 1.2+ path: creation is driven by the descriptor filled in above
+ m_mem = clCreateImage(context,
+ flags,
+ format.get_format_ptr(),
+ &desc,
+ host_ptr,
+ &error);
+ #else
+ m_mem = clCreateImage2D(context,
+ flags,
+ format.get_format_ptr(),
+ image_width,
+ image_height,
+ image_row_pitch,
+ host_ptr,
+ &error);
+ #endif
+ // a null handle means creation failed: report the status code as an exception
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new image2d as a copy of \p other (delegates to the image_object copy constructor).
+ image2d(const image2d &other)
+ : image_object(other)
+ {
+ }
+
+ /// Copies the image2d from \p other (delegates to image_object::operator=) and returns \c *this.
+ image2d& operator=(const image2d &other)
+ {
+ image_object::operator=(other);
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new image object from \p other.
+ image2d(image2d&& other) BOOST_NOEXCEPT
+ : image_object(std::move(other))
+ {
+ }
+
+ /// Move-assigns the image from \p other to \c *this.
+ image2d& operator=(image2d&& other) BOOST_NOEXCEPT
+ {
+ image_object::operator=(std::move(other));
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the image2d object (no work of its own; cleanup is left to the base class).
+ ~image2d()
+ {
+ }
+
+ /// Returns the size (width, height) of the image, queried with CL_IMAGE_WIDTH and CL_IMAGE_HEIGHT.
+ extents<2> size() const
+ {
+ extents<2> size;
+ size[0] = get_info<size_t>(CL_IMAGE_WIDTH);
+ size[1] = get_info<size_t>(CL_IMAGE_HEIGHT);
+ return size;
+ }
+
+ /// Returns the origin of the image (\c 0, \c 0) as a default-constructed extents<2>.
+ extents<2> origin() const
+ {
+ return extents<2>();
+ }
+
+ /// Returns the image property \p info as type \c T (calls clGetImageInfo on m_mem through detail::get_object_info).
+ ///
+ /// \see_opencl_ref{clGetImageInfo}
+ template<class T>
+ T get_info(cl_image_info info) const
+ {
+ return detail::get_object_info<T>(clGetImageInfo, m_mem, info);
+ }
+
+ /// \overload
+ template<int Enum>
+ typename detail::get_object_info_type<image2d, Enum>::type
+ get_info() const;
+
+ /// Returns the 2D image formats supported by \p context for \p flags.
+ ///
+ /// \see_opencl_ref{clGetSupportedImageFormats}
+ static std::vector<image_format>
+ get_supported_formats(const context &context, cl_mem_flags flags = read_write)
+ {
+ return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE2D, flags);
+ }
+
+ /// Returns \c true if \p format is a supported 2D image format for
+ /// \p context with \p flags.
+ static bool is_supported_format(const image_format &format,
+ const context &context,
+ cl_mem_flags flags = read_write)
+ {
+ return image_object::is_supported_format(
+ format, context, CL_MEM_OBJECT_IMAGE2D, flags
+ );
+ }
+
+ /// Creates a new image with a copy of the data in \c *this. Uses \p queue
+ /// to perform the copy operation. Only declared here; the definition is not in this header.
+ image2d clone(command_queue &queue) const;
+};
+
+/// \internal_ define get_info<Enum>() specializations for image2d: each pair below is (result type, cl_image_info enum)
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image2d,
+ ((cl_image_format, CL_IMAGE_FORMAT))
+ ((size_t, CL_IMAGE_ELEMENT_SIZE))
+ ((size_t, CL_IMAGE_ROW_PITCH))
+ ((size_t, CL_IMAGE_SLICE_PITCH))
+ ((size_t, CL_IMAGE_WIDTH))
+ ((size_t, CL_IMAGE_HEIGHT))
+ ((size_t, CL_IMAGE_DEPTH))
+)
+
+namespace detail {
+
+// set_kernel_arg() specialization for image2d: inherits the image_object specialization unchanged
+template<>
+struct set_kernel_arg<image2d> : public set_kernel_arg<image_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+// presumably registers "image2d_t" as the OpenCL-side type name for image2d (macro definition not shown here)
+BOOST_COMPUTE_TYPE_NAME(boost::compute::image2d, image2d_t)
+
+#endif // BOOST_COMPUTE_IMAGE_IMAGE2D_HPP
diff --git a/boost/compute/image/image3d.hpp b/boost/compute/image/image3d.hpp
new file mode 100644
index 0000000000..9463cfaa16
--- /dev/null
+++ b/boost/compute/image/image3d.hpp
@@ -0,0 +1,265 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_IMAGE3D_HPP
+#define BOOST_COMPUTE_IMAGE_IMAGE3D_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/image/image_format.hpp>
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declarations
+class command_queue;
+
+/// \class image3d
+/// \brief An OpenCL 3D image object
+///
+/// Copy and move operations are forwarded to the image_object base class.
+///
+/// \see image_format, image2d
+class image3d : public image_object
+{
+public:
+    /// Creates a null image3d object.
+    image3d()
+        : image_object()
+    {
+    }
+
+    /// Creates a new image3d object.
+    ///
+    /// \param context context in which the image is created
+    /// \param image_width width of the image
+    /// \param image_height height of the image
+    /// \param image_depth depth of the image
+    /// \param format channel order and channel data type of the image
+    /// \param flags memory flags handed to OpenCL
+    /// \param host_ptr host pointer handed to OpenCL unchanged
+    /// \param image_row_pitch row pitch handed to OpenCL unchanged
+    /// \param image_slice_pitch slice pitch handed to OpenCL unchanged
+    ///
+    /// Throws opencl_error if OpenCL does not return an image handle.
+    ///
+    /// \see_opencl_ref{clCreateImage}
+    image3d(const context &context,
+            size_t image_width,
+            size_t image_height,
+            size_t image_depth,
+            const image_format &format,
+            cl_mem_flags flags = read_write,
+            void *host_ptr = 0,
+            size_t image_row_pitch = 0,
+            size_t image_slice_pitch = 0)
+    {
+        m_mem = create_image_handle(context,
+                                    flags,
+                                    format,
+                                    image_width,
+                                    image_height,
+                                    image_depth,
+                                    image_row_pitch,
+                                    image_slice_pitch,
+                                    host_ptr);
+    }
+
+    /// \internal_ (deprecated)
+    ///
+    /// Same as the constructor above, with the older parameter order.
+    image3d(const context &context,
+            cl_mem_flags flags,
+            const image_format &format,
+            size_t image_width,
+            size_t image_height,
+            size_t image_depth,
+            size_t image_row_pitch,
+            size_t image_slice_pitch = 0,
+            void *host_ptr = 0)
+    {
+        m_mem = create_image_handle(context,
+                                    flags,
+                                    format,
+                                    image_width,
+                                    image_height,
+                                    image_depth,
+                                    image_row_pitch,
+                                    image_slice_pitch,
+                                    host_ptr);
+    }
+
+    /// Creates a new image3d as a copy of \p other.
+    image3d(const image3d &other)
+      : image_object(other)
+    {
+    }
+
+    /// Copies the image3d from \p other.
+    image3d& operator=(const image3d &other)
+    {
+        image_object::operator=(other);
+
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a new image object from \p other.
+    image3d(image3d&& other) BOOST_NOEXCEPT
+        : image_object(std::move(other))
+    {
+    }
+
+    /// Move-assigns the image from \p other to \c *this.
+    image3d& operator=(image3d&& other) BOOST_NOEXCEPT
+    {
+        image_object::operator=(std::move(other));
+
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the image3d object.
+    ~image3d()
+    {
+    }
+
+    /// Returns the size (width, height, depth) of the image.
+    ///
+    /// Each component is obtained with a separate get_info() query.
+    extents<3> size() const
+    {
+        extents<3> size;
+        size[0] = get_info<size_t>(CL_IMAGE_WIDTH);
+        size[1] = get_info<size_t>(CL_IMAGE_HEIGHT);
+        size[2] = get_info<size_t>(CL_IMAGE_DEPTH);
+        return size;
+    }
+
+    /// Returns the origin of the image (\c 0, \c 0, \c 0).
+    extents<3> origin() const
+    {
+        return extents<3>();
+    }
+
+    /// Returns information about the image.
+    ///
+    /// \p info selects the property; the result is returned as a \c T.
+    ///
+    /// \see_opencl_ref{clGetImageInfo}
+    template<class T>
+    T get_info(cl_image_info info) const
+    {
+        return detail::get_object_info<T>(clGetImageInfo, m_mem, info);
+    }
+
+    /// \overload
+    template<int Enum>
+    typename detail::get_object_info_type<image3d, Enum>::type
+    get_info() const;
+
+    /// Returns the supported 3D image formats for the context.
+    ///
+    /// \see_opencl_ref{clGetSupportedImageFormats}
+    static std::vector<image_format>
+    get_supported_formats(const context &context, cl_mem_flags flags = read_write)
+    {
+        return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE3D, flags);
+    }
+
+    /// Returns \c true if \p format is a supported 3D image format for
+    /// \p context.
+    static bool is_supported_format(const image_format &format,
+                                    const context &context,
+                                    cl_mem_flags flags = read_write)
+    {
+        return image_object::is_supported_format(
+            format, context, CL_MEM_OBJECT_IMAGE3D, flags
+        );
+    }
+
+    /// Creates a new image with a copy of the data in \c *this. Uses \p queue
+    /// to perform the copy operation.
+    image3d clone(command_queue &queue) const;
+
+private:
+    /// \internal_
+    ///
+    /// Creates the 3D image handle; shared by both creating constructors so
+    /// the OpenCL call sequence exists only once.
+    ///
+    /// Uses \c clCreateImage when \c CL_VERSION_1_2 is defined and
+    /// \c clCreateImage3D otherwise. Throws opencl_error, carrying the
+    /// error code reported by OpenCL, if no handle is returned.
+    static cl_mem create_image_handle(const context &context,
+                                      cl_mem_flags flags,
+                                      const image_format &format,
+                                      size_t image_width,
+                                      size_t image_height,
+                                      size_t image_depth,
+                                      size_t image_row_pitch,
+                                      size_t image_slice_pitch,
+                                      void *host_ptr)
+    {
+        cl_int error = 0;
+        cl_mem mem = 0;
+
+        #ifdef CL_VERSION_1_2
+        cl_image_desc desc;
+        desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+        desc.image_width = image_width;
+        desc.image_height = image_height;
+        desc.image_depth = image_depth;
+        desc.image_array_size = 0;
+        desc.image_row_pitch = image_row_pitch;
+        desc.image_slice_pitch = image_slice_pitch;
+        desc.num_mip_levels = 0;
+        desc.num_samples = 0;
+        // the last descriptor member is named differently in OpenCL 2.0
+        #ifdef CL_VERSION_2_0
+        desc.mem_object = 0;
+        #else
+        desc.buffer = 0;
+        #endif
+
+        mem = clCreateImage(context,
+                            flags,
+                            format.get_format_ptr(),
+                            &desc,
+                            host_ptr,
+                            &error);
+        #else
+        mem = clCreateImage3D(context,
+                              flags,
+                              format.get_format_ptr(),
+                              image_width,
+                              image_height,
+                              image_depth,
+                              image_row_pitch,
+                              image_slice_pitch,
+                              host_ptr,
+                              &error);
+        #endif
+
+        if(!mem){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+
+        return mem;
+    }
+};
+
+/// \internal_ define get_info() specializations for image3d
+///
+/// Each entry handed to the macro is a (return type, info enum) pair; these
+/// back the \c get_info<Enum>() overload that the class only declares.
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image3d,
+ ((cl_image_format, CL_IMAGE_FORMAT))
+ ((size_t, CL_IMAGE_ELEMENT_SIZE))
+ ((size_t, CL_IMAGE_ROW_PITCH))
+ ((size_t, CL_IMAGE_SLICE_PITCH))
+ ((size_t, CL_IMAGE_WIDTH))
+ ((size_t, CL_IMAGE_HEIGHT))
+ ((size_t, CL_IMAGE_DEPTH))
+)
+
+namespace detail {
+
+// set_kernel_arg() specialization for image3d
+//
+// Adds nothing of its own: all behavior is inherited from the
+// set_kernel_arg<image_object> specialization.
+template<>
+struct set_kernel_arg<image3d> : public set_kernel_arg<image_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+// Registers the type name image3d_t for boost::compute::image3d
+// (presumably the OpenCL C name used when kernel source is generated).
+BOOST_COMPUTE_TYPE_NAME(boost::compute::image3d, image3d_t)
+
+#endif // BOOST_COMPUTE_IMAGE_IMAGE3D_HPP
diff --git a/boost/compute/image/image_format.hpp b/boost/compute/image/image_format.hpp
new file mode 100644
index 0000000000..a6ecf83ef6
--- /dev/null
+++ b/boost/compute/image/image_format.hpp
@@ -0,0 +1,135 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP
+#define BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP
+
+#include <boost/compute/cl.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class image_format
+/// \brief An OpenCL image format
+///
+/// An image format is the pair (channel order, channel data type). For
+/// instance, the following describes RGBA pixels whose channels are
+/// unsigned 8-bit integers:
+/// \code
+/// boost::compute::image_format rgba8(CL_RGBA, CL_UNSIGNED_INT8);
+/// \endcode
+///
+/// Format objects are typically handed to the constructors of the image
+/// classes (e.g. \ref image2d, \ref image3d) when an image is created on a
+/// compute device.
+///
+/// The static get_supported_formats() function of each image class reports
+/// which formats a context supports:
+/// \code
+/// std::vector<image_format> formats = image2d::get_supported_formats(ctx);
+/// \endcode
+///
+/// \see image2d
+class image_format
+{
+public:
+    /// Channel orders, named after the corresponding \c CL_* constants.
+    enum channel_order {
+        r = CL_R,
+        a = CL_A,
+        intensity = CL_INTENSITY,
+        luminance = CL_LUMINANCE,
+        rg = CL_RG,
+        ra = CL_RA,
+        rgb = CL_RGB,
+        rgba = CL_RGBA,
+        argb = CL_ARGB,
+        bgra = CL_BGRA
+    };
+
+    /// Channel data types, named after the corresponding \c CL_* constants.
+    enum channel_data_type {
+        snorm_int8 = CL_SNORM_INT8,
+        snorm_int16 = CL_SNORM_INT16,
+        unorm_int8 = CL_UNORM_INT8,
+        unorm_int16 = CL_UNORM_INT16,
+        unorm_short_565 = CL_UNORM_SHORT_565,
+        unorm_short_555 = CL_UNORM_SHORT_555,
+        unorm_int_101010 = CL_UNORM_INT_101010,
+        signed_int8 = CL_SIGNED_INT8,
+        signed_int16 = CL_SIGNED_INT16,
+        signed_int32 = CL_SIGNED_INT32,
+        unsigned_int8 = CL_UNSIGNED_INT8,
+        unsigned_int16 = CL_UNSIGNED_INT16,
+        unsigned_int32 = CL_UNSIGNED_INT32,
+        float16 = CL_HALF_FLOAT,
+        float32 = CL_FLOAT
+    };
+
+    /// Builds a format from a channel \p order and a channel data \p type.
+    explicit image_format(cl_channel_order order, cl_channel_type type)
+    {
+        cl_image_format &fmt = m_format;
+        fmt.image_channel_data_type = type;
+        fmt.image_channel_order = order;
+    }
+
+    /// Builds a format holding the same values as the raw \p format.
+    explicit image_format(const cl_image_format &format)
+        : m_format(format)
+    {
+    }
+
+    /// Builds a format holding the same values as \p other.
+    image_format(const image_format &other)
+    {
+        m_format = other.m_format;
+    }
+
+    /// Assigns the values of \p other to \c *this.
+    image_format& operator=(const image_format &other)
+    {
+        // plain struct copy, so assigning an object to itself is harmless
+        m_format = other.m_format;
+
+        return *this;
+    }
+
+    /// Destroys the image format object.
+    ~image_format()
+    {
+    }
+
+    /// Gives access to the wrapped \c cl_image_format through a pointer.
+    const cl_image_format* get_format_ptr() const
+    {
+        return &m_format;
+    }
+
+    /// Two formats are equal when both their channel order and their
+    /// channel data type match.
+    bool operator==(const image_format &other) const
+    {
+        const cl_image_format &rhs = other.m_format;
+
+        if(m_format.image_channel_order != rhs.image_channel_order){
+            return false;
+        }
+
+        return m_format.image_channel_data_type == rhs.image_channel_data_type;
+    }
+
+    /// Negation of operator==().
+    bool operator!=(const image_format &other) const
+    {
+        return !operator==(other);
+    }
+
+private:
+    cl_image_format m_format;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP
diff --git a/boost/compute/image/image_object.hpp b/boost/compute/image/image_object.hpp
new file mode 100644
index 0000000000..451c68568f
--- /dev/null
+++ b/boost/compute/image/image_object.hpp
@@ -0,0 +1,170 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP
+#define BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP
+
+#include <algorithm>
+#include <vector>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/memory_object.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/image/image_format.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class image_object
+/// \brief Base-class for image objects.
+///
+/// The image_object class is the base-class for image objects on compute
+/// devices. It adds image-specific queries on top of memory_object.
+///
+/// \see image1d, image2d, image3d
+class image_object : public memory_object
+{
+public:
+    /// Creates a null image object.
+    image_object()
+        : memory_object()
+    {
+    }
+
+    /// Creates an image object for the existing handle \p mem. Both
+    /// arguments are forwarded to the memory_object constructor.
+    explicit image_object(cl_mem mem, bool retain = true)
+        : memory_object(mem, retain)
+    {
+    }
+
+    /// Creates a new image object as a copy of \p other.
+    image_object(const image_object &other)
+        : memory_object(other)
+    {
+    }
+
+    /// Copies the image object from \p other to \c *this.
+    image_object& operator=(const image_object &other)
+    {
+        if(this != &other){
+            memory_object::operator=(other);
+        }
+
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a new image object from \p other.
+    image_object(image_object&& other) BOOST_NOEXCEPT
+        : memory_object(std::move(other))
+    {
+    }
+
+    /// \internal_
+    image_object& operator=(image_object&& other) BOOST_NOEXCEPT
+    {
+        memory_object::operator=(std::move(other));
+
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the image object.
+    ~image_object()
+    {
+    }
+
+    /// Returns information about the image object.
+    ///
+    /// \p info is a \c clGetImageInfo property (a \c CL_IMAGE_* value); the
+    /// result is returned as a \c T.
+    ///
+    /// \see_opencl_ref{clGetImageInfo}
+    template<class T>
+    T get_image_info(cl_image_info info) const
+    {
+        return detail::get_object_info<T>(clGetImageInfo, m_mem, info);
+    }
+
+    /// Returns the format for the image.
+    image_format format() const
+    {
+        return image_format(get_image_info<cl_image_format>(CL_IMAGE_FORMAT));
+    }
+
+    /// \internal_ (deprecated)
+    image_format get_format() const
+    {
+        return format();
+    }
+
+    /// Returns the width of the image.
+    size_t width() const
+    {
+        return get_image_info<size_t>(CL_IMAGE_WIDTH);
+    }
+
+    /// Returns the height of the image, as reported for \c CL_IMAGE_HEIGHT.
+    ///
+    /// Per the \c clGetImageInfo specification, the reported height of a 1D
+    /// image is \c 0.
+    size_t height() const
+    {
+        return get_image_info<size_t>(CL_IMAGE_HEIGHT);
+    }
+
+    /// Returns the depth of the image, as reported for \c CL_IMAGE_DEPTH.
+    ///
+    /// Per the \c clGetImageInfo specification, the reported depth of 1D and
+    /// 2D images is \c 0.
+    size_t depth() const
+    {
+        return get_image_info<size_t>(CL_IMAGE_DEPTH);
+    }
+
+    /// Returns the supported image formats for the \p type in \p context.
+    ///
+    /// The result is empty when no format is supported or when the query
+    /// itself fails.
+    ///
+    /// \see_opencl_ref{clGetSupportedImageFormats}
+    static std::vector<image_format>
+    get_supported_formats(const context &context,
+                          cl_mem_object_type type,
+                          cl_mem_flags flags = read_write)
+    {
+        std::vector<image_format> formats;
+
+        // first call: ask only for the number of formats
+        cl_uint count = 0;
+        cl_int ret =
+            clGetSupportedImageFormats(context, flags, type, 0, 0, &count);
+        if(ret != CL_SUCCESS || count == 0){
+            // nothing to fetch; also avoids taking &v[0] of an empty vector
+            return formats;
+        }
+
+        // second call: fetch the formats themselves
+        std::vector<cl_image_format> cl_formats(count);
+        ret = clGetSupportedImageFormats(
+            context, flags, type, count, &cl_formats[0], 0
+        );
+        if(ret != CL_SUCCESS){
+            return formats;
+        }
+
+        formats.reserve(count);
+
+        for(cl_uint i = 0; i < count; i++){
+            formats.push_back(image_format(cl_formats[i]));
+        }
+
+        return formats;
+    }
+
+    /// Returns \c true if \p format is a supported image format for
+    /// \p type in \p context with \p flags.
+    static bool is_supported_format(const image_format &format,
+                                    const context &context,
+                                    cl_mem_object_type type,
+                                    cl_mem_flags flags = read_write)
+    {
+        const std::vector<image_format> formats =
+            get_supported_formats(context, type, flags);
+
+        return std::find(formats.begin(), formats.end(), format) != formats.end();
+    }
+};
+
+namespace detail {
+
+// set_kernel_arg() specialization for image_object
+//
+// Adds nothing of its own: all behavior is inherited from the
+// set_kernel_arg<memory_object> specialization.
+template<>
+struct set_kernel_arg<image_object> : public set_kernel_arg<memory_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP
diff --git a/boost/compute/image/image_sampler.hpp b/boost/compute/image/image_sampler.hpp
new file mode 100644
index 0000000000..4f1bfe9b86
--- /dev/null
+++ b/boost/compute/image/image_sampler.hpp
@@ -0,0 +1,221 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP
+#define BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class image_sampler
+/// \brief An OpenCL image sampler object
+///
+/// Wraps a \c cl_sampler handle. Copies retain the handle, and the
+/// destructor and the assignment operators release the handle they drop.
+///
+/// \see image2d, image_format
+class image_sampler
+{
+public:
+    /// Addressing modes, named after the \c CL_ADDRESS_* constants.
+    enum addressing_mode {
+        none = CL_ADDRESS_NONE,
+        clamp_to_edge = CL_ADDRESS_CLAMP_TO_EDGE,
+        clamp = CL_ADDRESS_CLAMP,
+        repeat = CL_ADDRESS_REPEAT
+    };
+
+    /// Filter modes, named after the \c CL_FILTER_* constants.
+    enum filter_mode {
+        nearest = CL_FILTER_NEAREST,
+        linear = CL_FILTER_LINEAR
+    };
+
+    /// Creates a null image sampler object.
+    image_sampler()
+        : m_sampler(0)
+    {
+    }
+
+    /// Creates a new image sampler in \p context.
+    ///
+    /// \param context context the sampler is created in
+    /// \param normalized_coords whether normalized coordinates are used
+    /// \param addressing_mode addressing mode handed to OpenCL
+    /// \param filter_mode filter mode handed to OpenCL
+    ///
+    /// Uses \c clCreateSamplerWithProperties when \c CL_VERSION_2_0 is
+    /// defined and \c clCreateSampler otherwise. Throws opencl_error if
+    /// OpenCL does not return a sampler handle.
+    image_sampler(const context &context,
+                  bool normalized_coords,
+                  cl_addressing_mode addressing_mode,
+                  cl_filter_mode filter_mode)
+    {
+        cl_int error = 0;
+
+        #ifdef CL_VERSION_2_0
+        // zero-terminated list of (property name, value) pairs; a fixed
+        // array keeps this header from depending on <vector>
+        const cl_sampler_properties sampler_properties[] = {
+            CL_SAMPLER_NORMALIZED_COORDS,
+            cl_sampler_properties(normalized_coords),
+            CL_SAMPLER_ADDRESSING_MODE,
+            cl_sampler_properties(addressing_mode),
+            CL_SAMPLER_FILTER_MODE,
+            cl_sampler_properties(filter_mode),
+            cl_sampler_properties(0)
+        };
+
+        m_sampler = clCreateSamplerWithProperties(
+            context, sampler_properties, &error
+        );
+        #else
+        m_sampler = clCreateSampler(
+            context, normalized_coords, addressing_mode, filter_mode, &error
+        );
+        #endif
+
+        if(!m_sampler){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+    }
+
+    /// Creates an image sampler object for the existing handle \p sampler.
+    /// If \p retain is \c true and the handle is non-null, the handle is
+    /// retained.
+    explicit image_sampler(cl_sampler sampler, bool retain = true)
+        : m_sampler(sampler)
+    {
+        if(m_sampler && retain){
+            clRetainSampler(m_sampler);
+        }
+    }
+
+    /// Creates a new image sampler object as a copy of \p other.
+    image_sampler(const image_sampler &other)
+        : m_sampler(other.m_sampler)
+    {
+        if(m_sampler){
+            clRetainSampler(m_sampler);
+        }
+    }
+
+    /// Copies the image sampler object from \p other to \c *this.
+    image_sampler& operator=(const image_sampler &other)
+    {
+        if(this != &other){
+            if(m_sampler){
+                clReleaseSampler(m_sampler);
+            }
+
+            m_sampler = other.m_sampler;
+
+            if(m_sampler){
+                clRetainSampler(m_sampler);
+            }
+        }
+
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a new image sampler object from \p other, leaving
+    /// \p other null.
+    image_sampler(image_sampler&& other) BOOST_NOEXCEPT
+        : m_sampler(other.m_sampler)
+    {
+        other.m_sampler = 0;
+    }
+
+    /// Move-assigns the image sampler from \p other to \c *this, leaving
+    /// \p other null.
+    image_sampler& operator=(image_sampler&& other) BOOST_NOEXCEPT
+    {
+        // guard against self-move, which would otherwise release the
+        // sampler and leave *this null
+        if(this != &other){
+            if(m_sampler){
+                clReleaseSampler(m_sampler);
+            }
+
+            m_sampler = other.m_sampler;
+            other.m_sampler = 0;
+        }
+
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the image sampler object.
+    ~image_sampler()
+    {
+        if(m_sampler){
+            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+                clReleaseSampler(m_sampler)
+            );
+        }
+    }
+
+    /// Returns the underlying \c cl_sampler object.
+    ///
+    /// The reference is non-const even though the function is const.
+    cl_sampler& get() const
+    {
+        return const_cast<cl_sampler &>(m_sampler);
+    }
+
+    /// Returns the context for the image sampler object.
+    context get_context() const
+    {
+        return context(get_info<cl_context>(CL_SAMPLER_CONTEXT));
+    }
+
+    /// Returns information about the sampler.
+    ///
+    /// \p info selects the property; the result is returned as a \c T.
+    ///
+    /// \see_opencl_ref{clGetSamplerInfo}
+    template<class T>
+    T get_info(cl_sampler_info info) const
+    {
+        return detail::get_object_info<T>(clGetSamplerInfo, m_sampler, info);
+    }
+
+    /// \overload
+    template<int Enum>
+    typename detail::get_object_info_type<image_sampler, Enum>::type
+    get_info() const;
+
+    /// Returns \c true if the sampler is the same as \p other, i.e. both
+    /// wrap the same handle.
+    bool operator==(const image_sampler &other) const
+    {
+        return m_sampler == other.m_sampler;
+    }
+
+    /// Returns \c true if the sampler is different from \p other.
+    bool operator!=(const image_sampler &other) const
+    {
+        return m_sampler != other.m_sampler;
+    }
+
+    /// Converts to the underlying \c cl_sampler handle.
+    operator cl_sampler() const
+    {
+        return m_sampler;
+    }
+
+private:
+    cl_sampler m_sampler;
+};
+
+/// \internal_ define get_info() specializations for image_sampler
+///
+/// Each entry handed to the macro is a (return type, info enum) pair; these
+/// back the \c get_info<Enum>() overload that the class only declares.
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image_sampler,
+ ((cl_uint, CL_SAMPLER_REFERENCE_COUNT))
+ ((cl_context, CL_SAMPLER_CONTEXT))
+ ((cl_addressing_mode, CL_SAMPLER_ADDRESSING_MODE))
+ ((cl_filter_mode, CL_SAMPLER_FILTER_MODE))
+ ((bool, CL_SAMPLER_NORMALIZED_COORDS))
+)
+
+namespace detail {
+
+// set_kernel_arg specialization for image samplers
+//
+// Sets argument `index` of `kernel_` to the sampler's underlying cl_sampler
+// handle, obtained through image_sampler::get().
+template<>
+struct set_kernel_arg<image_sampler>
+{
+ void operator()(kernel &kernel_, size_t index, const image_sampler &sampler)
+ {
+ kernel_.set_arg(index, sampler.get());
+ }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+// Registers the type name sampler_t for boost::compute::image_sampler
+// (presumably the OpenCL C name used when kernel source is generated).
+BOOST_COMPUTE_TYPE_NAME(boost::compute::image_sampler, sampler_t)
+
+#endif // BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP
diff --git a/boost/compute/image2d.hpp b/boost/compute/image2d.hpp
new file mode 100644
index 0000000000..68460813da
--- /dev/null
+++ b/boost/compute/image2d.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/image/image2d.hpp> instead
+#include <boost/compute/image/image2d.hpp>
diff --git a/boost/compute/image3d.hpp b/boost/compute/image3d.hpp
new file mode 100644
index 0000000000..ab7467c4f3
--- /dev/null
+++ b/boost/compute/image3d.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/image/image3d.hpp> instead
+#include <boost/compute/image/image3d.hpp>
diff --git a/boost/compute/image_format.hpp b/boost/compute/image_format.hpp
new file mode 100644
index 0000000000..1ee50014aa
--- /dev/null
+++ b/boost/compute/image_format.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/image/image_format.hpp> instead
+#include <boost/compute/image/image_format.hpp>
diff --git a/boost/compute/image_sampler.hpp b/boost/compute/image_sampler.hpp
new file mode 100644
index 0000000000..1cd11608b9
--- /dev/null
+++ b/boost/compute/image_sampler.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/image/image_sampler.hpp> instead
+#include <boost/compute/image/image_sampler.hpp>
diff --git a/boost/compute/interop/eigen.hpp b/boost/compute/interop/eigen.hpp
new file mode 100644
index 0000000000..f616911417
--- /dev/null
+++ b/boost/compute/interop/eigen.hpp
@@ -0,0 +1,16 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_EIGEN_HPP
+#define BOOST_COMPUTE_INTEROP_EIGEN_HPP
+
+#include <boost/compute/interop/eigen/core.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_EIGEN_HPP
diff --git a/boost/compute/interop/eigen/core.hpp b/boost/compute/interop/eigen/core.hpp
new file mode 100644
index 0000000000..aa047efd32
--- /dev/null
+++ b/boost/compute/interop/eigen/core.hpp
@@ -0,0 +1,72 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP
+#define BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP
+
+#include <Eigen/Core>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies the coefficients of \p matrix into the range starting at
+/// \p buffer, using \p queue for the transfer.
+///
+/// \c matrix.size() scalars are read, starting at \c matrix.data().
+template<class Derived>
+inline void eigen_copy_matrix_to_buffer(const Eigen::PlainObjectBase<Derived> &matrix,
+                                        buffer_iterator<typename Derived::Scalar> buffer,
+                                        command_queue &queue = system::default_queue())
+{
+    const typename Derived::Scalar *host_begin = matrix.data();
+
+    ::boost::compute::copy_n(host_begin, matrix.size(), buffer, queue);
+}
+
+/// Copies \c matrix.size() scalars from the range starting at \p buffer
+/// into the storage of \p matrix, using \p queue for the transfer.
+///
+/// \p matrix is not resized; its current size determines how much is
+/// copied.
+template<class Derived>
+inline void eigen_copy_buffer_to_matrix(const buffer_iterator<typename Derived::Scalar> buffer,
+                                        Eigen::PlainObjectBase<Derived> &matrix,
+                                        command_queue &queue = system::default_queue())
+{
+    typename Derived::Scalar *host_begin = matrix.data();
+
+    ::boost::compute::copy_n(buffer, matrix.size(), host_begin, queue);
+}
+
+/// Packs the 16 coefficients of \p matrix into a \c float16_.
+///
+/// The coefficients are copied byte-wise in the order in which they are
+/// stored behind \c matrix.data().
+///
+/// NOTE(review): std::memcpy is declared in <cstring>, which this header
+/// does not include directly - confirm it arrives through another include.
+inline float16_ eigen_matrix4f_to_float16(const Eigen::Matrix4f &matrix)
+{
+    const size_t byte_count = sizeof(float) * 16;
+
+    float16_ packed;
+    std::memcpy(&packed, matrix.data(), byte_count);
+    return packed;
+}
+
+/// Packs the 16 coefficients of \p matrix into a \c double16_.
+///
+/// The coefficients are copied byte-wise in the order in which they are
+/// stored behind \c matrix.data().
+///
+/// NOTE(review): std::memcpy is declared in <cstring>, which this header
+/// does not include directly - confirm it arrives through another include.
+inline double16_ eigen_matrix4d_to_double16(const Eigen::Matrix4d &matrix)
+{
+    const size_t byte_count = sizeof(double) * 16;
+
+    double16_ packed;
+    std::memcpy(&packed, matrix.data(), byte_count);
+    return packed;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+// Type names for fixed-size Eigen types. The vector width of each name
+// matches the number of scalars the Eigen type stores: a 2x2 matrix holds
+// 4 scalars, so it maps to the 4-wide name (not the 8-wide one), and a 4x4
+// matrix holds 16.
+BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2i, int2)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4i, int4)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4f, float4)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2f, float4)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4f, float16)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2d, double2)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4d, double4)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2d, double4)
+BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4d, double16)
+
+#endif // BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP
diff --git a/boost/compute/interop/opencv.hpp b/boost/compute/interop/opencv.hpp
new file mode 100644
index 0000000000..8cfa3a1c8a
--- /dev/null
+++ b/boost/compute/interop/opencv.hpp
@@ -0,0 +1,17 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENCV_HPP
+#define BOOST_COMPUTE_INTEROP_OPENCV_HPP
+
+#include <boost/compute/interop/opencv/core.hpp>
+#include <boost/compute/interop/opencv/highgui.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_OPENCV_HPP
diff --git a/boost/compute/interop/opencv/core.hpp b/boost/compute/interop/opencv/core.hpp
new file mode 100644
index 0000000000..e3c7bf6dda
--- /dev/null
+++ b/boost/compute/interop/opencv/core.hpp
@@ -0,0 +1,141 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP
+#define BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP
+
+#include <opencv2/core/core.hpp>
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/image/image2d.hpp>
+#include <boost/compute/image/image_format.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies the data of \p mat, viewed as an array of \c T, into the range
+/// starting at \p buffer, using \p queue for the transfer.
+///
+/// \p mat must be continuous (asserted). \c mat.rows * \c mat.cols elements
+/// of type \c T are copied.
+template<class T>
+inline void opencv_copy_mat_to_buffer(const cv::Mat &mat,
+                                      buffer_iterator<T> buffer,
+                                      command_queue &queue = system::default_queue())
+{
+    BOOST_ASSERT(mat.isContinuous());
+
+    T *host_begin = reinterpret_cast<T *>(mat.data);
+    const int element_count = mat.rows * mat.cols;
+
+    ::boost::compute::copy_n(host_begin, element_count, buffer, queue);
+}
+
+/// Copies \c mat.cols * \c mat.rows elements of type \c T from the range
+/// starting at \p buffer into the data of \p mat, using \p queue for the
+/// transfer.
+///
+/// \p mat must be continuous (asserted); it is not resized.
+template<class T>
+inline void opencv_copy_buffer_to_mat(const buffer_iterator<T> buffer,
+                                      cv::Mat &mat,
+                                      command_queue &queue = system::default_queue())
+{
+    BOOST_ASSERT(mat.isContinuous());
+
+    T *host_begin = reinterpret_cast<T *>(mat.data);
+    const int element_count = mat.cols * mat.rows;
+
+    ::boost::compute::copy_n(buffer, element_count, host_begin, queue);
+}
+
+/// Writes the pixel data of \p mat into \p image, covering the whole image
+/// (from \c image.origin() over \c image.size()), using \p queue.
+///
+/// Asserted preconditions: \p mat has data, \p mat is continuous, and
+/// \p image belongs to the context of \p queue.
+inline void opencv_copy_mat_to_image(const cv::Mat &mat,
+                                     image2d &image,
+                                     command_queue &queue = system::default_queue())
+{
+    BOOST_ASSERT(mat.data != 0);
+    BOOST_ASSERT(mat.isContinuous());
+    BOOST_ASSERT(image.get_context() == queue.get_context());
+
+    const void *host_pixels = mat.data;
+
+    queue.enqueue_write_image(
+        image, image.origin(), image.size(), host_pixels
+    );
+}
+
+/// Reads the whole of \p image (from \c image.origin() over
+/// \c image.size()) into the data of \p mat, using \p queue.
+///
+/// Asserted preconditions: \p mat has data, \p mat is continuous, and
+/// \p image belongs to the context of \p queue. \p mat is not resized, so
+/// it must already be large enough for the image.
+inline void opencv_copy_image_to_mat(const image2d &image,
+                                     cv::Mat &mat,
+                                     command_queue &queue = system::default_queue())
+{
+    // same precondition as opencv_copy_mat_to_image(): a matrix without
+    // data gives the read nowhere to write to
+    BOOST_ASSERT(mat.data != 0);
+    BOOST_ASSERT(mat.isContinuous());
+    BOOST_ASSERT(image.get_context() == queue.get_context());
+
+    queue.enqueue_read_image(image, image.origin(), image.size(), mat.data);
+}
+
+/// Maps the element type of \p mat to an image format:
+///
+/// - \c CV_8UC4  -> (\c CL_BGRA, \c CL_UNORM_INT8)
+/// - \c CV_16UC4 -> (\c CL_BGRA, \c CL_UNORM_INT16)
+/// - \c CV_32F   -> (\c CL_INTENSITY, \c CL_FLOAT)
+/// - \c CV_32FC4 -> (\c CL_RGBA, \c CL_FLOAT)
+/// - \c CV_8UC1  -> (\c CL_INTENSITY, \c CL_UNORM_INT8)
+///
+/// Any other type throws opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED).
+inline image_format opencv_get_mat_image_format(const cv::Mat &mat)
+{
+    const int type = mat.type();
+
+    if(type == CV_8UC4){
+        return image_format(CL_BGRA, CL_UNORM_INT8);
+    }
+    if(type == CV_16UC4){
+        return image_format(CL_BGRA, CL_UNORM_INT16);
+    }
+    if(type == CV_32F){
+        return image_format(CL_INTENSITY, CL_FLOAT);
+    }
+    if(type == CV_32FC4){
+        return image_format(CL_RGBA, CL_FLOAT);
+    }
+    if(type == CV_8UC1){
+        return image_format(CL_INTENSITY, CL_UNORM_INT8);
+    }
+
+    BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED));
+}
+
+/// Creates a \c cv::Mat with the dimensions of \p image and fills it with
+/// the image's data, read back through \p queue.
+///
+/// The matrix type is chosen from the image format:
+///
+/// - (\c CL_BGRA, \c CL_UNORM_INT8)  -> \c CV_8UC4
+/// - (\c CL_BGRA, \c CL_UNORM_INT16) -> \c CV_16UC4
+/// - (\c CL_INTENSITY, \c CL_FLOAT)  -> \c CV_32FC1
+/// - (\c CL_RGBA, \c CL_FLOAT)       -> \c CV_32FC4
+/// - anything else                   -> \c CV_8UC1
+///
+/// The (\c CL_RGBA, \c CL_FLOAT) case mirrors the \c CV_32FC4 mapping of
+/// opencv_get_mat_image_format(); without it such an image would be read
+/// into a matrix with one byte per pixel, which is too small.
+///
+/// \p image must belong to the context of \p queue (asserted).
+inline cv::Mat opencv_create_mat_with_image2d(const image2d &image,
+                                              command_queue &queue = system::default_queue())
+{
+    BOOST_ASSERT(image.get_context() == queue.get_context());
+
+    const image_format format = image.format();
+    const cl_image_format *raw_format = format.get_format_ptr();
+    const cl_channel_order order = raw_format->image_channel_order;
+    const cl_channel_type data_type = raw_format->image_channel_data_type;
+
+    // fallback for formats without a dedicated mapping
+    int mat_type = CV_8UC1;
+
+    if(data_type == CL_UNORM_INT8 && order == CL_BGRA){
+        mat_type = CV_8UC4;
+    }
+    else if(data_type == CL_UNORM_INT16 && order == CL_BGRA){
+        mat_type = CV_16UC4;
+    }
+    else if(data_type == CL_FLOAT && order == CL_INTENSITY){
+        mat_type = CV_32FC1;
+    }
+    else if(data_type == CL_FLOAT && order == CL_RGBA){
+        mat_type = CV_32FC4;
+    }
+
+    cv::Mat mat(image.height(), image.width(), mat_type);
+
+    opencv_copy_image_to_mat(image, mat, queue);
+
+    return mat;
+}
+
+/// Creates an image2d of size \c mat.cols x \c mat.rows in the context of
+/// \p queue, with the format given by opencv_get_mat_image_format() and
+/// the memory \p flags, then copies the data of \p mat into it.
+inline image2d opencv_create_image2d_with_mat(const cv::Mat &mat,
+                                              cl_mem_flags flags,
+                                              command_queue &queue = system::default_queue())
+{
+    const context &image_context = queue.get_context();
+    const image_format mat_format = opencv_get_mat_image_format(mat);
+
+    image2d result(image_context, mat.cols, mat.rows, mat_format, flags);
+    opencv_copy_mat_to_image(mat, result, queue);
+
+    return result;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP
diff --git a/boost/compute/interop/opencv/highgui.hpp b/boost/compute/interop/opencv/highgui.hpp
new file mode 100644
index 0000000000..66baa728f6
--- /dev/null
+++ b/boost/compute/interop/opencv/highgui.hpp
@@ -0,0 +1,33 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP
+#define BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP
+
+#include <opencv2/highgui/highgui.hpp>
+
+#include <boost/compute/interop/opencv/core.hpp>
+
+namespace boost {
+namespace compute {
+
+inline void opencv_imshow(const std::string &winname,
+ const image2d &image,
+ command_queue &queue = system::default_queue())
+{ // Shows image in the OpenCV window named winname.
+ const cv::Mat mat = opencv_create_mat_with_image2d(image, queue);
+ // cv::imshow() takes a cv::Mat, so the image was first copied into one using queue.
+ cv::imshow(winname, mat);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP
diff --git a/boost/compute/interop/opencv/ocl.hpp b/boost/compute/interop/opencv/ocl.hpp
new file mode 100644
index 0000000000..23d8ac2273
--- /dev/null
+++ b/boost/compute/interop/opencv/ocl.hpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP
+#define BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP
+
+#include <opencv2/ocl/ocl.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/command_queue.hpp>
+
+namespace boost {
+namespace compute {
+
+inline context opencv_ocl_get_context()
+{ // Returns OpenCV's OpenCL context, or a default-constructed context if OpenCV reports none.
+ void *ocl_context = cv::ocl::getoclContext(); // inline above: header-defined, must not be emitted per TU
+ if(!ocl_context){
+ return context();
+ }
+ // The non-null result is treated as a pointer to the cl_context handle and dereferenced.
+ return context(*(static_cast<cl_context *>(ocl_context)));
+}
+
+inline command_queue opencv_ocl_get_command_queue()
+{ // Returns OpenCV's OpenCL command queue, or a default-constructed queue if OpenCV reports none.
+ void *ocl_queue = cv::ocl::getoclCommandQueue(); // inline above: header-defined, must not be emitted per TU
+ if(!ocl_queue){
+ return command_queue();
+ }
+ // The non-null result is treated as a pointer to the cl_command_queue handle and dereferenced.
+ return command_queue(*(static_cast<cl_command_queue *>(ocl_queue)));
+}
+
+inline buffer opencv_ocl_get_buffer(const cv::ocl::oclMat &mat)
+{ // Wraps mat.data, reinterpreted as a cl_mem handle, in a buffer (inline: defined in a header).
+ return buffer(reinterpret_cast<cl_mem>(mat.data));
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP
diff --git a/boost/compute/interop/opengl.hpp b/boost/compute/interop/opengl.hpp
new file mode 100644
index 0000000000..7ae12617ac
--- /dev/null
+++ b/boost/compute/interop/opengl.hpp
@@ -0,0 +1,24 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute OpenGL interop headers.
+
+#include <boost/compute/interop/opengl/acquire.hpp>
+#include <boost/compute/interop/opengl/context.hpp>
+#include <boost/compute/interop/opengl/opengl_buffer.hpp>
+#include <boost/compute/interop/opengl/opengl_renderbuffer.hpp>
+#include <boost/compute/interop/opengl/opengl_texture.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_HPP
diff --git a/boost/compute/interop/opengl/acquire.hpp b/boost/compute/interop/opengl/acquire.hpp
new file mode 100644
index 0000000000..10af4338fb
--- /dev/null
+++ b/boost/compute/interop/opengl/acquire.hpp
@@ -0,0 +1,99 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/interop/opengl/cl_gl.hpp>
+#include <boost/compute/interop/opengl/opengl_buffer.hpp>
+#include <boost/compute/utility/wait_list.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Enqueues a command on \p queue to acquire the \p num_objects OpenGL memory objects in \p mem_objects.
+/// \p events is passed as the command's event wait list. \throws opencl_error on any non-success status.
+/// \see_opencl_ref{clEnqueueAcquireGLObjects}
+inline event opengl_enqueue_acquire_gl_objects(size_t num_objects,
+ const cl_mem *mem_objects,
+ command_queue &queue,
+ const wait_list &events = wait_list())
+{
+ BOOST_ASSERT(queue != 0);
+ // Receives the event that OpenCL creates for the enqueued command; returned to the caller.
+ event event_;
+
+ cl_int ret = clEnqueueAcquireGLObjects(queue.get(),
+ num_objects,
+ mem_objects,
+ events.size(),
+ events.get_event_ptr(),
+ &event_.get());
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ return event_;
+}
+
+/// Enqueues a command on \p queue to release the \p num_objects OpenGL memory objects in \p mem_objects.
+/// \p events is passed as the command's event wait list. \throws opencl_error on any non-success status.
+/// \see_opencl_ref{clEnqueueReleaseGLObjects}
+inline event opengl_enqueue_release_gl_objects(size_t num_objects,
+ const cl_mem *mem_objects,
+ command_queue &queue,
+ const wait_list &events = wait_list())
+{
+ BOOST_ASSERT(queue != 0);
+ // Receives the event that OpenCL creates for the enqueued command; returned to the caller.
+ event event_;
+
+ cl_int ret = clEnqueueReleaseGLObjects(queue.get(),
+ num_objects,
+ mem_objects,
+ events.size(),
+ events.get_event_ptr(),
+ &event_.get());
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+
+ return event_;
+}
+
+/// Enqueues a command on \p queue to acquire the OpenGL buffer \p buffer.
+/// Asserts that both share a context, then forwards a one-element list to opengl_enqueue_acquire_gl_objects().
+/// \see_opencl_ref{clEnqueueAcquireGLObjects}
+inline event opengl_enqueue_acquire_buffer(const opengl_buffer &buffer,
+ command_queue &queue,
+ const wait_list &events = wait_list())
+{
+ BOOST_ASSERT(buffer.get_context() == queue.get_context());
+ // &buffer.get() is the address of the buffer's cl_mem handle, used as an array of length 1.
+ return opengl_enqueue_acquire_gl_objects(1, &buffer.get(), queue, events);
+}
+
+/// Enqueues a command on \p queue to release the OpenGL buffer \p buffer.
+/// Asserts that both share a context, then forwards a one-element list to opengl_enqueue_release_gl_objects().
+/// \see_opencl_ref{clEnqueueReleaseGLObjects}
+inline event opengl_enqueue_release_buffer(const opengl_buffer &buffer,
+ command_queue &queue,
+ const wait_list &events = wait_list())
+{
+ BOOST_ASSERT(buffer.get_context() == queue.get_context());
+ // &buffer.get() is the address of the buffer's cl_mem handle, used as an array of length 1.
+ return opengl_enqueue_release_gl_objects(1, &buffer.get(), queue, events);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP
diff --git a/boost/compute/interop/opengl/cl_gl.hpp b/boost/compute/interop/opengl/cl_gl.hpp
new file mode 100644
index 0000000000..de82dbd9ab
--- /dev/null
+++ b/boost/compute/interop/opengl/cl_gl.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP
+
+#if defined(__APPLE__)
+#include <OpenCL/cl_gl.h>
+#else
+#include <CL/cl_gl.h>
+#endif
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP
diff --git a/boost/compute/interop/opengl/cl_gl_ext.hpp b/boost/compute/interop/opengl/cl_gl_ext.hpp
new file mode 100644
index 0000000000..3392b051e0
--- /dev/null
+++ b/boost/compute/interop/opengl/cl_gl_ext.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP
+
+#if defined(__APPLE__)
+#include <OpenCL/cl_gl_ext.h>
+#else
+#include <CL/cl_gl_ext.h>
+#endif
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP
diff --git a/boost/compute/interop/opengl/context.hpp b/boost/compute/interop/opengl/context.hpp
new file mode 100644
index 0000000000..754dca2236
--- /dev/null
+++ b/boost/compute/interop/opengl/context.hpp
@@ -0,0 +1,135 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/device.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/exception/unsupported_extension_error.hpp>
+#include <boost/compute/interop/opengl/cl_gl.hpp>
+
+#ifdef __APPLE__
+#include <OpenCL/cl_gl_ext.h>
+#include <OpenGL/OpenGL.h>
+#endif
+
+#ifdef __linux__
+#include <GL/glx.h>
+#endif
+
+namespace boost {
+namespace compute {
+
+/// Creates a shared OpenCL/OpenGL context for the currently active
+/// OpenGL context.
+///
+/// Once created, the shared context can be used to create OpenCL memory
+/// objects which can interact with OpenGL memory objects (e.g. VBOs).
+///
+/// \throws unsupported_extension_error if no CL-GL sharing capable devices
+/// are found; on Apple platforms, opencl_error if clCreateContext() fails.
+inline context opengl_create_shared_context()
+{
+ // name of the OpenGL sharing extension for the system
+#if defined(__APPLE__)
+ const char *cl_gl_sharing_extension = "cl_APPLE_gl_sharing";
+#else
+ const char *cl_gl_sharing_extension = "cl_khr_gl_sharing";
+#endif
+
+#if defined(__APPLE__)
+ // get OpenGL share group
+ CGLContextObj cgl_current_context = CGLGetCurrentContext();
+ CGLShareGroupObj cgl_share_group = CGLGetShareGroup(cgl_current_context);
+
+ cl_context_properties properties[] = {
+ CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
+ (cl_context_properties) cgl_share_group,
+ 0
+ };
+
+ cl_int error = 0;
+ cl_context cl_gl_context = clCreateContext(properties, 0, 0, 0, 0, &error);
+ if(!cl_gl_context){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+
+ return context(cl_gl_context, false);
+#else
+ typedef cl_int(*GetGLContextInfoKHRFunction)(
+ const cl_context_properties*, cl_gl_context_info, size_t, void *, size_t *
+ );
+
+ std::vector<platform> platforms = system::platforms();
+ for(size_t i = 0; i < platforms.size(); i++){
+ const platform &platform = platforms[i];
+
+ // load clGetGLContextInfoKHR(); go through size_t, since unsigned long is 32-bit on 64-bit Windows
+ GetGLContextInfoKHRFunction GetGLContextInfoKHR =
+ reinterpret_cast<GetGLContextInfoKHRFunction>(
+ reinterpret_cast<size_t>(
+ platform.get_extension_function_address("clGetGLContextInfoKHR")
+ )
+ );
+ if(!GetGLContextInfoKHR){
+ continue;
+ }
+
+ // create context properties listing the platform and current OpenGL display
+ cl_context_properties properties[] = {
+ CL_CONTEXT_PLATFORM, (cl_context_properties) platform.id(),
+ #if defined(__linux__)
+ CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(),
+ CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(),
+ #elif defined(WIN32) // NOTE(review): compilers predefine _WIN32, not WIN32 - confirm WIN32 is always set here
+ CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(),
+ CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(),
+ #endif
+ 0
+ };
+
+ // lookup current OpenCL device for current OpenGL context
+ cl_device_id gpu_id;
+ cl_int ret = GetGLContextInfoKHR(
+ properties,
+ CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
+ sizeof(cl_device_id),
+ &gpu_id,
+ 0
+ );
+ if(ret != CL_SUCCESS){
+ continue;
+ }
+
+ // create device object for the GPU and ensure it supports CL-GL sharing
+ device gpu(gpu_id, false);
+ if(!gpu.supports_extension(cl_gl_sharing_extension)){
+ continue;
+ }
+
+ // return CL-GL sharing context
+ return context(gpu, properties);
+ }
+#endif
+
+ // no CL-GL sharing capable devices found
+ BOOST_THROW_EXCEPTION(
+ unsupported_extension_error(cl_gl_sharing_extension)
+ );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP
diff --git a/boost/compute/interop/opengl/gl.hpp b/boost/compute/interop/opengl/gl.hpp
new file mode 100644
index 0000000000..a05c944075
--- /dev/null
+++ b/boost/compute/interop/opengl/gl.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP
+
+#if defined(__APPLE__)
+#include <OpenGL/gl.h>
+#else
+#include <GL/gl.h>
+#endif
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP
diff --git a/boost/compute/interop/opengl/opengl_buffer.hpp b/boost/compute/interop/opengl/opengl_buffer.hpp
new file mode 100644
index 0000000000..c27347d0d9
--- /dev/null
+++ b/boost/compute/interop/opengl/opengl_buffer.hpp
@@ -0,0 +1,106 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/interop/opengl/gl.hpp>
+#include <boost/compute/interop/opengl/cl_gl.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class opengl_buffer
+///
+/// An OpenCL buffer for accessing an OpenGL memory object.
+class opengl_buffer : public buffer
+{
+public:
+ /// Creates a null OpenGL buffer object.
+ opengl_buffer()
+ : buffer()
+ {
+ }
+
+ /// Creates a new OpenGL buffer object for \p mem; \p retain is forwarded to the buffer base class.
+ explicit opengl_buffer(cl_mem mem, bool retain = true)
+ : buffer(mem, retain)
+ {
+ }
+
+ /// Creates a new OpenGL buffer object in \p context for \p bufobj
+ /// with \p flags.
+ /// \throws opencl_error if clCreateFromGLBuffer() returns a null handle.
+ /// \see_opencl_ref{clCreateFromGLBuffer}
+ opengl_buffer(const context &context,
+ GLuint bufobj,
+ cl_mem_flags flags = read_write)
+ {
+ cl_int error = 0;
+ m_mem = clCreateFromGLBuffer(context, flags, bufobj, &error);
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new OpenGL buffer object as a copy of \p other.
+ opengl_buffer(const opengl_buffer &other)
+ : buffer(other)
+ {
+ }
+
+ /// Copies the OpenGL buffer object from \p other.
+ opengl_buffer& operator=(const opengl_buffer &other)
+ {
+ if(this != &other){
+ buffer::operator=(other);
+ }
+
+ return *this;
+ }
+
+ /// Destroys the OpenGL buffer object.
+ ~opengl_buffer()
+ {
+ }
+
+ /// Returns the OpenGL memory object ID, or 0 when clGetGLObjectInfo() leaves it unset.
+ ///
+ /// \see_opencl_ref{clGetGLObjectInfo}
+ GLuint get_opengl_object() const
+ {
+ GLuint object = 0;
+ clGetGLObjectInfo(m_mem, 0, &object);
+ return object;
+ }
+
+ /// Returns the OpenGL memory object type, or 0 when clGetGLObjectInfo() leaves it unset.
+ ///
+ /// \see_opencl_ref{clGetGLObjectInfo}
+ cl_gl_object_type get_opengl_type() const
+ {
+ cl_gl_object_type type = 0; // the call's status is not checked, so never return an indeterminate value
+ clGetGLObjectInfo(m_mem, &type, 0);
+ return type;
+ }
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for opengl_buffer: inherits the memory_object implementation unchanged
+template<>
+struct set_kernel_arg<opengl_buffer> : set_kernel_arg<memory_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP
diff --git a/boost/compute/interop/opengl/opengl_renderbuffer.hpp b/boost/compute/interop/opengl/opengl_renderbuffer.hpp
new file mode 100644
index 0000000000..fd4759d0ee
--- /dev/null
+++ b/boost/compute/interop/opengl/opengl_renderbuffer.hpp
@@ -0,0 +1,129 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP
+
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/interop/opengl/gl.hpp>
+#include <boost/compute/interop/opengl/cl_gl.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class opengl_renderbuffer
+///
+/// An OpenCL image object for accessing an OpenGL renderbuffer object.
+class opengl_renderbuffer : public image_object
+{
+public:
+ /// Creates a null OpenGL renderbuffer object.
+ opengl_renderbuffer()
+ : image_object()
+ {
+ }
+
+ /// Creates a new OpenGL renderbuffer object for \p mem; \p retain is forwarded to image_object.
+ explicit opengl_renderbuffer(cl_mem mem, bool retain = true)
+ : image_object(mem, retain)
+ {
+ }
+
+ /// Creates a new OpenGL renderbuffer object in \p context for
+ /// \p renderbuffer with \p flags.
+ /// \throws opencl_error if clCreateFromGLRenderbuffer() returns a null handle.
+ /// \see_opencl_ref{clCreateFromGLRenderbuffer}
+ opengl_renderbuffer(const context &context,
+ GLuint renderbuffer,
+ cl_mem_flags flags = read_write)
+ {
+ cl_int error = 0;
+
+ m_mem = clCreateFromGLRenderbuffer(
+ context, flags, renderbuffer, &error
+ );
+
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new OpenGL renderbuffer object as a copy of \p other.
+ opengl_renderbuffer(const opengl_renderbuffer &other)
+ : image_object(other)
+ {
+ }
+
+ /// Copies the OpenGL renderbuffer object from \p other.
+ opengl_renderbuffer& operator=(const opengl_renderbuffer &other)
+ {
+ if(this != &other){
+ image_object::operator=(other);
+ }
+
+ return *this;
+ }
+
+ /// Destroys the OpenGL renderbuffer object.
+ ~opengl_renderbuffer()
+ {
+ }
+
+ /// Returns the size (width, height) of the renderbuffer.
+ extents<2> size() const
+ {
+ extents<2> size;
+ size[0] = get_image_info<size_t>(CL_IMAGE_WIDTH);
+ size[1] = get_image_info<size_t>(CL_IMAGE_HEIGHT);
+ return size;
+ }
+
+ /// Returns the origin of the renderbuffer (\c 0, \c 0).
+ extents<2> origin() const
+ {
+ return extents<2>();
+ }
+
+ /// Returns the OpenGL memory object ID, or 0 when clGetGLObjectInfo() leaves it unset.
+ ///
+ /// \see_opencl_ref{clGetGLObjectInfo}
+ GLuint get_opengl_object() const
+ {
+ GLuint object = 0;
+ clGetGLObjectInfo(m_mem, 0, &object);
+ return object;
+ }
+
+ /// Returns the OpenGL memory object type, or 0 when clGetGLObjectInfo() leaves it unset.
+ ///
+ /// \see_opencl_ref{clGetGLObjectInfo}
+ cl_gl_object_type get_opengl_type() const
+ {
+ cl_gl_object_type type = 0; // the call's status is not checked, so never return an indeterminate value
+ clGetGLObjectInfo(m_mem, &type, 0);
+ return type;
+ }
+};
+
+namespace detail {
+
+// set_kernel_arg() specialization for opengl_renderbuffer: inherits the image_object implementation unchanged
+template<>
+struct set_kernel_arg<opengl_renderbuffer> : public set_kernel_arg<image_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+BOOST_COMPUTE_TYPE_NAME(boost::compute::opengl_renderbuffer, image2d_t) // registers image2d_t as the type name for opengl_renderbuffer
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP
diff --git a/boost/compute/interop/opengl/opengl_texture.hpp b/boost/compute/interop/opengl/opengl_texture.hpp
new file mode 100644
index 0000000000..c1f3f4f441
--- /dev/null
+++ b/boost/compute/interop/opengl/opengl_texture.hpp
@@ -0,0 +1,133 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP
+#define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP
+
+#include <boost/compute/image/image_object.hpp>
+#include <boost/compute/interop/opengl/gl.hpp>
+#include <boost/compute/interop/opengl/cl_gl.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class opengl_texture
+///
+/// An OpenCL image object for accessing an OpenGL texture object.
+class opengl_texture : public image_object
+{
+public:
+ /// Creates a null OpenGL texture object.
+ opengl_texture()
+ : image_object()
+ {
+ }
+
+ /// Creates a new OpenGL texture object for \p mem; \p retain is forwarded to image_object.
+ explicit opengl_texture(cl_mem mem, bool retain = true)
+ : image_object(mem, retain)
+ {
+ }
+
+ /// Creates a new OpenGL texture object in \p context for mip level \p miplevel
+ /// of \p texture (bound to \p texture_target) with \p flags.
+ /// \throws opencl_error if the OpenCL call returns a null handle.
+ /// \see_opencl_ref{clCreateFromGLTexture}
+ opengl_texture(const context &context,
+ GLenum texture_target,
+ GLint miplevel,
+ GLuint texture,
+ cl_mem_flags flags = read_write)
+ {
+ cl_int error = 0;
+ // Which entry point is used depends on whether the OpenCL headers define CL_VERSION_1_2.
+ #ifdef CL_VERSION_1_2
+ m_mem = clCreateFromGLTexture(context,
+ flags,
+ texture_target,
+ miplevel,
+ texture,
+ &error);
+ #else
+ m_mem = clCreateFromGLTexture2D(context,
+ flags,
+ texture_target,
+ miplevel,
+ texture,
+ &error);
+ #endif
+
+ if(!m_mem){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new OpenGL texture object as a copy of \p other.
+ opengl_texture(const opengl_texture &other)
+ : image_object(other)
+ {
+ }
+
+ /// Copies the OpenGL texture object from \p other.
+ opengl_texture& operator=(const opengl_texture &other)
+ {
+ if(this != &other){
+ image_object::operator=(other);
+ }
+
+ return *this;
+ }
+
+ /// Destroys the texture object.
+ ~opengl_texture()
+ {
+ }
+
+ /// Returns the size (width, height) of the texture, read from CL_IMAGE_WIDTH and CL_IMAGE_HEIGHT.
+ extents<2> size() const
+ {
+ extents<2> size;
+ size[0] = get_image_info<size_t>(CL_IMAGE_WIDTH);
+ size[1] = get_image_info<size_t>(CL_IMAGE_HEIGHT);
+ return size;
+ }
+
+ /// Returns the origin of the texture (\c 0, \c 0).
+ extents<2> origin() const
+ {
+ return extents<2>();
+ }
+
+ /// Returns information about the texture.
+ /// \p info selects the value to query and \c T is the type it is returned as.
+ /// \see_opencl_ref{clGetGLTextureInfo}
+ template<class T>
+ T get_texture_info(cl_gl_texture_info info) const
+ {
+ return detail::get_object_info<T>(clGetGLTextureInfo, m_mem, info);
+ }
+};
+
+namespace detail {
+
+// set_kernel_arg() specialization for opengl_texture: inherits the image_object implementation unchanged
+template<>
+struct set_kernel_arg<opengl_texture> : public set_kernel_arg<image_object> { };
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+BOOST_COMPUTE_TYPE_NAME(boost::compute::opengl_texture, image2d_t) // registers image2d_t as the type name for opengl_texture
+
+#endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP
diff --git a/boost/compute/interop/qt.hpp b/boost/compute/interop/qt.hpp
new file mode 100644
index 0000000000..f53691cc34
--- /dev/null
+++ b/boost/compute/interop/qt.hpp
@@ -0,0 +1,17 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_HPP
+#define BOOST_COMPUTE_INTEROP_QT_HPP
+
+#include <boost/compute/interop/qt/qtcore.hpp>
+#include <boost/compute/interop/qt/qtgui.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_QT_HPP
diff --git a/boost/compute/interop/qt/qimage.hpp b/boost/compute/interop/qt/qimage.hpp
new file mode 100644
index 0000000000..faa6f98cc8
--- /dev/null
+++ b/boost/compute/interop/qt/qimage.hpp
@@ -0,0 +1,69 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP
+#define BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/image/image2d.hpp>
+#include <boost/compute/image/image_format.hpp>
+#include <boost/compute/utility/dim.hpp>
+
+#include <QImage>
+
+namespace boost {
+namespace compute {
+
+inline image_format qt_qimage_format_to_image_format(const QImage::Format &format)
+{ // Maps a QImage format to an image_format; only QImage::Format_RGB32 (-> bgra, unorm_int8) is handled.
+ if(format == QImage::Format_RGB32){
+ return image_format(image_format::bgra, image_format::unorm_int8);
+ }
+ // Every other QImage format is rejected with CL_IMAGE_FORMAT_NOT_SUPPORTED.
+ BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED));
+}
+
+inline QImage::Format qt_image_format_to_qimage_format(const image_format &format)
+{ // Maps an image_format to a QImage format; only (bgra, unorm_int8) -> Format_RGB32 is handled.
+ if(format == image_format(image_format::bgra, image_format::unorm_int8)){
+ return QImage::Format_RGB32;
+ }
+ // Unlike qt_qimage_format_to_image_format(), an unsupported format does not throw here.
+ return QImage::Format_Invalid;
+}
+
+inline image_format qt_qimage_get_format(const QImage &image)
+{ // Returns the image_format for image.format(); throws opencl_error if that format is unsupported.
+ return qt_qimage_format_to_image_format(image.format());
+}
+
+inline void qt_copy_qimage_to_image2d(const QImage &qimage,
+ image2d &image,
+ command_queue &queue)
+{ // Writes qimage's pixel data into image; the region is image's full extent, not qimage's size.
+ queue.enqueue_write_image(image, image.origin(), image.size(), qimage.constBits()); // NOTE(review): no size/format check - presumably the caller guarantees they match
+}
+
+inline void qt_copy_image2d_to_qimage(const image2d &image,
+ QImage &qimage,
+ command_queue &queue)
+{ // Reads a qimage.width() x qimage.height() region starting at (0, 0) of image into qimage's pixel data.
+ queue.enqueue_read_image(
+ image, dim(0, 0), dim(qimage.width(), qimage.height()), qimage.bits()
+ );
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP
diff --git a/boost/compute/interop/qt/qpoint.hpp b/boost/compute/interop/qt/qpoint.hpp
new file mode 100644
index 0000000000..d867fc7a43
--- /dev/null
+++ b/boost/compute/interop/qt/qpoint.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP
+#define BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP
+
+#include <QPoint>
+
+#include <boost/compute/type_traits/type_name.hpp>
+
+BOOST_COMPUTE_TYPE_NAME(QPoint, int2) // name is a bare token (stringized by the macro); quoting it embeds the quotes in the type name
+
+#endif // BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP
diff --git a/boost/compute/interop/qt/qpointf.hpp b/boost/compute/interop/qt/qpointf.hpp
new file mode 100644
index 0000000000..fd7392a804
--- /dev/null
+++ b/boost/compute/interop/qt/qpointf.hpp
@@ -0,0 +1,20 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP
+#define BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP
+
+#include <QPointF>
+
+#include <boost/compute/type_traits/type_name.hpp>
+
+BOOST_COMPUTE_TYPE_NAME(QPointF, float2) // name is a bare token (stringized by the macro); quoting it embeds the quotes in the type name
+
+#endif // BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP
diff --git a/boost/compute/interop/qt/qtcore.hpp b/boost/compute/interop/qt/qtcore.hpp
new file mode 100644
index 0000000000..b8978180e0
--- /dev/null
+++ b/boost/compute/interop/qt/qtcore.hpp
@@ -0,0 +1,18 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP
+#define BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP
+
+#include <boost/compute/interop/qt/qpoint.hpp>
+#include <boost/compute/interop/qt/qpointf.hpp>
+#include <boost/compute/interop/qt/qvector.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP
diff --git a/boost/compute/interop/qt/qtgui.hpp b/boost/compute/interop/qt/qtgui.hpp
new file mode 100644
index 0000000000..f1078f48b9
--- /dev/null
+++ b/boost/compute/interop/qt/qtgui.hpp
@@ -0,0 +1,16 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP
+#define BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP
+
+#include <boost/compute/interop/qt/qimage.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP
diff --git a/boost/compute/interop/qt/qvector.hpp b/boost/compute/interop/qt/qvector.hpp
new file mode 100644
index 0000000000..3ac93d2aaa
--- /dev/null
+++ b/boost/compute/interop/qt/qvector.hpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP
+#define BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP
+
+#include <boost/compute/detail/is_contiguous_iterator.hpp>
+
+#include <QVector>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class Iterator> // Marks QVector<T>::iterator as a contiguous iterator (derives from true_type).
+struct _is_contiguous_iterator<
+ Iterator,
+ typename boost::enable_if<
+ typename boost::is_same<
+ Iterator,
+ typename QVector<typename Iterator::value_type>::iterator
+ >::type
+ >::type
+> : public boost::true_type {};
+
+template<class Iterator> // Marks QVector<T>::const_iterator as a contiguous iterator (derives from true_type).
+struct _is_contiguous_iterator<
+ Iterator,
+ typename boost::enable_if<
+ typename boost::is_same<
+ Iterator,
+ typename QVector<typename Iterator::value_type>::const_iterator
+ >::type
+ >::type
+> : public boost::true_type {};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP
diff --git a/boost/compute/interop/vtk.hpp b/boost/compute/interop/vtk.hpp
new file mode 100644
index 0000000000..3f866e5f8f
--- /dev/null
+++ b/boost/compute/interop/vtk.hpp
@@ -0,0 +1,19 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_VTK_HPP
+#define BOOST_COMPUTE_INTEROP_VTK_HPP
+
+#include <boost/compute/interop/vtk/bounds.hpp>
+#include <boost/compute/interop/vtk/data_array.hpp>
+#include <boost/compute/interop/vtk/matrix4x4.hpp>
+#include <boost/compute/interop/vtk/points.hpp>
+
+#endif // BOOST_COMPUTE_INTEROP_VTK_HPP
diff --git a/boost/compute/interop/vtk/bounds.hpp b/boost/compute/interop/vtk/bounds.hpp
new file mode 100644
index 0000000000..360a9061f9
--- /dev/null
+++ b/boost/compute/interop/vtk/bounds.hpp
@@ -0,0 +1,59 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP
+#define BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP
+
+#include <vector>
+#include <iterator>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/algorithm/reduce.hpp>
+#include <boost/compute/container/array.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Calculates the bounds for the points in the range [\p first, \p last) and
+/// stores the result in \p bounds as (min, max) pairs for components 0, 1, 2.
+///
+/// For example, this can be used to implement the GetBounds() method for a
+/// vtkMapper subclass.
+template<class PointIterator>
+inline void vtk_compute_bounds(PointIterator first,
+                               PointIterator last,
+                               double bounds[6],
+                               command_queue &queue = system::default_queue())
+{
+    typedef typename std::iterator_traits<PointIterator>::value_type T;
+
+    const context &context = queue.get_context();
+
+    // compute min (slot 0) and max (slot 1) point; qualified to bypass ADL
+    array<T, 2> extrema(context);
+    ::boost::compute::reduce(first, last, extrema.begin() + 0, min<T>(), queue);
+    ::boost::compute::reduce(first, last, extrema.begin() + 1, max<T>(), queue);
+
+    // copy results to host buffer
+    std::vector<T> buffer(2);
+    ::boost::compute::copy_n(extrema.begin(), 2, buffer.begin(), queue);
+
+    // copy to vtk-style bounds
+    bounds[0] = buffer[0][0]; bounds[1] = buffer[1][0];
+    bounds[2] = buffer[0][1]; bounds[3] = buffer[1][1];
+    bounds[4] = buffer[0][2]; bounds[5] = buffer[1][2];
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP
diff --git a/boost/compute/interop/vtk/data_array.hpp b/boost/compute/interop/vtk/data_array.hpp
new file mode 100644
index 0000000000..7b909b1a3f
--- /dev/null
+++ b/boost/compute/interop/vtk/data_array.hpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP
+#define BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP
+
+#include <vtkDataArray.h>
+#include <vtkDataArrayTemplate.h>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies the values in \p data to \p buffer (declared only in this header).
+template<class T>
+inline void vtk_copy_data_array_to_buffer(const vtkDataArray *data,
+ buffer_iterator<T> buffer,
+ command_queue &queue = system::default_queue());
+
+/// \internal_ (typed overload: copies every component of every tuple)
+template<class T>
+inline void vtk_copy_data_array_to_buffer(const vtkDataArrayTemplate<T> *data,
+                                          buffer_iterator<T> buffer,
+                                          command_queue &queue = system::default_queue())
+{
+    vtkDataArrayTemplate<T> *source = const_cast<vtkDataArrayTemplate<T> *>(data);
+    const T *host_values = static_cast<const T *>(source->GetVoidPointer(0));
+    const size_t value_count = source->GetNumberOfComponents() * source->GetNumberOfTuples();
+    ::boost::compute::copy_n(host_values, value_count, buffer, queue);
+}
+
+/// Copies the range [\p first, \p last) to \p data (declared only in this header).
+template<class T>
+inline void vtk_copy_buffer_to_data_array(buffer_iterator<T> first,
+ buffer_iterator<T> last,
+ vtkDataArray *data,
+ command_queue &queue = system::default_queue());
+
+/// \internal_ (typed overload: writes the range into the storage of \p data)
+template<class T>
+inline void vtk_copy_buffer_to_data_array(buffer_iterator<T> first,
+                                          buffer_iterator<T> last,
+                                          vtkDataArrayTemplate<T> *data,
+                                          command_queue &queue = system::default_queue())
+{
+    // the destination is the pointer returned by GetVoidPointer(0)
+    ::boost::compute::copy(first, last, static_cast<T *>(data->GetVoidPointer(0)), queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP
diff --git a/boost/compute/interop/vtk/matrix4x4.hpp b/boost/compute/interop/vtk/matrix4x4.hpp
new file mode 100644
index 0000000000..550c49f19f
--- /dev/null
+++ b/boost/compute/interop/vtk/matrix4x4.hpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP
+#define BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP
+
+#include <vtkMatrix4x4.h>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Converts a \c vtkMatrix4x4 to a \c float16_.
+///
+/// Element (i, j) of \p matrix is stored at index i*4+j of the result.
+inline float16_ vtk_matrix4x4_to_float16(const vtkMatrix4x4 *matrix)
+{
+    float16_ result;
+
+    for(int k = 0; k < 16; k++){
+        result[k] = matrix->GetElement(k / 4, k % 4);
+    }
+
+    return result;
+}
+
+/// Converts a \c vtkMatrix4x4 to a \c double16_ (element (i, j) at index i*4+j).
+inline double16_ vtk_matrix4x4_to_double16(const vtkMatrix4x4 *matrix)
+{
+    double16_ result; // filled element-wise: no <cstring>/memcpy dependency
+    for(int k = 0; k < 16; k++){ result[k] = matrix->GetElement(k / 4, k % 4); }
+    return result;
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP
diff --git a/boost/compute/interop/vtk/points.hpp b/boost/compute/interop/vtk/points.hpp
new file mode 100644
index 0000000000..fefbbb9874
--- /dev/null
+++ b/boost/compute/interop/vtk/points.hpp
@@ -0,0 +1,55 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP
+#define BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP
+
+#include <vector>
+
+#include <vtkPoints.h>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Copies \p points to \p buffer.
+///
+/// For example, to copy from a \c vtkPoints object to a \c vector<float4_>:
+/// \code
+/// vtkPoints *points = ...
+/// vector<float4_> vector(points->GetNumberOfPoints(), context);
+/// vtk_copy_points_to_buffer(points, vector.begin(), queue);
+/// \endcode
+template<class PointType>
+inline void vtk_copy_points_to_buffer(const vtkPoints *points,
+                                      buffer_iterator<PointType> buffer,
+                                      command_queue &queue = system::default_queue())
+{
+    vtkPoints *points_ = const_cast<vtkPoints *>(points);
+    const vtkIdType count = points_->GetNumberOfPoints(); // queried once
+    // stage each point on the host as PointType(x, y, z, 1)
+    std::vector<PointType> tmp(count);
+    for(vtkIdType i = 0; i < count; i++){
+        const double *p = points_->GetPoint(i);
+        tmp[i] = PointType(p[0], p[1], p[2], 1);
+    }
+
+    // qualified so ADL on the std::vector iterators cannot select std::copy
+    ::boost::compute::copy(tmp.begin(), tmp.end(), buffer, queue);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP
diff --git a/boost/compute/iterator.hpp b/boost/compute/iterator.hpp
new file mode 100644
index 0000000000..59442b1599
--- /dev/null
+++ b/boost/compute/iterator.hpp
@@ -0,0 +1,28 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute iterator headers.
+
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/iterator/constant_iterator.hpp>
+#include <boost/compute/iterator/constant_buffer_iterator.hpp>
+#include <boost/compute/iterator/counting_iterator.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/iterator/function_input_iterator.hpp>
+#include <boost/compute/iterator/permutation_iterator.hpp>
+#include <boost/compute/iterator/transform_iterator.hpp>
+#include <boost/compute/iterator/zip_iterator.hpp>
+
+#endif // BOOST_COMPUTE_ITERATOR_HPP
diff --git a/boost/compute/iterator/buffer_iterator.hpp b/boost/compute/iterator/buffer_iterator.hpp
new file mode 100644
index 0000000000..cd68058f64
--- /dev/null
+++ b/boost/compute/iterator/buffer_iterator.hpp
@@ -0,0 +1,280 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/type_traits.hpp>
+#include <boost/static_assert.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/detail/buffer_value.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for buffer_iterator<T>
+template<class T> class buffer_iterator;
+
+namespace detail {
+
+// helper class whose nested type is the iterator_facade super-class of
+// buffer_iterator<T>: random access, with buffer_value<T> as reference type
+template<class T>
+class buffer_iterator_base
+{
+public:
+ typedef ::boost::iterator_facade<
+ ::boost::compute::buffer_iterator<T>,
+ T,
+ ::std::random_access_iterator_tag,
+ ::boost::compute::detail::buffer_value<T>
+ > type;
+};
+
+// expression object for "iterator[expr]": element m_index + m_expr of m_buffer
+template<class T, class IndexExpr>
+struct buffer_iterator_index_expr
+{
+    typedef T result_type;
+
+    buffer_iterator_index_expr(const buffer &buffer,
+                               size_t index,
+                               const memory_object::address_space address_space,
+                               const IndexExpr &expr)
+        : m_buffer(buffer),
+          m_index(index),
+          m_address_space(address_space),
+          m_expr(expr)
+    {}
+
+    // host-side read; adds the base index just like the kernel-source form
+    operator T() const
+    {
+        BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value,
+                                "Index expression must be integral");
+        return buffer_value<T>(m_buffer, (m_index + size_t(m_expr)) * sizeof(T));
+    }
+
+    const buffer &m_buffer;
+    size_t m_index;
+    memory_object::address_space m_address_space;
+    IndexExpr m_expr;
+};
+
+// streams the source text of an indexed buffer access into \p kernel:
+// "<buffer>[<expr>]" or, for a non-zero base index, "<buffer>[<index>+(<expr>)]"
+template<class T, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const buffer_iterator_index_expr<T, IndexExpr> &expr)
+{
+    kernel << kernel.get_buffer_identifier<T>(expr.m_buffer, expr.m_address_space)
+           << '[';
+
+    if(expr.m_index != 0){
+        // only a non-zero base index is written into the generated source
+        return kernel << uint_(expr.m_index) << "+(" << expr.m_expr << ")]";
+    }
+    return kernel << expr.m_expr << ']';
+}
+
+} // end detail namespace
+
+/// \class buffer_iterator
+/// \brief An iterator for values in a buffer.
+///
+/// The buffer_iterator class iterates over values in a memory buffer on a
+/// compute device. It is the most commonly used iterator in Boost.Compute
+/// and is used by the \ref vector "vector<T>" and \ref array "array<T, N>"
+/// container classes.
+///
+/// Buffer iterators store the handle of a memory buffer along with an element
+/// index into it; destroying an iterator does not release the buffer.
+///
+/// The buffer_iterator class allows for arbitrary OpenCL memory objects
+/// (including those created outside of Boost.Compute) to be used with the
+/// Boost.Compute algorithms (such as transform() and sort()). For example,
+/// to reverse the contents of an OpenCL memory buffer containing a set of
+/// integers:
+///
+/// \snippet test/test_buffer_iterator.cpp reverse_external_buffer
+///
+/// \see buffer, make_buffer_iterator()
+template<class T>
+class buffer_iterator : public detail::buffer_iterator_base<T>::type
+{
+public:
+ typedef typename detail::buffer_iterator_base<T>::type super_type;
+ typedef typename super_type::reference reference;
+ typedef typename super_type::difference_type difference_type;
+
+ buffer_iterator()
+ : m_index(0)
+ {
+ }
+
+ buffer_iterator(const buffer &buffer, size_t index)
+ : m_buffer(buffer.get(), false),
+ m_index(index)
+ {
+ }
+
+ buffer_iterator(const buffer_iterator<T> &other)
+ : m_buffer(other.m_buffer.get(), false),
+ m_index(other.m_index)
+ {
+ }
+
+ buffer_iterator<T>& operator=(const buffer_iterator<T> &other)
+ {
+ if(this != &other){
+ m_buffer.get() = other.m_buffer.get();
+ m_index = other.m_index;
+ }
+
+ return *this;
+ }
+
+ ~buffer_iterator()
+ {
+ // set the buffer handle to null so that the buffer's reference count
+ // will not be decremented when the m_buffer member is destroyed
+ m_buffer.get() = 0;
+ }
+
+ const buffer& get_buffer() const
+ {
+ return m_buffer;
+ }
+
+ size_t get_index() const
+ {
+ return m_index;
+ }
+
+ T read(command_queue &queue) const
+ {
+ BOOST_ASSERT(m_buffer.get());
+ BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T));
+
+ return detail::read_single_value<T>(m_buffer, m_index, queue);
+ }
+
+ void write(const T &value, command_queue &queue)
+ {
+ BOOST_ASSERT(m_buffer.get());
+ BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T));
+
+ detail::write_single_value<T>(value, m_buffer, m_index, queue);
+ }
+
+ /// \internal_ (returns an index expression based at this iterator's index)
+ template<class Expr>
+ detail::buffer_iterator_index_expr<T, Expr>
+ operator[](const Expr &expr) const
+ {
+ BOOST_ASSERT(m_buffer.get());
+
+ return detail::buffer_iterator_index_expr<T, Expr>(
+ m_buffer, m_index, memory_object::global_memory, expr
+ );
+ }
+
+private:
+ friend class ::boost::iterator_core_access;
+
+ /// \internal_ (returns a buffer_value built with offset m_index * sizeof(T))
+ reference dereference() const
+ {
+ return detail::buffer_value<T>(m_buffer, m_index * sizeof(T));
+ }
+
+ /// \internal_ (equal when both the buffer handle and the index match)
+ bool equal(const buffer_iterator<T> &other) const
+ {
+ return m_buffer.get() == other.m_buffer.get() &&
+ m_index == other.m_index;
+ }
+
+ /// \internal_
+ void increment()
+ {
+ m_index++;
+ }
+
+ /// \internal_
+ void decrement()
+ {
+ m_index--;
+ }
+
+ /// \internal_ (\p n is signed, so the iterator can also move backwards)
+ void advance(difference_type n)
+ {
+ m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n);
+ }
+
+ /// \internal_ (index difference from this iterator to \p other)
+ difference_type distance_to(const buffer_iterator<T> &other) const
+ {
+ return static_cast<difference_type>(other.m_index - m_index);
+ }
+
+private:
+ const buffer m_buffer;
+ size_t m_index;
+};
+
+/// Creates a new \ref buffer_iterator for \p buffer at \p index.
+///
+/// \param buffer the \ref buffer object
+/// \param index the index in the buffer (defaults to 0)
+/// \return a \c buffer_iterator for \p buffer at \p index
+template<class T>
+inline buffer_iterator<T>
+make_buffer_iterator(const buffer &buffer, size_t index = 0)
+{
+    buffer_iterator<T> iter(buffer, index);
+    return iter;
+}
+
+/// \internal_ (is_device_iterator is true for every buffer_iterator<T>)
+template<class T>
+struct is_device_iterator<buffer_iterator<T> > : boost::true_type {};
+
+namespace detail {
+
+// is_buffer_iterator: true when Iterator, ignoring const, is a buffer_iterator
+template<class Iterator>
+struct is_buffer_iterator<
+ Iterator,
+ typename boost::enable_if<
+ boost::is_same<
+ buffer_iterator<typename Iterator::value_type>,
+ typename boost::remove_const<Iterator>::type
+ >
+ >::type
+> : public boost::true_type {};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP
diff --git a/boost/compute/iterator/constant_buffer_iterator.hpp b/boost/compute/iterator/constant_buffer_iterator.hpp
new file mode 100644
index 0000000000..ef9a2ac959
--- /dev/null
+++ b/boost/compute/iterator/constant_buffer_iterator.hpp
@@ -0,0 +1,209 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for constant_buffer_iterator<T>
+template<class T> class constant_buffer_iterator;
+
+namespace detail {
+
+// helper class whose nested type is the iterator_facade super-class of
+// constant_buffer_iterator<T>: random access, buffer_value<T> as reference type
+template<class T>
+class constant_buffer_iterator_base
+{
+public:
+ typedef ::boost::iterator_facade<
+ ::boost::compute::constant_buffer_iterator<T>,
+ T,
+ ::std::random_access_iterator_tag,
+ ::boost::compute::detail::buffer_value<T>
+ > type;
+};
+
+} // end detail namespace
+
+/// \class constant_buffer_iterator
+/// \brief An iterator for a buffer in the \c constant memory space.
+///
+/// The constant_buffer_iterator class provides an iterator for values in a
+/// buffer in the \c constant memory space. It keeps a pointer to the buffer
+/// object it was created from, so that object must outlive the iterator.
+///
+/// For iterating over values in the \c global memory space (the most common
+/// case), use the buffer_iterator class.
+///
+/// \see buffer_iterator
+template<class T>
+class constant_buffer_iterator :
+    public detail::constant_buffer_iterator_base<T>::type
+{
+public:
+    typedef typename detail::constant_buffer_iterator_base<T>::type super_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::difference_type difference_type;
+
+    constant_buffer_iterator()
+        : m_buffer(0),
+          m_index(0)
+    {}
+
+    constant_buffer_iterator(const buffer &buffer, size_t index)
+        : m_buffer(&buffer),
+          m_index(index)
+    {}
+
+    constant_buffer_iterator(const constant_buffer_iterator<T> &other)
+        : m_buffer(other.m_buffer),
+          m_index(other.m_index)
+    {}
+
+    constant_buffer_iterator<T>& operator=(const constant_buffer_iterator<T> &other)
+    {
+        if(this != &other){
+            m_buffer = other.m_buffer;
+            m_index = other.m_index;
+        }
+
+        return *this;
+    }
+
+    ~constant_buffer_iterator()
+    {}
+
+    const buffer& get_buffer() const
+    {
+        return *m_buffer;
+    }
+
+    size_t get_index() const
+    {
+        return m_index;
+    }
+
+    /// Reads the value at the iterator's index using \p queue.
+    T read(command_queue &queue) const
+    {
+        BOOST_ASSERT(m_buffer && m_buffer->get());
+        BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T));
+
+        return detail::read_single_value<T>(*m_buffer, m_index, queue);
+    }
+
+    /// Writes \p value at the iterator's index using \p queue.
+    void write(const T &value, command_queue &queue)
+    {
+        BOOST_ASSERT(m_buffer && m_buffer->get());
+        BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T));
+
+        detail::write_single_value<T>(value, *m_buffer, m_index, queue);
+    }
+
+    template<class Expr>
+    detail::buffer_iterator_index_expr<T, Expr>
+    operator[](const Expr &expr) const
+    {
+        BOOST_ASSERT(m_buffer);
+        BOOST_ASSERT(m_buffer->get());
+
+        return detail::buffer_iterator_index_expr<T, Expr>(
+            *m_buffer, m_index, memory_object::constant_memory, expr
+        );
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // offset is scaled to bytes, matching buffer_iterator<T>::dereference()
+    reference dereference() const
+    {
+        return detail::buffer_value<T>(*m_buffer, m_index * sizeof(T));
+    }
+
+    bool equal(const constant_buffer_iterator<T> &other) const
+    {
+        return m_buffer == other.m_buffer && m_index == other.m_index;
+    }
+
+    void increment()
+    {
+        m_index++;
+    }
+
+    void decrement()
+    {
+        m_index--;
+    }
+
+    void advance(difference_type n)
+    {
+        m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n);
+    }
+
+    difference_type distance_to(const constant_buffer_iterator<T> &other) const
+    {
+        return static_cast<difference_type>(other.m_index - m_index);
+    }
+
+private:
+    const buffer *m_buffer;
+    size_t m_index;
+};
+
+/// Creates a new constant_buffer_iterator for \p buffer at \p index.
+///
+/// \param buffer the \ref buffer object
+/// \param index the index in the buffer (defaults to 0)
+/// \return a \c constant_buffer_iterator for \p buffer at \p index
+template<class T>
+inline constant_buffer_iterator<T>
+make_constant_buffer_iterator(const buffer &buffer, size_t index = 0)
+{
+    constant_buffer_iterator<T> iter(buffer, index);
+    return iter;
+}
+
+/// \internal_ (is_device_iterator is true for every constant_buffer_iterator<T>)
+template<class T>
+struct is_device_iterator<constant_buffer_iterator<T> > : boost::true_type {};
+
+namespace detail {
+
+// is_buffer_iterator: true when Iterator, ignoring const, is a constant_buffer_iterator
+template<class Iterator>
+struct is_buffer_iterator<
+ Iterator,
+ typename boost::enable_if<
+ boost::is_same<
+ constant_buffer_iterator<typename Iterator::value_type>,
+ typename boost::remove_const<Iterator>::type
+ >
+ >::type
+> : public boost::true_type {};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP
diff --git a/boost/compute/iterator/constant_iterator.hpp b/boost/compute/iterator/constant_iterator.hpp
new file mode 100644
index 0000000000..f0d45c02c0
--- /dev/null
+++ b/boost/compute/iterator/constant_iterator.hpp
@@ -0,0 +1,171 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP
+
+#include <string>
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for constant_iterator<T>
+template<class T> class constant_iterator;
+
+namespace detail {
+
+// helper class whose nested type is the iterator_facade super-class of
+// constant_iterator<T> (random access; no explicit reference type is given)
+template<class T>
+class constant_iterator_base
+{
+public:
+ typedef ::boost::iterator_facade<
+ ::boost::compute::constant_iterator<T>,
+ T,
+ ::std::random_access_iterator_tag
+ > type;
+};
+
+} // end detail namespace
+
+/// \class constant_iterator
+/// \brief An iterator with a constant value.
+///
+/// The constant_iterator class provides an iterator which returns a constant
+/// value when dereferenced.
+///
+/// For example, this could be used to implement the fill() algorithm in terms
+/// of the copy() algorithm by copying from a range of constant iterators:
+///
+/// \snippet test/test_constant_iterator.cpp fill_with_copy
+///
+/// \see make_constant_iterator()
+template<class T>
+class constant_iterator : public detail::constant_iterator_base<T>::type
+{
+public:
+    typedef typename detail::constant_iterator_base<T>::type super_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::difference_type difference_type;
+
+    /// Creates an iterator which yields \p value, positioned at \p index.
+    constant_iterator(const T &value, size_t index = 0)
+        : m_value(value),
+          m_index(index)
+    {}
+
+    /// Copies the value and the position of \p other.
+    constant_iterator(const constant_iterator<T> &other)
+        : m_value(other.m_value),
+          m_index(other.m_index)
+    {}
+
+    constant_iterator<T>& operator=(const constant_iterator<T> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+        m_value = other.m_value;
+        m_index = other.m_index;
+        return *this;
+    }
+
+    ~constant_iterator()
+    {}
+
+    /// Returns the iterator's position (the value does not depend on it).
+    size_t get_index() const
+    {
+        return m_index;
+    }
+
+    /// \internal_ (every index expression yields the constant as a literal)
+    template<class Expr>
+    detail::meta_kernel_literal<T> operator[](const Expr &) const
+    {
+        return detail::meta_kernel::make_lit<T>(m_value);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    /// \internal_
+    reference dereference() const
+    {
+        return m_value;
+    }
+
+    /// \internal_ (equal when both the value and the position match)
+    bool equal(const constant_iterator<T> &other) const
+    {
+        return m_value == other.m_value && m_index == other.m_index;
+    }
+
+    /// \internal_
+    void increment()
+    {
+        ++m_index;
+    }
+
+    /// \internal_
+    void decrement()
+    {
+        --m_index;
+    }
+
+    /// \internal_ (\p n is signed, so the iterator can also move backwards)
+    void advance(difference_type n)
+    {
+        const difference_type target = static_cast<difference_type>(m_index) + n;
+        m_index = static_cast<size_t>(target);
+    }
+
+    /// \internal_
+    difference_type distance_to(const constant_iterator<T> &other) const
+    {
+        const size_t delta = other.m_index - m_index;
+        return static_cast<difference_type>(delta);
+    }
+
+private:
+    T m_value;
+    size_t m_index;
+};
+
+/// Returns a new constant_iterator with \p value at \p index.
+///
+/// \param value the constant value
+/// \param index the iterator's index (defaults to 0)
+/// \return a \c constant_iterator with \p value
+template<class T>
+inline constant_iterator<T>
+make_constant_iterator(const T &value, size_t index = 0)
+{
+    constant_iterator<T> iter(value, index);
+    return iter;
+}
+
+/// \internal_ (is_device_iterator is true for every constant_iterator<T>)
+template<class T>
+struct is_device_iterator<constant_iterator<T> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP
diff --git a/boost/compute/iterator/counting_iterator.hpp b/boost/compute/iterator/counting_iterator.hpp
new file mode 100644
index 0000000000..304c1e05cf
--- /dev/null
+++ b/boost/compute/iterator/counting_iterator.hpp
@@ -0,0 +1,185 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP
+
+#include <string>
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for counting_iterator<T>
+template<class T> class counting_iterator;
+
+namespace detail {
+
+// helper class whose nested type is the iterator_facade super-class of
+// counting_iterator<T> (random access; no explicit reference type is given)
+template<class T>
+class counting_iterator_base
+{
+public:
+ typedef ::boost::iterator_facade<
+ ::boost::compute::counting_iterator<T>,
+ T,
+ ::std::random_access_iterator_tag
+ > type;
+};
+
+// expression object for "counting_iterator[expr]": the value m_init + m_expr
+template<class T, class IndexExpr>
+struct counting_iterator_index_expr
+{
+    typedef T result_type;
+    counting_iterator_index_expr(const T &init, const IndexExpr &expr)
+        : m_init(init),
+          m_expr(expr)
+    {}
+
+    T m_init; // held by value: a reference could dangle once the iterator dies
+    IndexExpr m_expr;
+};
+
+// streams the expression into \p kernel as "(<init>+<expr>)"
+template<class T, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const counting_iterator_index_expr<T, IndexExpr> &expr)
+{
+    return kernel << '(' << expr.m_init << '+' << expr.m_expr << ')';
+}
+
+} // end detail namespace
+
+/// \class counting_iterator
+/// \brief The counting_iterator class implements a counting iterator.
+///
+/// A counting iterator returns an internal value (initialized with \p init)
+/// which is incremented each time the iterator is incremented.
+///
+/// For example, this could be used to implement the iota() algorithm in terms
+/// of the copy() algorithm by copying from a range of counting iterators:
+///
+/// \snippet test/test_counting_iterator.cpp iota_with_copy
+///
+/// \see make_counting_iterator()
+template<class T>
+class counting_iterator : public detail::counting_iterator_base<T>::type
+{
+public:
+    typedef typename detail::counting_iterator_base<T>::type super_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::difference_type difference_type;
+
+    counting_iterator(const T &init)
+        : m_init(init)
+    {}
+
+    counting_iterator(const counting_iterator<T> &other)
+        : m_init(other.m_init)
+    {}
+
+    counting_iterator<T>& operator=(const counting_iterator<T> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+        m_init = other.m_init;
+        return *this;
+    }
+
+    ~counting_iterator()
+    {}
+
+    /// Always zero: the position is carried by the counter value itself.
+    size_t get_index() const
+    {
+        return 0;
+    }
+
+    template<class Expr>
+    detail::counting_iterator_index_expr<T, Expr>
+    operator[](const Expr &expr) const
+    {
+        return detail::counting_iterator_index_expr<T, Expr>(m_init, expr);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    reference dereference() const
+    {
+        return m_init;
+    }
+
+    bool equal(const counting_iterator<T> &other) const
+    {
+        return m_init == other.m_init;
+    }
+
+    void increment()
+    {
+        m_init++;
+    }
+
+    void decrement()
+    {
+        m_init--;
+    }
+
+    void advance(difference_type n)
+    {
+        m_init += static_cast<T>(n);
+    }
+
+    difference_type distance_to(const counting_iterator<T> &other) const
+    {
+        const difference_type here = difference_type(m_init);
+        const difference_type there = difference_type(other.m_init);
+        return there - here;
+    }
+
+private:
+    T m_init;
+};
+
+/// Returns a new counting_iterator starting at \p init.
+///
+/// \param init the initial value
+/// \return a counting_iterator with \p init.
+///
+/// For example, to create a counting iterator which returns unsigned integers
+/// and increments from one:
+/// \code
+/// auto iter = make_counting_iterator<uint_>(1);
+/// \endcode
+template<class T>
+inline counting_iterator<T> make_counting_iterator(const T &init)
+{
+    counting_iterator<T> iter(init);
+    return iter;
+}
+
+/// \internal_ (is_device_iterator is true for every counting_iterator<T>)
+template<class T>
+struct is_device_iterator<counting_iterator<T> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP
diff --git a/boost/compute/iterator/detail/get_base_iterator_buffer.hpp b/boost/compute/iterator/detail/get_base_iterator_buffer.hpp
new file mode 100644
index 0000000000..3d14355115
--- /dev/null
+++ b/boost/compute/iterator/detail/get_base_iterator_buffer.hpp
@@ -0,0 +1,52 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP
+#define BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// overload selected when Iterator::base_type is a buffer iterator:
+// returns the buffer reported by the adaptor's base iterator.
+template<class Iterator>
+inline const buffer&
+get_base_iterator_buffer(const Iterator &iter,
+                         typename boost::enable_if<
+                             is_buffer_iterator<typename Iterator::base_type>
+                         >::type* = 0)
+{
+    const buffer &base_buffer = iter.base().get_buffer();
+
+    return base_buffer;
+}
+
+// overload selected when Iterator::base_type is not a buffer iterator:
+// the iterator argument is ignored and a reference to one shared,
+// default-constructed buffer is returned.
+template<class Iterator>
+inline const buffer&
+get_base_iterator_buffer(const Iterator &,
+                         typename boost::disable_if<
+                             is_buffer_iterator<typename Iterator::base_type>
+                         >::type* = 0)
+{
+    static buffer null_buffer;
+
+    return null_buffer;
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP
diff --git a/boost/compute/iterator/detail/swizzle_iterator.hpp b/boost/compute/iterator/detail/swizzle_iterator.hpp
new file mode 100644
index 0000000000..c7c3c45340
--- /dev/null
+++ b/boost/compute/iterator/detail/swizzle_iterator.hpp
@@ -0,0 +1,188 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP
+
+#include <string>
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_adaptor.hpp>
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/type_traits/make_vector_type.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// forward declaration for swizzle_iterator
+template<class InputIterator, size_t Size>
+class swizzle_iterator;
+
+// meta-function returning the value_type for a swizzle_iterator: the
+// vector type with Size components whose scalar type is taken from the
+// value_type of InputIterator
+template<class InputIterator, size_t Size>
+struct make_swizzle_iterator_value_type
+{
+private:
+    typedef typename std::iterator_traits<InputIterator>::value_type
+        input_value_type;
+    typedef typename scalar_type<input_value_type>::type component_type;
+
+public:
+    typedef typename make_vector_type<component_type, Size>::type type;
+};
+
+// helper class exposing, as the nested typedef "type", the
+// iterator_adaptor super-class used by swizzle_iterator; the swizzled
+// vector type serves as both the value type and the reference type
+template<class InputIterator, size_t Size>
+class swizzle_iterator_base
+{
+    typedef typename make_swizzle_iterator_value_type<
+        InputIterator, Size
+    >::type swizzled_type;
+
+public:
+    typedef ::boost::iterator_adaptor<
+        swizzle_iterator<InputIterator, Size>,
+        InputIterator,
+        swizzled_type,
+        typename std::iterator_traits<InputIterator>::iterator_category,
+        swizzled_type
+    > type;
+};
+
+// index expression returned by swizzle_iterator::operator[]; it keeps
+// copies of the base iterator, the index expression and the component
+// string until the expression is streamed into a meta_kernel
+template<class InputIterator, size_t Size, class IndexExpr>
+struct swizzle_iterator_index_expr
+{
+    typedef typename make_swizzle_iterator_value_type<
+        InputIterator, Size
+    >::type result_type;
+
+    swizzle_iterator_index_expr(const InputIterator &iter,
+                                const IndexExpr &index,
+                                const std::string &swizzle)
+        : m_input_iter(iter),
+          m_index_expr(index),
+          m_components(swizzle)
+    {
+    }
+
+    InputIterator m_input_iter; // iterator being indexed
+    IndexExpr m_index_expr;     // expression selecting the element
+    std::string m_components;   // text written after the "." separator
+};
+
+// streams the indexed element of the base iterator followed by "." and
+// the component string
+template<class InputIterator, size_t Size, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const swizzle_iterator_index_expr<InputIterator,
+                                                                 Size,
+                                                                 IndexExpr> &expr)
+{
+    const std::string &components = expr.m_components;
+
+    return kernel << expr.m_input_iter[expr.m_index_expr]
+                  << "." << components;
+}
+
+// iterator adaptor whose value_type is a Size-component vector; indexing
+// it produces an expression that writes "." and the component string
+// after the indexed element of the base iterator
+template<class InputIterator, size_t Size>
+class swizzle_iterator :
+    public swizzle_iterator_base<InputIterator, Size>::type
+{
+public:
+    typedef typename
+        swizzle_iterator_base<InputIterator, Size>::type
+        super_type;
+    typedef typename super_type::value_type value_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::base_type base_type;
+    typedef typename super_type::difference_type difference_type;
+
+    BOOST_STATIC_CONSTANT(size_t, vector_size = Size);
+
+    // \p components is asserted to hold exactly Size characters
+    swizzle_iterator(InputIterator iterator, const std::string &components)
+        : super_type(iterator),
+          m_components(components)
+    {
+        BOOST_ASSERT(components.size() == Size);
+    }
+
+    swizzle_iterator(const swizzle_iterator<InputIterator, Size> &other)
+        : super_type(other.base()),
+          m_components(other.m_components)
+    {
+        BOOST_ASSERT(m_components.size() == Size);
+    }
+
+    swizzle_iterator<InputIterator, Size>&
+    operator=(const swizzle_iterator<InputIterator, Size> &other)
+    {
+        // self-assignment leaves the iterator untouched
+        if(this == &other){
+            return *this;
+        }
+
+        super_type::operator=(other);
+        m_components = other.m_components;
+
+        return *this;
+    }
+
+    ~swizzle_iterator() {}
+
+    // index reported by the base iterator
+    size_t get_index() const
+    {
+        return this->base().get_index();
+    }
+
+    // buffer obtained through get_base_iterator_buffer()
+    const buffer& get_buffer() const
+    {
+        return get_base_iterator_buffer(*this);
+    }
+
+    // builds the index expression from the base iterator, \p expr and
+    // the component string
+    template<class IndexExpression>
+    swizzle_iterator_index_expr<InputIterator, Size, IndexExpression>
+    operator[](const IndexExpression &expr) const
+    {
+        typedef swizzle_iterator_index_expr<
+            InputIterator, Size, IndexExpression
+        > index_expr_type;
+
+        return index_expr_type(this->base(), expr, m_components);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // returns a value-initialized reference object
+    reference dereference() const
+    {
+        return reference();
+    }
+
+private:
+    std::string m_components;
+};
+
+// returns a swizzle_iterator over \p iterator using \p components; Size
+// has to be given explicitly as the first template argument
+template<size_t Size, class InputIterator>
+inline swizzle_iterator<InputIterator, Size>
+make_swizzle_iterator(InputIterator iterator, const std::string &components)
+{
+    swizzle_iterator<InputIterator, Size> result(iterator, components);
+
+    return result;
+}
+
+} // end detail namespace
+
+// is_device_iterator specialization for swizzle_iterator (derives from boost::true_type for every InputIterator and Size)
+template<size_t Size, class InputIterator>
+struct is_device_iterator<detail::swizzle_iterator<InputIterator, Size> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP
diff --git a/boost/compute/iterator/discard_iterator.hpp b/boost/compute/iterator/discard_iterator.hpp
new file mode 100644
index 0000000000..e002cf2ac2
--- /dev/null
+++ b/boost/compute/iterator/discard_iterator.hpp
@@ -0,0 +1,170 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP
+
+#include <string>
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for discard_iterator
+class discard_iterator;
+
+namespace detail {
+
+// helper class which defines the iterator_facade super-class
+// type for discard_iterator (exposed as the nested typedef "type")
+struct discard_iterator_base
+{
+ typedef ::boost::iterator_facade<
+ ::boost::compute::discard_iterator, // derived iterator class
+ void, // value type
+ ::std::random_access_iterator_tag, // iterator category
+ void * // reference type
+ > type;
+};
+
+// index expression returned by discard_iterator::operator[]; it only
+// keeps a copy of the index expression and has a void result type
+template<class IndexExpr>
+struct discard_iterator_index_expr
+{
+    typedef void result_type;
+
+    discard_iterator_index_expr(const IndexExpr &index)
+        : m_expr(index)
+    {
+    }
+
+    IndexExpr m_expr; // copy of the index expression
+};
+
+// streaming a discard_iterator index expression writes nothing: the
+// kernel is returned unchanged and the expression is ignored
+template<class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const discard_iterator_index_expr<IndexExpr> &)
+{
+    return kernel;
+}
+
+} // end detail namespace
+
+/// \class discard_iterator
+/// \brief An iterator which discards all values written to it.
+///
+/// The iterator only keeps track of an index.
+///
+/// \see make_discard_iterator(), constant_iterator
+class discard_iterator : public detail::discard_iterator_base::type
+{
+public:
+    typedef detail::discard_iterator_base::type super_type;
+    typedef super_type::reference reference;
+    typedef super_type::difference_type difference_type;
+
+    /// Creates a discard_iterator positioned at \p index.
+    discard_iterator(size_t index = 0)
+        : m_index(index)
+    {
+    }
+
+    /// Creates a copy of \p other at the same index.
+    discard_iterator(const discard_iterator &other)
+        : m_index(other.m_index)
+    {
+    }
+
+    /// Copies the index of \p other and returns \c *this.
+    discard_iterator& operator=(const discard_iterator &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_index = other.m_index;
+
+        return *this;
+    }
+
+    ~discard_iterator() {}
+
+    /// \internal_
+    template<class Expr>
+    detail::discard_iterator_index_expr<Expr>
+    operator[](const Expr &expr) const
+    {
+        typedef detail::discard_iterator_index_expr<Expr> index_expr_type;
+
+        return index_expr_type(expr);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    /// \internal_ (always returns a null pointer)
+    reference dereference() const
+    {
+        return 0;
+    }
+
+    /// \internal_ (iterators are equal when their indices are equal)
+    bool equal(const discard_iterator &other) const
+    {
+        const bool same_index = (m_index == other.m_index);
+
+        return same_index;
+    }
+
+    /// \internal_
+    void increment()
+    {
+        ++m_index;
+    }
+
+    /// \internal_
+    void decrement()
+    {
+        --m_index;
+    }
+
+    /// \internal_ (the index is moved by \p n using signed arithmetic)
+    void advance(difference_type n)
+    {
+        const difference_type position = static_cast<difference_type>(m_index);
+
+        m_index = static_cast<size_t>(position + n);
+    }
+
+    /// \internal_ (unsigned index difference converted to difference_type)
+    difference_type distance_to(const discard_iterator &other) const
+    {
+        const size_t offset = other.m_index - m_index;
+
+        return static_cast<difference_type>(offset);
+    }
+
+private:
+    size_t m_index;
+};
+
+/// Creates a discard_iterator positioned at \p index.
+///
+/// \param index the index of the iterator (zero when omitted)
+///
+/// \return a \c discard_iterator at \p index
+inline discard_iterator make_discard_iterator(size_t index = 0)
+{
+    discard_iterator iter(index);
+
+    return iter;
+}
+
+/// \internal_ (is_device_iterator specialization for discard_iterator)
+template<>
+struct is_device_iterator<discard_iterator> : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP
diff --git a/boost/compute/iterator/function_input_iterator.hpp b/boost/compute/iterator/function_input_iterator.hpp
new file mode 100644
index 0000000000..bd89b6c0fc
--- /dev/null
+++ b/boost/compute/iterator/function_input_iterator.hpp
@@ -0,0 +1,186 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for function_input_iterator<Function>
+template<class Function> class function_input_iterator;
+
+namespace detail {
+
+// helper class exposing, as the nested typedef "type", the
+// iterator_facade super-class used by function_input_iterator<Function>;
+// the result type of calling Function with no arguments serves as both
+// the value type and the reference type
+template<class Function>
+class function_input_iterator_base
+{
+    typedef typename ::boost::compute::result_of<Function()>::type
+        function_result_type;
+
+public:
+    typedef ::boost::iterator_facade<
+        ::boost::compute::function_input_iterator<Function>,
+        function_result_type,
+        ::std::random_access_iterator_tag,
+        function_result_type
+    > type;
+};
+
+// expression returned by function_input_iterator::operator[]; it keeps a
+// copy of the function so that the call can be streamed into a kernel
+template<class Function>
+struct function_input_iterator_expr
+{
+    typedef typename ::boost::compute::result_of<Function()>::type result_type;
+
+    function_input_iterator_expr(const Function &f)
+        : m_function(f)
+    {
+    }
+
+    Function m_function; // copy of the function to invoke
+};
+
+// streams the result of invoking the stored function with no arguments
+template<class Function>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const function_input_iterator_expr<Function> &expr)
+{
+    const Function &generator = expr.m_function;
+
+    return kernel << generator();
+}
+
+} // end detail namespace
+
+/// \class function_input_iterator
+/// \brief Iterator which returns the result of a function when dereferenced
+///
+/// The iterator stores a copy of the function together with an index.
+///
+/// For example:
+///
+/// \snippet test/test_function_input_iterator.cpp generate_42
+///
+/// \see make_function_input_iterator()
+template<class Function>
+class function_input_iterator :
+    public detail::function_input_iterator_base<Function>::type
+{
+public:
+    typedef typename detail::function_input_iterator_base<Function>::type super_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::difference_type difference_type;
+    typedef Function function;
+
+    /// Creates an iterator for \p function positioned at \p index.
+    function_input_iterator(const Function &function, size_t index = 0)
+        : m_function(function),
+          m_index(index)
+    {
+    }
+
+    /// Creates a copy of \p other (same function, same index).
+    function_input_iterator(const function_input_iterator<Function> &other)
+        : m_function(other.m_function),
+          m_index(other.m_index)
+    {
+    }
+
+    /// Copies the function and the index of \p other.
+    function_input_iterator<Function>&
+    operator=(const function_input_iterator<Function> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_function = other.m_function;
+        m_index = other.m_index;
+
+        return *this;
+    }
+
+    ~function_input_iterator() {}
+
+    /// Returns the current index of the iterator.
+    size_t get_index() const
+    {
+        return m_index;
+    }
+
+    /// \internal_ The index expression is ignored; the returned expression
+    /// only carries a copy of the function.
+    template<class Expr>
+    detail::function_input_iterator_expr<Function>
+    operator[](const Expr &) const
+    {
+        typedef detail::function_input_iterator_expr<Function> expr_type;
+
+        return expr_type(m_function);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // returns a value-initialized reference object
+    reference dereference() const
+    {
+        return reference();
+    }
+
+    // equal iterators hold equal functions and equal indices
+    bool equal(const function_input_iterator<Function> &other) const
+    {
+        if(m_function == other.m_function){
+            return m_index == other.m_index;
+        }
+
+        return false;
+    }
+
+    void increment()
+    {
+        ++m_index;
+    }
+
+    void decrement()
+    {
+        --m_index;
+    }
+
+    // the index is moved by n using signed arithmetic
+    void advance(difference_type n)
+    {
+        const difference_type position = static_cast<difference_type>(m_index);
+
+        m_index = static_cast<size_t>(position + n);
+    }
+
+    // unsigned index difference converted to difference_type
+    difference_type
+    distance_to(const function_input_iterator<Function> &other) const
+    {
+        const size_t offset = other.m_index - m_index;
+
+        return static_cast<difference_type>(offset);
+    }
+
+private:
+    Function m_function;
+    size_t m_index;
+};
+
+/// Creates a function_input_iterator for \p function.
+///
+/// \param function function to execute when dereferenced
+/// \param index index of the iterator (zero when omitted)
+///
+/// \return a \c function_input_iterator with \p function at \p index
+template<class Function>
+inline function_input_iterator<Function>
+make_function_input_iterator(const Function &function, size_t index = 0)
+{
+    function_input_iterator<Function> iter(function, index);
+
+    return iter;
+}
+
+/// \internal_ (is_device_iterator specialization: every function_input_iterator<Function> derives from boost::true_type)
+template<class Function>
+struct is_device_iterator<function_input_iterator<Function> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP
diff --git a/boost/compute/iterator/permutation_iterator.hpp b/boost/compute/iterator/permutation_iterator.hpp
new file mode 100644
index 0000000000..8a7f97a402
--- /dev/null
+++ b/boost/compute/iterator/permutation_iterator.hpp
@@ -0,0 +1,192 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP
+
+#include <string>
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_adaptor.hpp>
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for permutation_iterator
+template<class ElementIterator, class IndexIterator>
+class permutation_iterator;
+
+namespace detail {
+
+// helper class which defines the iterator_adaptor super-class
+// type for permutation_iterator (exposed as the nested typedef "type")
+template<class ElementIterator, class IndexIterator>
+class permutation_iterator_base
+{
+public:
+ typedef ::boost::iterator_adaptor<
+ ::boost::compute::permutation_iterator<ElementIterator, IndexIterator>, // derived iterator class
+ ElementIterator // adapted base iterator
+ > type;
+};
+
+// access expression returned by permutation_iterator::operator[]; it
+// keeps copies of the element iterator, the index iterator and the index
+// expression until the access is streamed into a meta_kernel
+template<class ElementIterator, class IndexIterator, class IndexExpr>
+struct permutation_iterator_access_expr
+{
+    typedef typename std::iterator_traits<ElementIterator>::value_type result_type;
+
+    permutation_iterator_access_expr(const ElementIterator &elements,
+                                     const IndexIterator &indices,
+                                     const IndexExpr &index)
+        : m_element_iter(elements),
+          m_index_iter(indices),
+          m_expr(index)
+    {
+    }
+
+    ElementIterator m_element_iter; // iterator over the values
+    IndexIterator m_index_iter;     // iterator over the indices
+    IndexExpr m_expr;               // position within the index range
+};
+
+// streams the element access: the element iterator is indexed with the
+// result of indexing the index iterator with the stored expression
+template<class ElementIterator, class IndexIterator, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const permutation_iterator_access_expr<ElementIterator,
+                                                                      IndexIterator,
+                                                                      IndexExpr> &expr)
+{
+    const ElementIterator &elements = expr.m_element_iter;
+    const IndexIterator &indices = expr.m_index_iter;
+
+    return kernel << elements[indices[expr.m_expr]];
+}
+
+} // end detail namespace
+
+/// \class permutation_iterator
+/// \brief The permutation_iterator class provides a permutation iterator
+///
+/// A permutation iterator iterates over a value range and an index range. When
+/// dereferenced, it returns the value from the value range using the current
+/// index from the index range.
+///
+/// For example, to reverse a range using the copy() algorithm and a permutation
+/// sequence:
+///
+/// \snippet test/test_permutation_iterator.cpp reverse_range
+///
+/// \see make_permutation_iterator()
+template<class ElementIterator, class IndexIterator>
+class permutation_iterator
+    : public detail::permutation_iterator_base<ElementIterator,
+                                               IndexIterator>::type
+{
+public:
+    typedef typename
+        detail::permutation_iterator_base<ElementIterator,
+                                          IndexIterator>::type super_type;
+    typedef typename super_type::value_type value_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::base_type base_type;
+    typedef typename super_type::difference_type difference_type;
+    typedef IndexIterator index_iterator;
+
+    /// Creates a permutation_iterator over the elements at \p e which are
+    /// selected through the indices at \p i.
+    permutation_iterator(ElementIterator e, IndexIterator i)
+        : super_type(e),
+          m_map(i)
+    {
+    }
+
+    /// Creates a copy of \p other.
+    permutation_iterator(const permutation_iterator<ElementIterator,
+                                                    IndexIterator> &other)
+        : super_type(other),
+          m_map(other.m_map)
+    {
+    }
+
+    /// Copies the element iterator and the index iterator of \p other.
+    permutation_iterator<ElementIterator, IndexIterator>&
+    operator=(const permutation_iterator<ElementIterator,
+                                         IndexIterator> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        super_type::operator=(other);
+        m_map = other.m_map;
+
+        return *this;
+    }
+
+    ~permutation_iterator() {}
+
+    /// Returns the index reported by the element iterator.
+    size_t get_index() const
+    {
+        return this->base().get_index();
+    }
+
+    /// Returns the buffer obtained through get_base_iterator_buffer().
+    const buffer& get_buffer() const
+    {
+        return detail::get_base_iterator_buffer(*this);
+    }
+
+    /// \internal_ Builds the access expression from the element iterator,
+    /// the index iterator and \p expr.
+    template<class IndexExpr>
+    detail::permutation_iterator_access_expr<ElementIterator,
+                                             IndexIterator,
+                                             IndexExpr>
+    operator[](const IndexExpr &expr) const
+    {
+        typedef detail::permutation_iterator_access_expr<
+            ElementIterator, IndexIterator, IndexExpr
+        > access_expr_type;
+
+        return access_expr_type(this->base(), m_map, expr);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // returns a value-initialized reference object
+    reference dereference() const
+    {
+        return reference();
+    }
+
+private:
+    IndexIterator m_map;
+};
+
+/// Creates a permutation_iterator over \p e with indices taken from \p i.
+///
+/// \param e the element range iterator
+/// \param i the index range iterator
+///
+/// \return a \c permutation_iterator for \p e using \p i
+template<class ElementIterator, class IndexIterator>
+inline permutation_iterator<ElementIterator, IndexIterator>
+make_permutation_iterator(ElementIterator e, IndexIterator i)
+{
+    permutation_iterator<ElementIterator, IndexIterator> iter(e, i);
+
+    return iter;
+}
+
+/// \internal_ (is_device_iterator specialization: every permutation_iterator derives from boost::true_type)
+template<class ElementIterator, class IndexIterator>
+struct is_device_iterator<
+ permutation_iterator<ElementIterator, IndexIterator> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP
diff --git a/boost/compute/iterator/strided_iterator.hpp b/boost/compute/iterator/strided_iterator.hpp
new file mode 100644
index 0000000000..52e7f07bd8
--- /dev/null
+++ b/boost/compute/iterator/strided_iterator.hpp
@@ -0,0 +1,296 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_adaptor.hpp>
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for strided_iterator
+template<class Iterator>
+class strided_iterator;
+
+namespace detail {
+
+// helper class which defines the iterator_adaptor super-class
+// type for strided_iterator (exposed as the nested typedef "type")
+template<class Iterator>
+class strided_iterator_base
+{
+public:
+ typedef ::boost::iterator_adaptor<
+ ::boost::compute::strided_iterator<Iterator>, // derived iterator class
+ Iterator // adapted base iterator
+ > type;
+};
+
+// helper class which pairs an index expression with a stride value so
+// that their product can be streamed into a meta_kernel
+template<class IndexExpr, class Stride>
+struct stride_expr
+{
+    stride_expr(const IndexExpr &index, const Stride &step)
+        : m_index_expr(index),
+          m_stride(step)
+    {
+    }
+
+    IndexExpr m_index_expr; // copy of the index expression
+    Stride m_stride;        // copy of the stride value
+};
+
+// returns a stride_expr holding copies of \p expr and \p stride; both
+// template arguments are deduced from the call
+template<class IndexExpr, class Stride>
+inline stride_expr<IndexExpr, Stride> make_stride_expr(const IndexExpr &expr,
+                                                       const Stride &stride)
+{
+    stride_expr<IndexExpr, Stride> result(expr, stride);
+
+    return result;
+}
+
+// streams the expression as "(<stride> * (<index expression>))"; the
+// stride is converted to ulong_ before it is written
+template<class IndexExpr, class Stride>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const stride_expr<IndexExpr, Stride> &expr)
+{
+    const ulong_ stride_value = static_cast<ulong_>(expr.m_stride);
+
+    return kernel << "(" << stride_value
+                  << " * (" << expr.m_index_expr << "))";
+}
+
+// index expression returned by strided_iterator::operator[]; it keeps
+// copies of the base iterator, the stride and the index expression until
+// the access is streamed into a meta_kernel
+template<class Iterator, class Stride, class IndexExpr>
+struct strided_iterator_index_expr
+{
+    typedef typename std::iterator_traits<Iterator>::value_type result_type;
+
+    strided_iterator_index_expr(const Iterator &input_iter,
+                                const Stride &stride,
+                                const IndexExpr &index_expr)
+        : m_input_iter(input_iter),
+          m_stride(stride),
+          m_index_expr(index_expr)
+    {
+    }
+
+    Iterator m_input_iter;
+    // stored by value: a reference member would refer to the stride of
+    // the iterator that created the expression and dangle as soon as
+    // that iterator (often a temporary) is destroyed
+    Stride m_stride;
+    IndexExpr m_index_expr;
+};
+
+// streams the access to the base iterator at the position given by the
+// stride_expr built from the stored index expression and stride
+template<class Iterator, class Stride, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const strided_iterator_index_expr<Iterator,
+                                                                 Stride,
+                                                                 IndexExpr> &expr)
+{
+    const stride_expr<IndexExpr, Stride> scaled_index =
+        make_stride_expr(expr.m_index_expr, expr.m_stride);
+
+    return kernel << expr.m_input_iter[scaled_index];
+}
+
+} // end detail namespace
+
+/// \class strided_iterator
+/// \brief An iterator adaptor with adjustable iteration step.
+///
+/// Each increment or decrement moves the underlying iterator by the
+/// stride given at construction instead of by one element.
+///
+/// \see buffer_iterator, make_strided_iterator(), make_strided_iterator_end()
+template<class Iterator>
+class strided_iterator :
+    public detail::strided_iterator_base<Iterator>::type
+{
+public:
+    typedef typename
+        detail::strided_iterator_base<Iterator>::type super_type;
+    typedef typename super_type::value_type value_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::base_type base_type;
+    typedef typename super_type::difference_type difference_type;
+
+    /// Creates a strided_iterator over \p iterator; \p stride is asserted
+    /// to be greater than zero.
+    strided_iterator(Iterator iterator, difference_type stride)
+        : super_type(iterator),
+          m_stride(stride)
+    {
+        BOOST_ASSERT_MSG(stride > 0, "Stride must be greater than zero");
+    }
+
+    /// Creates a copy of \p other (same base iterator, same stride).
+    strided_iterator(const strided_iterator<Iterator> &other)
+        : super_type(other.base()),
+          m_stride(other.m_stride)
+    {
+    }
+
+    /// Copies the base iterator and the stride of \p other.
+    strided_iterator<Iterator>&
+    operator=(const strided_iterator<Iterator> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        super_type::operator=(other);
+        m_stride = other.m_stride;
+
+        return *this;
+    }
+
+    ~strided_iterator() {}
+
+    /// Returns the index reported by the base iterator.
+    size_t get_index() const
+    {
+        return this->base().get_index();
+    }
+
+    /// Returns the buffer obtained through get_base_iterator_buffer().
+    const buffer& get_buffer() const
+    {
+        return detail::get_base_iterator_buffer(*this);
+    }
+
+    /// \internal_ Builds the index expression from the base iterator, the
+    /// stride and \p expr.
+    template<class IndexExpression>
+    detail::strided_iterator_index_expr<Iterator, difference_type, IndexExpression>
+    operator[](const IndexExpression &expr) const
+    {
+        typedef detail::strided_iterator_index_expr<
+            Iterator, difference_type, IndexExpression
+        > index_expr_type;
+
+        return index_expr_type(this->base(), m_stride, expr);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // returns a value-initialized reference object
+    reference dereference() const
+    {
+        return reference();
+    }
+
+    // equal iterators have equal strides and equal base iterators
+    bool equal(const strided_iterator<Iterator> &other) const
+    {
+        const bool same_stride = (other.m_stride == m_stride);
+
+        return same_stride
+            && (other.base_reference() == this->base_reference());
+    }
+
+    // one step forward moves the base iterator by the stride
+    void increment()
+    {
+        std::advance(this->base_reference(), m_stride);
+    }
+
+    // one step back moves the base iterator by the negated stride
+    void decrement()
+    {
+        std::advance(this->base_reference(), -m_stride);
+    }
+
+    // n steps move the base iterator by n times the stride
+    void advance(difference_type n)
+    {
+        std::advance(this->base_reference(), n * m_stride);
+    }
+
+    // distance between the base iterators divided by the stride
+    difference_type distance_to(const strided_iterator<Iterator> &other) const
+    {
+        const difference_type base_distance =
+            std::distance(this->base_reference(), other.base_reference());
+
+        return base_distance / m_stride;
+    }
+
+private:
+    difference_type m_stride;
+};
+
+/// Creates a strided_iterator over \p iterator which moves by \p stride.
+///
+/// \param iterator the underlying iterator
+/// \param stride the iteration step for strided_iterator
+///
+/// \return a \c strided_iterator for \p iterator with \p stride.
+///
+/// For example, an iterator which visits every other element of a
+/// \c vector<int>:
+/// \code
+/// auto strided_iterator = make_strided_iterator(vec.begin(), 2);
+/// \endcode
+template<class Iterator>
+inline strided_iterator<Iterator>
+make_strided_iterator(Iterator iterator,
+                      typename std::iterator_traits<Iterator>::difference_type stride)
+{
+    strided_iterator<Iterator> result(iterator, stride);
+
+    return result;
+}
+
+/// Returns a strided_iterator which refers to element that would follow
+/// the last element accessible through strided_iterator for \p first iterator
+/// with \p stride.
+///
+/// Parameter \p stride must be greater than zero.
+///
+/// If the range [\p first, \p last) is empty (or \p last precedes
+/// \p first) the returned iterator is positioned at \p first, so the
+/// resulting strided range is empty as well.
+///
+/// \param first the iterator referring to the first element accessible
+///        through strided_iterator for \p first with \p stride
+/// \param last the iterator referring to the last element that may be
+///        accessible through strided_iterator for \p first with \p stride
+/// \param stride the iteration step
+///
+/// \return a \c strided_iterator referring to element that would follow
+///         the last element accessible through strided_iterator for \p first
+///         iterator with \p stride.
+///
+/// It can be helpful when iterating over strided_iterator:
+/// \code
+/// // vec.size() may not be divisible by 3
+/// auto strided_iterator_begin = make_strided_iterator(vec.begin(), 3);
+/// auto strided_iterator_end = make_strided_iterator_end(vec.begin(), vec.end(), 3);
+///
+/// // copy every 3rd element to result
+/// boost::compute::copy(
+///     strided_iterator_begin,
+///     strided_iterator_end,
+///     result.begin(),
+///     queue
+/// );
+/// \endcode
+template<class Iterator>
+strided_iterator<Iterator>
+make_strided_iterator_end(Iterator first,
+                          Iterator last,
+                          typename std::iterator_traits<Iterator>::difference_type stride)
+{
+    typedef typename std::iterator_traits<Iterator>::difference_type difference_type;
+
+    // checked up front because stride is used as a divisor below
+    BOOST_ASSERT_MSG(stride > 0, "Stride must be greater than zero");
+
+    const difference_type range = std::distance(first, last);
+
+    // no element is accessible in an empty range, so its end is its
+    // beginning; without this check (range - 1) / stride truncates to
+    // zero for stride > 1 and the end lands one stride past \p last
+    if(range <= 0){
+        return strided_iterator<Iterator>(first, stride);
+    }
+
+    // number of elements accessible through the strided_iterator
+    const difference_type count = (range - 1) / stride + 1;
+
+    // advance from end to the element that would follow the last
+    // accessible element
+    Iterator end_for_strided_iterator = last;
+    std::advance(end_for_strided_iterator, count * stride - range);
+
+    return strided_iterator<Iterator>(end_for_strided_iterator, stride);
+}
+
+/// \internal_ (is_device_iterator specialization: every strided_iterator<Iterator> derives from boost::true_type)
+template<class Iterator>
+struct is_device_iterator<strided_iterator<Iterator> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP
diff --git a/boost/compute/iterator/transform_iterator.hpp b/boost/compute/iterator/transform_iterator.hpp
new file mode 100644
index 0000000000..c040922f9d
--- /dev/null
+++ b/boost/compute/iterator/transform_iterator.hpp
@@ -0,0 +1,227 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/iterator/iterator_adaptor.hpp>
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/read_write_single_value.hpp>
+#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for transform_iterator
+template<class InputIterator, class UnaryFunction>
+class transform_iterator;
+
+namespace detail {
+
+// meta-function computing the value_type of a transform_iterator: the
+// result type of calling UnaryFunction with the value_type of InputIterator
+template<class InputIterator, class UnaryFunction>
+struct make_transform_iterator_value_type
+{
+    // element type of the wrapped iterator
+    typedef typename
+        std::iterator_traits<InputIterator>::value_type
+        value_type;
+
+    // element type once UnaryFunction has been applied
+    typedef typename
+        boost::compute::result_of<
+            UnaryFunction(value_type)
+        >::type type;
+};
+
+// helper naming the ::boost::iterator_adaptor instantiation which
+// transform_iterator uses as its super-class
+template<class InputIterator, class UnaryFunction>
+class transform_iterator_base
+{
+    // type produced by the transform; handed to the adaptor as both its
+    // value type and its reference type
+    typedef typename
+        make_transform_iterator_value_type<
+            InputIterator, UnaryFunction
+        >::type transformed_type;
+
+    // the iterator category is taken over from the wrapped iterator
+    typedef typename
+        std::iterator_traits<InputIterator>::iterator_category
+        category_type;
+
+public:
+    typedef ::boost::iterator_adaptor<
+        ::boost::compute::transform_iterator<InputIterator, UnaryFunction>,
+        InputIterator,
+        transformed_type,
+        category_type,
+        transformed_type
+    > type;
+};
+
+// expression object returned by transform_iterator::operator[]; it bundles
+// the wrapped iterator, the transform and the index expression so that they
+// can later be streamed into a meta_kernel
+template<class InputIterator, class UnaryFunction, class IndexExpr>
+struct transform_iterator_index_expr
+{
+    // type of the transformed element
+    typedef typename
+        make_transform_iterator_value_type<InputIterator, UnaryFunction>::type
+        result_type;
+
+    // stores copies of all three arguments
+    transform_iterator_index_expr(const InputIterator &iter,
+                                  const UnaryFunction &transform,
+                                  const IndexExpr &index)
+        : m_input_iter(iter)
+        , m_transform_expr(transform)
+        , m_index_expr(index)
+    {
+    }
+
+    InputIterator m_input_iter;
+    UnaryFunction m_transform_expr;
+    IndexExpr m_index_expr;
+};
+
+// Streams a transform_iterator_index_expr into a meta_kernel.
+//
+// The wrapped iterator is indexed with the stored index expression, the
+// stored transform is invoked on that result, and whatever the invocation
+// returns is inserted into \p kernel. The value returned is the result of
+// that insertion.
+template<class InputIterator, class UnaryFunction, class IndexExpr>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+ const transform_iterator_index_expr<InputIterator,
+ UnaryFunction,
+ IndexExpr> &expr)
+{
+ return kernel << expr.m_transform_expr(expr.m_input_iter[expr.m_index_expr]);
+}
+
+} // end detail namespace
+
+/// \class transform_iterator
+/// \brief An iterator adaptor which applies a function to each element.
+///
+/// Dereferencing a transform_iterator yields the result of calling the
+/// unary function on the element the underlying iterator refers to.
+///
+/// For example, to copy from an input range to an output range while taking
+/// the absolute value of each element:
+///
+/// \snippet test/test_transform_iterator.cpp copy_abs
+///
+/// \see buffer_iterator, make_transform_iterator()
+template<class InputIterator, class UnaryFunction>
+class transform_iterator :
+    public detail::transform_iterator_base<InputIterator, UnaryFunction>::type
+{
+public:
+    typedef typename
+        detail::transform_iterator_base<
+            InputIterator, UnaryFunction
+        >::type super_type;
+    typedef typename super_type::value_type value_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::base_type base_type;
+    typedef typename super_type::difference_type difference_type;
+    typedef UnaryFunction unary_function;
+
+    /// Creates a transform iterator which wraps \p iterator and applies
+    /// \p transform to its elements.
+    transform_iterator(InputIterator iterator, UnaryFunction transform)
+        : super_type(iterator)
+        , m_transform(transform)
+    {
+    }
+
+    /// Creates a copy of \p other (same base iterator, same transform).
+    transform_iterator(const transform_iterator &other)
+        : super_type(other.base())
+        , m_transform(other.m_transform)
+    {
+    }
+
+    /// Assigns the base iterator and the transform of \p other to \c *this.
+    transform_iterator& operator=(const transform_iterator &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        super_type::operator=(other);
+        m_transform = other.m_transform;
+
+        return *this;
+    }
+
+    ~transform_iterator()
+    {
+    }
+
+    /// Returns the index reported by the underlying iterator.
+    size_t get_index() const
+    {
+        return super_type::base().get_index();
+    }
+
+    /// Returns the buffer found by detail::get_base_iterator_buffer().
+    const buffer& get_buffer() const
+    {
+        return detail::get_base_iterator_buffer(*this);
+    }
+
+    /// Returns an expression object combining the underlying iterator, the
+    /// transform and \p expr, for use inside a meta_kernel.
+    template<class IndexExpression>
+    detail::transform_iterator_index_expr<InputIterator, UnaryFunction, IndexExpression>
+    operator[](const IndexExpression &expr) const
+    {
+        typedef detail::transform_iterator_index_expr<
+            InputIterator, UnaryFunction, IndexExpression
+        > index_expr_type;
+
+        return index_expr_type(super_type::base(), m_transform, expr);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // Host-side dereference. Builds a meta_kernel named "read" which
+    // stores the transformed element at offset 0 of the base iterator into
+    // a buffer holding a single value_type, runs it as a task on a command
+    // queue created here, and reads the value back.
+    reference dereference() const
+    {
+        const context &ctx = super_type::base().get_buffer().get_context();
+        command_queue queue(ctx, ctx.get_device());
+
+        detail::meta_kernel k("read");
+        size_t output_arg = k.add_arg<value_type *>(memory_object::global_memory, "output");
+        k << "*output = " << m_transform(super_type::base()[k.lit(0)]) << ";";
+
+        kernel read_kernel = k.compile(ctx);
+
+        buffer output_buffer(ctx, sizeof(value_type));
+        read_kernel.set_arg(output_arg, output_buffer);
+        queue.enqueue_task(read_kernel);
+
+        return detail::read_single_value<value_type>(output_buffer, queue);
+    }
+
+private:
+    // function applied to every element
+    UnaryFunction m_transform;
+};
+
+/// Creates a transform_iterator which applies \p transform to the elements
+/// of \p iterator.
+///
+/// \param iterator the underlying iterator
+/// \param transform the unary transform function
+///
+/// \return a \c transform_iterator for \p iterator with \p transform
+///
+/// For example, to create an iterator which returns the square-root of each
+/// value in a \c vector<int>:
+/// \code
+/// auto sqrt_iterator = make_transform_iterator(vec.begin(), sqrt<int>());
+/// \endcode
+template<class InputIterator, class UnaryFunction>
+inline transform_iterator<InputIterator, UnaryFunction>
+make_transform_iterator(InputIterator iterator, UnaryFunction transform)
+{
+    typedef transform_iterator<InputIterator, UnaryFunction> result_iterator;
+
+    return result_iterator(iterator, transform);
+}
+
+/// \internal_ (marks every transform_iterator instantiation as a device
+/// iterator by deriving the trait from boost::true_type)
+template<class InputIterator, class UnaryFunction>
+struct is_device_iterator<transform_iterator<InputIterator, UnaryFunction> >
+    : boost::true_type
+{
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP
diff --git a/boost/compute/iterator/zip_iterator.hpp b/boost/compute/iterator/zip_iterator.hpp
new file mode 100644
index 0000000000..2860d73a93
--- /dev/null
+++ b/boost/compute/iterator/zip_iterator.hpp
@@ -0,0 +1,316 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP
+#define BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/config.hpp>
+#include <boost/fusion/algorithm/iteration/for_each.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+#include <boost/mpl/back_inserter.hpp>
+#include <boost/mpl/transform.hpp>
+#include <boost/mpl/vector.hpp>
+#include <boost/preprocessor/repetition.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/tuple/tuple_comparison.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/functional.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/mpl_vector_to_tuple.hpp>
+#include <boost/compute/types/tuple.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+
+// forward declaration for zip_iterator
+template<class IteratorTuple>
+class zip_iterator;
+
+namespace detail {
+
+namespace mpl = boost::mpl;
+
+// meta-function yielding std::iterator_traits<Iterator>::value_type as its
+// nested ::type
+template<class Iterator>
+struct make_iterator_value_type
+{
+    typedef typename
+        std::iterator_traits<Iterator>::value_type
+        type;
+};
+
+// meta-function computing the value_type of a zip_iterator: the tuple built
+// from the value_type of every iterator in IteratorTuple
+template<class IteratorTuple>
+struct make_zip_iterator_value_type
+{
+private:
+    // mpl::vector holding the value_type of each zipped iterator
+    typedef typename
+        mpl::transform<
+            IteratorTuple,
+            make_iterator_value_type<mpl::_1>,
+            mpl::back_inserter<mpl::vector<> >
+        >::type value_types;
+
+public:
+    // the same types converted with mpl_vector_to_tuple
+    typedef typename
+        detail::mpl_vector_to_tuple<value_types>::type
+        type;
+};
+
+// helper naming the ::boost::iterator_facade instantiation which
+// zip_iterator uses as its super-class
+template<class IteratorTuple>
+class zip_iterator_base
+{
+    // tuple of element types; handed to the facade as both its value type
+    // and its reference type
+    typedef typename
+        make_zip_iterator_value_type<IteratorTuple>::type
+        zipped_type;
+
+public:
+    typedef ::boost::iterator_facade<
+        ::boost::compute::zip_iterator<IteratorTuple>,
+        zipped_type,
+        ::std::random_access_iterator_tag,
+        zipped_type
+    > type;
+};
+
+// expression object returned by zip_iterator::operator[]; it bundles the
+// tuple of iterators with the index expression so that they can later be
+// streamed into a meta_kernel
+template<class IteratorTuple, class IndexExpr>
+struct zip_iterator_index_expr
+{
+    // tuple type of the zipped elements
+    typedef typename
+        make_zip_iterator_value_type<IteratorTuple>::type
+        result_type;
+
+    // stores copies of both arguments
+    zip_iterator_index_expr(const IteratorTuple &iterators,
+                            const IndexExpr &index)
+        : m_iterators(iterators)
+        , m_index_expr(index)
+    {
+    }
+
+    IteratorTuple m_iterators;
+    IndexExpr m_index_expr;
+};
+
+// Streams the n-th zipped iterator indexed with the index expression of
+// `expr`. For every n other than zero a ", " separator is streamed first,
+// so repeating this macro yields a comma separated element list.
+/// \internal_
+#define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \
+ BOOST_PP_EXPR_IF(n, << ", ") \
+ << boost::get<n>(expr.m_iterators)[expr.m_index_expr]
+
+// Defines the operator<< overload which streams a zip_iterator_index_expr
+// over a boost::tuple of n iterators into a meta_kernel. The overload
+// injects the zipped value type into the kernel and then streams
+// "(<type name>){ <elem 0>, <elem 1>, ... }", where each element comes from
+// BOOST_COMPUTE_PRINT_ELEM.
+/// \internal_
+#define BOOST_COMPUTE_PRINT_ZIP_IDX(z, n, unused) \
+template<BOOST_PP_ENUM_PARAMS(n, class Iterator), class IndexExpr> \
+inline meta_kernel& operator<<( \
+ meta_kernel &kernel, \
+ const zip_iterator_index_expr< \
+ boost::tuple<BOOST_PP_ENUM_PARAMS(n, Iterator)>, \
+ IndexExpr \
+ > &expr) \
+{ \
+ typedef typename \
+ boost::tuple<BOOST_PP_ENUM_PARAMS(n, Iterator)> \
+ tuple_type; \
+ typedef typename \
+ make_zip_iterator_value_type<tuple_type>::type \
+ value_type; \
+ kernel.inject_type<value_type>(); \
+ return kernel \
+ << "(" << type_name<value_type>() << ")" \
+ << "{ " \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \
+ << "}"; \
+}
+
+// Generate one overload per tuple size, starting at one iterator; the upper
+// bound passed to the repetition is BOOST_COMPUTE_MAX_ARITY.
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_ZIP_IDX, ~)
+
+// the helper macros are only needed to generate the overloads above
+#undef BOOST_COMPUTE_PRINT_ZIP_IDX
+#undef BOOST_COMPUTE_PRINT_ELEM
+
+// Function object which moves an iterator by a fixed, signed distance
+// using std::advance().
+struct iterator_advancer
+{
+    // The distance is kept signed: zip_iterator::advance() passes a
+    // difference_type, which is negative when stepping backwards. Storing
+    // it in an unsigned type would turn such a step into a huge forward
+    // distance before it ever reaches std::advance().
+    iterator_advancer(std::ptrdiff_t n)
+        : m_distance(n)
+    {
+    }
+
+    // Advances \p i in place by the stored distance.
+    template<class Iterator>
+    void operator()(Iterator &i) const
+    {
+        std::advance(i, m_distance);
+    }
+
+    // number of positions to move; negative values move backwards
+    std::ptrdiff_t m_distance;
+};
+
+// moves the iterator one position forward (post-increment, result unused)
+template<class Iter>
+void increment_iterator(Iter &it)
+{
+    it++;
+}
+
+// moves the iterator one position backward (post-decrement, result unused)
+template<class Iter>
+void decrement_iterator(Iter &it)
+{
+    it--;
+}
+
+} // end detail namespace
+
+/// \class zip_iterator
+/// \brief A zip iterator adaptor.
+///
+/// The zip_iterator class combines values from multiple input iterators. When
+/// indexed inside a kernel it yields a tuple containing each value at the
+/// given position in each input range.
+///
+/// \see make_zip_iterator()
+template<class IteratorTuple>
+class zip_iterator : public detail::zip_iterator_base<IteratorTuple>::type
+{
+public:
+    typedef typename
+        detail::zip_iterator_base<IteratorTuple>::type
+        super_type;
+    typedef typename super_type::value_type value_type;
+    typedef typename super_type::reference reference;
+    typedef typename super_type::difference_type difference_type;
+    typedef IteratorTuple iterator_tuple;
+
+    /// Creates a zip iterator over the iterators in \p iterators.
+    zip_iterator(IteratorTuple iterators)
+        : m_iterators(iterators)
+    {
+    }
+
+    /// Creates a copy of \p other.
+    zip_iterator(const zip_iterator<IteratorTuple> &other)
+        : m_iterators(other.m_iterators)
+    {
+    }
+
+    /// Assigns the iterators of \p other to \c *this.
+    zip_iterator<IteratorTuple>&
+    operator=(const zip_iterator<IteratorTuple> &other)
+    {
+        if(this != &other){
+            super_type::operator=(other);
+
+            m_iterators = other.m_iterators;
+        }
+
+        return *this;
+    }
+
+    ~zip_iterator()
+    {
+    }
+
+    /// Returns the tuple of zipped iterators.
+    const IteratorTuple& get_iterator_tuple() const
+    {
+        return m_iterators;
+    }
+
+    /// Returns an expression object combining the zipped iterators with
+    /// \p expr, for use inside a meta_kernel.
+    template<class IndexExpression>
+    detail::zip_iterator_index_expr<IteratorTuple, IndexExpression>
+    operator[](const IndexExpression &expr) const
+    {
+        return detail::zip_iterator_index_expr<IteratorTuple,
+                                               IndexExpression>(m_iterators,
+                                                                expr);
+    }
+
+private:
+    friend class ::boost::iterator_core_access;
+
+    // Function objects used by increment() and decrement(). They have to
+    // be objects with a templated call operator rather than function
+    // templates: the functor type of fusion::for_each cannot be deduced
+    // from the bare name of a function template, so passing one makes the
+    // call ill-formed as soon as the member is instantiated.
+    struct incrementer
+    {
+        template<class Iterator>
+        void operator()(Iterator &i) const
+        {
+            ++i;
+        }
+    };
+
+    struct decrementer
+    {
+        template<class Iterator>
+        void operator()(Iterator &i) const
+        {
+            --i;
+        }
+    };
+
+    // Host-side dereference is a placeholder: it returns a
+    // value-initialized reference and does not read from the zipped ranges.
+    reference dereference() const
+    {
+        return reference();
+    }
+
+    // two zip iterators are equal when their iterator tuples are equal
+    bool equal(const zip_iterator<IteratorTuple> &other) const
+    {
+        return m_iterators == other.m_iterators;
+    }
+
+    // steps every zipped iterator one position forward
+    void increment()
+    {
+        boost::fusion::for_each(m_iterators, incrementer());
+    }
+
+    // steps every zipped iterator one position backward
+    void decrement()
+    {
+        boost::fusion::for_each(m_iterators, decrementer());
+    }
+
+    // moves every zipped iterator by n positions
+    void advance(difference_type n)
+    {
+        boost::fusion::for_each(m_iterators, detail::iterator_advancer(n));
+    }
+
+    // the distance is measured on the first zipped iterator only
+    difference_type distance_to(const zip_iterator<IteratorTuple> &other) const
+    {
+        return std::distance(boost::get<0>(m_iterators),
+                             boost::get<0>(other.m_iterators));
+    }
+
+private:
+    IteratorTuple m_iterators;
+};
+
+/// Builds a zip_iterator from the tuple \p iterators.
+///
+/// \param iterators a tuple of input iterators to zip together
+///
+/// \return a \c zip_iterator for \p iterators
+///
+/// For example, to zip together iterators from three vectors (\c a, \c b, and
+/// \c c):
+/// \code
+/// auto zipped = boost::compute::make_zip_iterator(
+///     boost::make_tuple(a.begin(), b.begin(), c.begin())
+/// );
+/// \endcode
+template<class IteratorTuple>
+inline zip_iterator<IteratorTuple>
+make_zip_iterator(IteratorTuple iterators)
+{
+    typedef zip_iterator<IteratorTuple> result_iterator;
+
+    return result_iterator(iterators);
+}
+
+/// \internal_ (marks every zip_iterator instantiation as a device iterator
+/// by deriving the trait from boost::true_type)
+template<class IteratorTuple>
+struct is_device_iterator<zip_iterator<IteratorTuple> >
+    : boost::true_type
+{
+};
+
+namespace detail {
+
+// get<N>() specialization for zip_iterator
+//
+// Defines the operator<< overload which streams
+// invoked_get<N, zip_iterator_index_expr<...>, boost::tuple<T0, ...> > into
+// a meta_kernel. The overload checks at compile time that N is a valid
+// index into the tuple, injects the N-th tuple element type into the
+// kernel, and then streams only the N-th zipped iterator indexed with the
+// stored index expression.
+/// \internal_
+#define BOOST_COMPUTE_ZIP_GET_N(z, n, unused) \
+template<size_t N, class IteratorTuple, class IndexExpr, \
+ BOOST_PP_ENUM_PARAMS(n, class T)> \
+inline meta_kernel& \
+operator<<(meta_kernel &kernel, \
+ const invoked_get< \
+ N, \
+ zip_iterator_index_expr<IteratorTuple, IndexExpr>, \
+ boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> \
+ > &expr) \
+{ \
+ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> Tuple; \
+ typedef typename boost::tuples::element<N, Tuple>::type T; \
+ BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<Tuple>::value)); \
+ kernel.inject_type<T>(); \
+ return kernel \
+ << boost::get<N>(expr.m_arg.m_iterators)[expr.m_arg.m_index_expr]; \
+}
+
+// Generate one overload per tuple size, starting at one element; the upper
+// bound passed to the repetition is BOOST_COMPUTE_MAX_ARITY.
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_ZIP_GET_N, ~)
+
+#undef BOOST_COMPUTE_ZIP_GET_N
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP
diff --git a/boost/compute/kernel.hpp b/boost/compute/kernel.hpp
new file mode 100644
index 0000000000..9494e46de2
--- /dev/null
+++ b/boost/compute/kernel.hpp
@@ -0,0 +1,394 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_KERNEL_HPP
+#define BOOST_COMPUTE_KERNEL_HPP
+
+#include <string>
+
+#include <boost/assert.hpp>
+#include <boost/utility/enable_if.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/type_traits/is_fundamental.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T> struct set_kernel_arg;
+
+} // end detail namespace
+
+/// \class kernel
+/// \brief A compute kernel.
+///
+/// Wraps a \c cl_kernel handle. Copying a kernel retains the handle and
+/// destroying it releases the handle again.
+///
+/// \see command_queue, program
+class kernel
+{
+public:
+    /// Creates a null kernel object.
+    kernel()
+        : m_kernel(0)
+    {
+    }
+
+    /// Creates a new kernel object for \p kernel. If \p retain is
+    /// \c true, the reference count for \p kernel will be incremented.
+    explicit kernel(cl_kernel kernel, bool retain = true)
+        : m_kernel(kernel)
+    {
+        if(m_kernel && retain){
+            clRetainKernel(m_kernel);
+        }
+    }
+
+    /// Creates a new kernel object with \p name from \p program.
+    ///
+    /// Throws opencl_error if the kernel could not be created.
+    kernel(const program &program, const std::string &name)
+    {
+        cl_int error = 0;
+        m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
+
+        if(!m_kernel){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+    }
+
+    /// Creates a new kernel object as a copy of \p other.
+    kernel(const kernel &other)
+        : m_kernel(other.m_kernel)
+    {
+        if(m_kernel){
+            clRetainKernel(m_kernel);
+        }
+    }
+
+    /// Copies the kernel object from \p other to \c *this.
+    kernel& operator=(const kernel &other)
+    {
+        if(this != &other){
+            if(m_kernel){
+                clReleaseKernel(m_kernel);
+            }
+
+            m_kernel = other.m_kernel;
+
+            if(m_kernel){
+                clRetainKernel(m_kernel);
+            }
+        }
+
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a new kernel object from \p other.
+    kernel(kernel&& other) BOOST_NOEXCEPT
+        : m_kernel(other.m_kernel)
+    {
+        other.m_kernel = 0;
+    }
+
+    /// Move-assigns the kernel from \p other to \c *this.
+    kernel& operator=(kernel&& other) BOOST_NOEXCEPT
+    {
+        if(m_kernel){
+            clReleaseKernel(m_kernel);
+        }
+
+        m_kernel = other.m_kernel;
+        other.m_kernel = 0;
+
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the kernel object.
+    ~kernel()
+    {
+        if(m_kernel){
+            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+                clReleaseKernel(m_kernel)
+            );
+        }
+    }
+
+    /// Returns a reference to the underlying OpenCL kernel object.
+    cl_kernel& get() const
+    {
+        return const_cast<cl_kernel &>(m_kernel);
+    }
+
+    /// Returns the function name for the kernel.
+    std::string name() const
+    {
+        return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
+    }
+
+    /// Returns the number of arguments for the kernel.
+    size_t arity() const
+    {
+        return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
+    }
+
+    /// Returns the program for the kernel.
+    program get_program() const
+    {
+        return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
+    }
+
+    /// Returns the context for the kernel.
+    context get_context() const
+    {
+        return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
+    }
+
+    /// Returns information about the kernel.
+    ///
+    /// \see_opencl_ref{clGetKernelInfo}
+    template<class T>
+    T get_info(cl_kernel_info info) const
+    {
+        return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
+    }
+
+    /// \overload
+    template<int Enum>
+    typename detail::get_object_info_type<kernel, Enum>::type
+    get_info() const;
+
+    #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Returns information about the argument at \p index.
+    ///
+    /// For example, to get the name of the first argument:
+    /// \code
+    /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
+    /// \endcode
+    ///
+    /// Note, this function requires that the program be compiled with the
+    /// \c "-cl-kernel-arg-info" flag. For example:
+    /// \code
+    /// program.build("-cl-kernel-arg-info");
+    /// \endcode
+    ///
+    /// \opencl_version_warning{1,2}
+    ///
+    /// \see_opencl_ref{clGetKernelArgInfo}
+    template<class T>
+    T get_arg_info(size_t index, cl_kernel_arg_info info) const
+    {
+        // clGetKernelArgInfo takes the argument index as cl_uint
+        return detail::get_object_info<T>(
+            clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
+        );
+    }
+    #endif // CL_VERSION_1_2
+
+    /// Returns work-group information for the kernel with \p device.
+    ///
+    /// \see_opencl_ref{clGetKernelWorkGroupInfo}
+    template<class T>
+    T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
+    {
+        return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
+    }
+
+    /// Sets the argument at \p index to \p value with \p size.
+    ///
+    /// Throws opencl_error if the argument could not be set.
+    ///
+    /// \see_opencl_ref{clSetKernelArg}
+    void set_arg(size_t index, size_t size, const void *value)
+    {
+        BOOST_ASSERT(index < arity());
+
+        cl_int ret = clSetKernelArg(m_kernel,
+                                    static_cast<cl_uint>(index),
+                                    size,
+                                    value);
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+
+    /// Sets the argument at \p index to \p value.
+    ///
+    /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
+    /// calling set_arg(index, sizeof(type), &value).
+    ///
+    /// Additionally, this method is specialized for device memory objects
+    /// such as buffer and image2d. This allows for them to be passed directly
+    /// without having to extract their underlying cl_mem object.
+    ///
+    /// This method is also specialized for device container types such as
+    /// vector<T> and array<T, N>. This allows for them to be passed directly
+    /// as kernel arguments without having to extract their underlying buffer.
+    ///
+    /// For setting local memory arguments (e.g. "__local float *buf"), the
+    /// local_buffer<T> class may be used:
+    /// \code
+    /// // set argument to a local buffer with storage for 32 float's
+    /// kernel.set_arg(0, local_buffer<float>(32));
+    /// \endcode
+    template<class T>
+    void set_arg(size_t index, const T &value)
+    {
+        // if you get a compilation error pointing here it means you
+        // attempted to set a kernel argument from an invalid type.
+        detail::set_kernel_arg<T>()(*this, index, value);
+    }
+
+    /// \internal_
+    void set_arg(size_t index, const cl_mem mem)
+    {
+        set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
+    }
+
+    /// \internal_
+    void set_arg(size_t index, const cl_sampler sampler)
+    {
+        set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
+    }
+
+    /// \internal_
+    template<class T>
+    void set_arg(size_t index, const svm_ptr<T> ptr)
+    {
+        #ifdef CL_VERSION_2_0
+        // clSetKernelArgSVMPointer takes the index as cl_uint; convert it
+        // explicitly, as the size/value overload of set_arg() does
+        cl_int ret = clSetKernelArgSVMPointer(m_kernel,
+                                              static_cast<cl_uint>(index),
+                                              ptr.get());
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+        #else
+        // SVM pointers need OpenCL 2.0; without it the call always fails
+        (void) index;
+        (void) ptr;
+        BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
+        #endif
+    }
+
+    #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+    /// Sets the arguments for the kernel to \p args.
+    template<class... T>
+    void set_args(T&&... args)
+    {
+        BOOST_ASSERT(sizeof...(T) <= arity());
+
+        _set_args<0>(args...);
+    }
+    #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+    #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Sets additional execution information for the kernel.
+    ///
+    /// \opencl_version_warning{2,0}
+    ///
+    /// \see_opencl2_ref{clSetKernelExecInfo}
+    void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
+    {
+        cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+    #endif // CL_VERSION_2_0
+
+    /// Returns \c true if the kernel is the same as \p other.
+    bool operator==(const kernel &other) const
+    {
+        return m_kernel == other.m_kernel;
+    }
+
+    /// Returns \c true if the kernel is different from \p other.
+    bool operator!=(const kernel &other) const
+    {
+        return m_kernel != other.m_kernel;
+    }
+
+    /// \internal_
+    operator cl_kernel() const
+    {
+        return m_kernel;
+    }
+
+    /// \internal_
+    static kernel create_with_source(const std::string &source,
+                                     const std::string &name,
+                                     const context &context)
+    {
+        return program::build_with_source(source, context).create_kernel(name);
+    }
+
+private:
+    // These helpers must be guarded by the same macro as set_args(), which
+    // is their only caller; otherwise one can be compiled without the other.
+    #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+    /// \internal_
+    /// Ends the recursion once every argument has been set.
+    template<size_t N>
+    void _set_args()
+    {
+    }
+
+    /// \internal_
+    /// Sets argument N to \p arg and recurses for the remaining arguments.
+    template<size_t N, class T, class... Args>
+    void _set_args(T&& arg, Args&&... rest)
+    {
+        set_arg(N, arg);
+        _set_args<N+1>(rest...);
+    }
+    #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+private:
+    cl_kernel m_kernel;
+};
+
+// Creates a kernel object for the function called name in this program.
+inline kernel program::create_kernel(const std::string &name) const
+{
+    kernel created(*this, name);
+
+    return created;
+}
+
+/// \internal_ define get_info() specializations for kernel
+// Each entry pairs the type of the queried value with its
+// clGetKernelInfo enumerator.
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
+ ((std::string, CL_KERNEL_FUNCTION_NAME))
+ ((cl_uint, CL_KERNEL_NUM_ARGS))
+ ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
+ ((cl_context, CL_KERNEL_CONTEXT))
+ ((cl_program, CL_KERNEL_PROGRAM))
+)
+
+// CL_KERNEL_ATTRIBUTES is only specialized when CL_VERSION_1_2 is defined
+#ifdef CL_VERSION_1_2
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
+ ((std::string, CL_KERNEL_ATTRIBUTES))
+)
+#endif // CL_VERSION_1_2
+
+namespace detail {
+
+// Default set_kernel_arg implementation, meant for built-in types: the
+// value is handed to the kernel as sizeof(T) raw bytes. The return type of
+// operator() is only well-formed when is_fundamental<T> holds.
+template<class T>
+struct set_kernel_arg
+{
+    typename boost::enable_if<is_fundamental<T> >::type
+    operator()(kernel &kernel_, size_t index, const T &value)
+    {
+        const void *raw_value = &value;
+
+        kernel_.set_arg(index, sizeof(T), raw_value);
+    }
+};
+
+// set_kernel_arg specialization for plain char, which is a different type
+// from the built-in cl_char; the character is passed as a single byte
+template<>
+struct set_kernel_arg<char>
+{
+    void operator()(kernel &kernel_, size_t index, const char c)
+    {
+        const void *raw_value = &c;
+
+        kernel_.set_arg(index, sizeof(char), raw_value);
+    }
+};
+
+} // end detail namespace
+} // end namespace compute
+} // end namespace boost
+
+#endif // BOOST_COMPUTE_KERNEL_HPP
diff --git a/boost/compute/lambda.hpp b/boost/compute/lambda.hpp
new file mode 100644
index 0000000000..24bedfd721
--- /dev/null
+++ b/boost/compute/lambda.hpp
@@ -0,0 +1,22 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_HPP
+#define BOOST_COMPUTE_LAMBDA_HPP
+
+#include <boost/compute/lambda/context.hpp>
+#include <boost/compute/lambda/functional.hpp>
+#include <boost/compute/lambda/get.hpp>
+#include <boost/compute/lambda/make_pair.hpp>
+#include <boost/compute/lambda/make_tuple.hpp>
+#include <boost/compute/lambda/placeholders.hpp>
+#include <boost/compute/lambda/result_of.hpp>
+
+#endif // BOOST_COMPUTE_LAMBDA_HPP
diff --git a/boost/compute/lambda/context.hpp b/boost/compute/lambda/context.hpp
new file mode 100644
index 0000000000..ed25b79475
--- /dev/null
+++ b/boost/compute/lambda/context.hpp
@@ -0,0 +1,329 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_CONTEXT_HPP
+#define BOOST_COMPUTE_LAMBDA_CONTEXT_HPP
+
+#include <boost/proto/core.hpp>
+#include <boost/proto/context.hpp>
+#include <boost/type_traits.hpp>
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/lambda/result_of.hpp>
+#include <boost/compute/lambda/functional.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+namespace mpl = boost::mpl;
+namespace proto = boost::proto;
+
+#define BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(tag, op) \
+ template<class LHS, class RHS> \
+ void operator()(tag, const LHS &lhs, const RHS &rhs) \
+ { \
+ if(proto::arity_of<LHS>::value > 0){ \
+ stream << '('; \
+ proto::eval(lhs, *this); \
+ stream << ')'; \
+ } \
+ else { \
+ proto::eval(lhs, *this); \
+ } \
+ \
+ stream << op; \
+ \
+ if(proto::arity_of<RHS>::value > 0){ \
+ stream << '('; \
+ proto::eval(rhs, *this); \
+ stream << ')'; \
+ } \
+ else { \
+ proto::eval(rhs, *this); \
+ } \
+ }
+
+// lambda expression context
+template<class Args>
+struct context : proto::callable_context<context<Args> >
+{
+ typedef void result_type;
+ typedef Args args_tuple;
+
+ // create a lambda context for kernel with args
+ context(boost::compute::detail::meta_kernel &kernel, const Args &args_)
+ : stream(kernel),
+ args(args_)
+ {
+ }
+
+ // handle terminals
+ template<class T>
+ void operator()(proto::tag::terminal, const T &x)
+ {
+ // terminal values in lambda expressions are always literals
+ stream << stream.lit(x);
+ }
+
+ // handle placeholders
+ template<int I>
+ void operator()(proto::tag::terminal, placeholder<I>)
+ {
+ stream << boost::get<I>(args);
+ }
+
+ // handle functions
+ #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG(z, n, unused) \
+ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n)
+
+ #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION(z, n, unused) \
+ template<class F, BOOST_PP_ENUM_PARAMS(n, class Arg)> \
+ void operator()( \
+ proto::tag::function, \
+ const F &function, \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG, ~) \
+ ) \
+ { \
+ proto::value(function).apply(*this, BOOST_PP_ENUM_PARAMS(n, arg)); \
+ }
+
+ BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION, ~)
+
+ #undef BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION
+
+ // operators
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::plus, '+')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::minus, '-')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::multiplies, '*')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::divides, '/')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::modulus, '%')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less, '<')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater, '>')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less_equal, "<=")
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater_equal, ">=")
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::equal_to, "==")
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::not_equal_to, "!=")
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_and, "&&")
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_or, "||")
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_and, '&')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_or, '|')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_xor, '^')
+ BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::assign, '=')
+
+ // subscript operator
+ template<class LHS, class RHS>
+ void operator()(proto::tag::subscript, const LHS &lhs, const RHS &rhs)
+ {
+ proto::eval(lhs, *this);
+ stream << '[';
+ proto::eval(rhs, *this);
+ stream << ']';
+ }
+
+ // ternary conditional operator
+ template<class Pred, class Arg1, class Arg2>
+ void operator()(proto::tag::if_else_, const Pred &p, const Arg1 &x, const Arg2 &y)
+ {
+ proto::eval(p, *this);
+ stream << '?';
+ proto::eval(x, *this);
+ stream << ':';
+ proto::eval(y, *this);
+ }
+
+ boost::compute::detail::meta_kernel &stream;
+ Args args;
+};
+
+namespace detail {
+
+template<class Expr, class Arg>
+struct invoked_unary_expression
+{
+ typedef typename ::boost::compute::result_of<Expr(Arg)>::type result_type;
+
+ invoked_unary_expression(const Expr &expr, const Arg &arg)
+ : m_expr(expr),
+ m_arg(arg)
+ {
+ }
+
+ Expr m_expr;
+ Arg m_arg;
+};
+
+template<class Expr, class Arg>
+boost::compute::detail::meta_kernel&
+operator<<(boost::compute::detail::meta_kernel &kernel,
+ const invoked_unary_expression<Expr, Arg> &expr)
+{
+ context<boost::tuple<Arg> > ctx(kernel, boost::make_tuple(expr.m_arg));
+ proto::eval(expr.m_expr, ctx);
+
+ return kernel;
+}
+
+template<class Expr, class Arg1, class Arg2>
+struct invoked_binary_expression
+{
+ typedef typename ::boost::compute::result_of<Expr(Arg1, Arg2)>::type result_type;
+
+ invoked_binary_expression(const Expr &expr,
+ const Arg1 &arg1,
+ const Arg2 &arg2)
+ : m_expr(expr),
+ m_arg1(arg1),
+ m_arg2(arg2)
+ {
+ }
+
+ Expr m_expr;
+ Arg1 m_arg1;
+ Arg2 m_arg2;
+};
+
+template<class Expr, class Arg1, class Arg2>
+boost::compute::detail::meta_kernel&
+operator<<(boost::compute::detail::meta_kernel &kernel,
+ const invoked_binary_expression<Expr, Arg1, Arg2> &expr)
+{
+ context<boost::tuple<Arg1, Arg2> > ctx(
+ kernel,
+ boost::make_tuple(expr.m_arg1, expr.m_arg2)
+ );
+ proto::eval(expr.m_expr, ctx);
+
+ return kernel;
+}
+
+} // end detail namespace
+
+// forward declare domain
+struct domain;
+
+// lambda expression wrapper
+template<class Expr>
+struct expression : proto::extends<Expr, expression<Expr>, domain>
+{
+ typedef proto::extends<Expr, expression<Expr>, domain> base_type;
+
+ BOOST_PROTO_EXTENDS_USING_ASSIGN(expression)
+
+ expression(const Expr &expr = Expr())
+ : base_type(expr)
+ {
+ }
+
+ // result_of protocol
+ template<class Signature>
+ struct result
+ {
+ };
+
+ template<class This>
+ struct result<This()>
+ {
+ typedef
+ typename ::boost::compute::lambda::result_of<Expr>::type type;
+ };
+
+ template<class This, class Arg>
+ struct result<This(Arg)>
+ {
+ typedef
+ typename ::boost::compute::lambda::result_of<
+ Expr,
+ typename boost::tuple<Arg>
+ >::type type;
+ };
+
+ template<class This, class Arg1, class Arg2>
+ struct result<This(Arg1, Arg2)>
+ {
+ typedef typename
+ ::boost::compute::lambda::result_of<
+ Expr,
+ typename boost::tuple<Arg1, Arg2>
+ >::type type;
+ };
+
+ template<class Arg>
+ detail::invoked_unary_expression<expression<Expr>, Arg>
+ operator()(const Arg &x) const
+ {
+ return detail::invoked_unary_expression<expression<Expr>, Arg>(*this, x);
+ }
+
+ template<class Arg1, class Arg2>
+ detail::invoked_binary_expression<expression<Expr>, Arg1, Arg2>
+ operator()(const Arg1 &x, const Arg2 &y) const
+ {
+ return detail::invoked_binary_expression<
+ expression<Expr>,
+ Arg1,
+ Arg2
+ >(*this, x, y);
+ }
+
+ // function<> conversion operator
+ template<class R, class A1>
+ operator function<R(A1)>() const
+ {
+ using ::boost::compute::detail::meta_kernel;
+
+ std::stringstream source;
+
+ ::boost::compute::detail::meta_kernel_variable<A1> arg1("x");
+
+ source << "inline " << type_name<R>() << " lambda"
+ << ::boost::compute::detail::generate_argument_list<R(A1)>('x')
+ << "{\n"
+ << " return " << meta_kernel::expr_to_string((*this)(arg1)) << ";\n"
+ << "}\n";
+
+ return make_function_from_source<R(A1)>("lambda", source.str());
+ }
+
+ template<class R, class A1, class A2> // conversion to a binary function<R(A1, A2)>
+ operator function<R(A1, A2)>() const
+ {
+ using ::boost::compute::detail::meta_kernel;
+
+ std::stringstream source;
+
+ ::boost::compute::detail::meta_kernel_variable<A1> arg1("x");
+ ::boost::compute::detail::meta_kernel_variable<A2> arg2("y"); // typed A2 to match the R(A1, A2) signature
+
+ source << "inline " << type_name<R>() << " lambda"
+ << ::boost::compute::detail::generate_argument_list<R(A1, A2)>('x')
+ << "{\n"
+ << " return " << meta_kernel::expr_to_string((*this)(arg1, arg2)) << ";\n"
+ << "}\n";
+
+ return make_function_from_source<R(A1, A2)>("lambda", source.str());
+ }
+};
+
+// lambda expression domain
+struct domain : proto::domain<proto::generator<expression> >
+{
+};
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_CONTEXT_HPP
diff --git a/boost/compute/lambda/functional.hpp b/boost/compute/lambda/functional.hpp
new file mode 100644
index 0000000000..dd7190e4d9
--- /dev/null
+++ b/boost/compute/lambda/functional.hpp
@@ -0,0 +1,242 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP
+#define BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP
+
+#include <boost/tuple/tuple.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <boost/proto/core.hpp>
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/stringize.hpp>
+
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/lambda/result_of.hpp>
+#include <boost/compute/lambda/placeholder.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+namespace mpl = boost::mpl;
+namespace proto = boost::proto;
+
+// wraps a unary boolean function
+#define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef int type; \
+ }; \
+ \
+ template<class Context, class Arg> \
+ static void apply(Context &ctx, const Arg &arg) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \
+ >::type const \
+ name(const Arg &arg) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \
+ ); \
+ }
+
+// wraps a unary function whose return type is the same as the argument type
+#define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg> \
+ static void apply(Context &ctx, const Arg &arg) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \
+ >::type const \
+ name(const Arg &arg) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \
+ ); \
+ }
+
+// wraps a binary function
+#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \
+ ); \
+ }
+
+// wraps a binary function whose result type is the scalar type of the first argument
+#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type result_type; \
+ typedef typename ::boost::compute::scalar_type<result_type>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \
+ ); \
+ }
+
+// wraps a ternary function
+#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2, class Arg3> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg3, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2, class Arg3> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \
+ ); \
+ }
+
+
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite)
+
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(abs)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acos)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asin)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atan)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp10)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log10)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(length)
+
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pow)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pown)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(powr)
+
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance)
+
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(smoothstep)
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP
diff --git a/boost/compute/lambda/get.hpp b/boost/compute/lambda/get.hpp
new file mode 100644
index 0000000000..547c208e95
--- /dev/null
+++ b/boost/compute/lambda/get.hpp
@@ -0,0 +1,148 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_GET_HPP
+#define BOOST_COMPUTE_LAMBDA_GET_HPP
+
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/lambda/placeholder.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+namespace detail {
+
+// function wrapper for get<N>() in lambda expressions
+template<size_t N>
+struct get_func
+{
+ template<class Expr, class Args>
+ struct lambda_result
+ {
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg;
+ typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type T;
+ typedef typename ::boost::compute::detail::get_result_type<N, T>::type type;
+ };
+
+ template<class Context, class Arg>
+ struct make_get_result_type
+ {
+ typedef typename boost::remove_cv<
+ typename boost::compute::lambda::result_of<
+ Arg, typename Context::args_tuple
+ >::type
+ >::type type;
+ };
+
+ // returns the suffix string for get<N>() in lambda expressions
+ // (e.g. ".x" for get<0>() with float4)
+ template<class T>
+ struct make_get_suffix
+ {
+ static std::string value()
+ {
+ BOOST_STATIC_ASSERT(N < 16);
+
+ std::stringstream stream;
+
+ if(N < 10){
+ stream << ".s" << uint_(N);
+ }
+ else if(N < 16){
+ stream << ".s" << char('a' + (N - 10));
+ }
+
+ return stream.str();
+ }
+ };
+
+ // get<N>() specialization for std::pair<T1, T2>
+ template<class T1, class T2>
+ struct make_get_suffix<std::pair<T1, T2> >
+ {
+ static std::string value()
+ {
+ BOOST_STATIC_ASSERT(N < 2);
+
+ if(N == 0){
+ return ".first";
+ }
+ else {
+ return ".second";
+ }
+ };
+ };
+
+ // get<N>() specialization for boost::tuple<T...>
+ #define BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX(z, n, unused) \
+ template<BOOST_PP_ENUM_PARAMS(n, class T)> \
+ struct make_get_suffix<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \
+ { \
+ static std::string value() \
+ { \
+ BOOST_STATIC_ASSERT(N < n); \
+ return ".v" + boost::lexical_cast<std::string>(N); \
+ } \
+ };
+
+ BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX, ~)
+
+ #undef BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX
+
+ template<class Context, class Arg>
+ static void dispatch_apply_terminal(Context &ctx, const Arg &arg)
+ {
+ typedef typename make_get_result_type<Context, Arg>::type T;
+
+ proto::eval(arg, ctx);
+ ctx.stream << make_get_suffix<T>::value();
+ }
+
+ template<class Context, int I>
+ static void dispatch_apply_terminal(Context &ctx, placeholder<I>)
+ {
+ ctx.stream << ::boost::compute::get<N>()(::boost::get<I>(ctx.args));
+ }
+
+ template<class Context, class Arg>
+ static void dispatch_apply(Context &ctx, const Arg &arg, proto::tag::terminal)
+ {
+ dispatch_apply_terminal(ctx, proto::value(arg));
+ }
+
+ template<class Context, class Arg>
+ static void apply(Context &ctx, const Arg &arg)
+ {
+ dispatch_apply(ctx, arg, typename proto::tag_of<Arg>::type());
+ }
+};
+
+} // end detail namespace
+
+// get<N>()
+template<size_t N, class Arg>
+inline typename proto::result_of::make_expr<
+ proto::tag::function, detail::get_func<N>, const Arg&
+>::type const
+get(const Arg &arg)
+{
+ return proto::make_expr<proto::tag::function>(
+ detail::get_func<N>(), ::boost::ref(arg)
+ );
+}
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_GET_HPP
diff --git a/boost/compute/lambda/make_pair.hpp b/boost/compute/lambda/make_pair.hpp
new file mode 100644
index 0000000000..3adca97c71
--- /dev/null
+++ b/boost/compute/lambda/make_pair.hpp
@@ -0,0 +1,70 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP
+#define BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP
+
+#include <boost/compute/types/pair.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+namespace detail {
+
+// function wrapper for make_pair() in lambda expressions
+struct make_pair_func
+{
+ template<class Expr, class Args>
+ struct lambda_result
+ {
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1;
+ typedef typename proto::result_of::child_c<Expr, 2>::type Arg2;
+
+ typedef typename lambda::result_of<Arg1, Args>::type T1;
+ typedef typename lambda::result_of<Arg2, Args>::type T2;
+
+ typedef std::pair<T1, T2> type;
+ };
+
+ template<class Context, class Arg1, class Arg2>
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2)
+ {
+ typedef typename lambda::result_of<Arg1, typename Context::args_tuple>::type T1;
+ typedef typename lambda::result_of<Arg2, typename Context::args_tuple>::type T2;
+
+ ctx.stream << "boost_make_pair(";
+ ctx.stream << type_name<T1>() << ", ";
+ proto::eval(arg1, ctx);
+ ctx.stream << ", ";
+ ctx.stream << type_name<T2>() << ", ";
+ proto::eval(arg2, ctx);
+ ctx.stream << ")";
+ }
+};
+
+} // end detail namespace
+
+// make_pair(first, second)
+template<class Arg1, class Arg2>
+inline typename proto::result_of::make_expr<
+ proto::tag::function, detail::make_pair_func, const Arg1&, const Arg2&
+>::type const
+make_pair(const Arg1 &first, const Arg2 &second)
+{
+ return proto::make_expr<proto::tag::function>(
+ detail::make_pair_func(), ::boost::ref(first), ::boost::ref(second)
+ );
+}
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP
diff --git a/boost/compute/lambda/make_tuple.hpp b/boost/compute/lambda/make_tuple.hpp
new file mode 100644
index 0000000000..2d065a999a
--- /dev/null
+++ b/boost/compute/lambda/make_tuple.hpp
@@ -0,0 +1,127 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP
+
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/types/tuple.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+namespace detail {
+
+// function wrapper for make_tuple() in lambda expressions
+struct make_tuple_func
+{
+ template<class Expr, class Args, int N>
+ struct make_tuple_result_type;
+
+ #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG(z, n, unused) \
+ typedef typename proto::result_of::child_c<Expr, BOOST_PP_INC(n)>::type BOOST_PP_CAT(Arg, n);
+
+ #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE(z, n, unused) \
+ typedef typename lambda::result_of<BOOST_PP_CAT(Arg, n), Args>::type BOOST_PP_CAT(T, n);
+
+ #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE(z, n, unused) \
+ template<class Expr, class Args> \
+ struct make_tuple_result_type<Expr, Args, n> \
+ { \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG, ~) \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE, ~) \
+ typedef boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> type; \
+ };
+
+ BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE, ~)
+
+ #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG
+ #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE
+ #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE
+
+ template<class Expr, class Args>
+ struct lambda_result
+ {
+ typedef typename make_tuple_result_type<
+ Expr, Args, proto::arity_of<Expr>::value - 1
+ >::type type;
+ };
+
+ #define BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE(z, n, unused) \
+ typedef typename lambda::result_of< \
+ BOOST_PP_CAT(Arg, n), typename Context::args_tuple \
+ >::type BOOST_PP_CAT(T, n);
+
+ #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG(z, n, unused) \
+ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n)
+
+ #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG(z, n, unused) \
+ BOOST_PP_EXPR_IF(n, ctx.stream << ", ";) proto::eval(BOOST_PP_CAT(arg, n), ctx);
+
+ #define BOOST_COMPUTE_MAKE_TUPLE_APPLY(z, n, unused) \
+ template<class Context, BOOST_PP_ENUM_PARAMS(n, class Arg)> \
+ static void apply(Context &ctx, BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG, ~)) \
+ { \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE, ~) \
+ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> tuple_type; \
+ ctx.stream.template inject_type<tuple_type>(); \
+ ctx.stream << "((" << type_name<tuple_type>() << "){"; \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG, ~) \
+ ctx.stream << "})"; \
+ }
+ // generate one apply() overload per argument count in [1, BOOST_COMPUTE_MAX_ARITY)
+ BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_APPLY, ~)
+ // undefine all four helper macros defined above so none of them leak to includers
+ #undef BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE
+ #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG
+ #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG
+ #undef BOOST_COMPUTE_MAKE_TUPLE_APPLY
+};
+
+} // end detail namespace
+
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG(z, n, unused) \
+ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n)
+
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE(z, n, unused) \
+ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) &
+
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG(z, n, unused) \
+ BOOST_PP_COMMA_IF(n) ::boost::ref(BOOST_PP_CAT(arg, n))
+
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE(z, n, unused) \
+template<BOOST_PP_ENUM_PARAMS(n, class Arg)> \
+inline typename proto::result_of::make_expr< \
+ proto::tag::function, \
+ detail::make_tuple_func, \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE, ~) \
+>::type \
+make_tuple(BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG, ~)) \
+{ \
+ return proto::make_expr<proto::tag::function>( \
+ detail::make_tuple_func(), \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG, ~) \
+ ); \
+}
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE, ~)
+
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP
diff --git a/boost/compute/lambda/placeholder.hpp b/boost/compute/lambda/placeholder.hpp
new file mode 100644
index 0000000000..4774b1b4f3
--- /dev/null
+++ b/boost/compute/lambda/placeholder.hpp
@@ -0,0 +1,28 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP
+#define BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+// lambda placeholder type
+template<int I>
+struct placeholder
+{
+};
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP
diff --git a/boost/compute/lambda/placeholders.hpp b/boost/compute/lambda/placeholders.hpp
new file mode 100644
index 0000000000..5c3fe531d1
--- /dev/null
+++ b/boost/compute/lambda/placeholders.hpp
@@ -0,0 +1,93 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP
+#define BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP
+
+#include <boost/mpl/has_xxx.hpp>
+
+#include <boost/compute/lambda/context.hpp>
+#include <boost/compute/lambda/result_of.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+namespace mpl = boost::mpl;
+namespace proto = boost::proto;
+
+// lambda placeholders
+expression<proto::terminal<placeholder<0> >::type> const _1;
+expression<proto::terminal<placeholder<1> >::type> const _2;
+expression<proto::terminal<placeholder<2> >::type> const _3;
+
+namespace detail {
+
+BOOST_MPL_HAS_XXX_TRAIT_DEF(result_type)
+
+template<class T, bool HasResultType>
+struct terminal_type_impl;
+
+template<class T>
+struct terminal_type_impl<T, true>
+{
+ typedef typename T::result_type type;
+};
+
+template<class T>
+struct terminal_type_impl<T, false>
+{
+ typedef T type;
+};
+
+template<class T>
+struct terminal_type
+{
+ typedef typename terminal_type_impl<T, has_result_type<T>::value>::type type;
+};
+
+} // end detail namespace
+
+// result_of placeholders
+template<class Args>
+struct result_of<expression<proto::terminal<placeholder<0> >::type>, Args, proto::tag::terminal>
+{
+ typedef typename boost::tuples::element<0, Args>::type arg_type;
+
+ typedef typename detail::terminal_type<arg_type>::type type;
+};
+
+template<class Args>
+struct result_of<expression<proto::terminal<placeholder<1> >::type>, Args, proto::tag::terminal>
+{
+ typedef typename boost::tuples::element<1, Args>::type arg_type;
+
+ typedef typename detail::terminal_type<arg_type>::type type;
+};
+
+template<class Args>
+struct result_of<expression<proto::terminal<placeholder<2> >::type>, Args, proto::tag::terminal>
+{
+ typedef typename boost::tuples::element<2, Args>::type arg_type;
+
+ typedef typename detail::terminal_type<arg_type>::type type;
+};
+
+} // end lambda namespace
+
+// lift lambda placeholders up to the boost::compute namespace
+using lambda::_1;
+using lambda::_2;
+using lambda::_3;
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP
diff --git a/boost/compute/lambda/result_of.hpp b/boost/compute/lambda/result_of.hpp
new file mode 100644
index 0000000000..baae87f05c
--- /dev/null
+++ b/boost/compute/lambda/result_of.hpp
@@ -0,0 +1,113 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP
+#define BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP
+
+#include <boost/mpl/vector.hpp>
+#include <boost/proto/proto.hpp>
+
+#include <boost/compute/type_traits/common_type.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+namespace mpl = boost::mpl;
+namespace proto = boost::proto;
+
+// meta-function returning the result type of a lambda expression; specializations are selected by the Tags parameter
+template<class Expr,
+ class Args = void, // argument types the expression is evaluated with
+ class Tags = typename proto::tag_of<Expr>::type> // proto tag of Expr, used to pick a specialization
+struct result_of
+{ // primary template is intentionally empty: it defines no nested type
+};
+
+// terminals: the result type is the type of the value stored in the terminal
+template<class Expr, class Args>
+struct result_of<Expr, Args, proto::tag::terminal>
+{
+ typedef typename proto::result_of::value<Expr>::type type; // value type held by the terminal expression
+};
+
+// binary operators: the result type is the boost::common_type of the result types of the left (child 0) and right (child 1) operands
+#define BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(tag) \
+ template<class Expr, class Args> \
+ struct result_of<Expr, Args, tag> \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 0>::type left; \
+ typedef typename proto::result_of::child_c<Expr, 1>::type right; \
+ \
+ typedef typename boost::common_type< \
+ typename ::boost::compute::lambda::result_of< \
+ left, \
+ Args, \
+ typename proto::tag_of<left>::type>::type, \
+ typename ::boost::compute::lambda::result_of< \
+ right, \
+ Args, \
+ typename proto::tag_of<right>::type>::type \
+ >::type type; \
+ };
+
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::plus) // a + b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::minus) // a - b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::multiplies) // a * b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::divides) // a / b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::modulus) // a % b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_and) // a & b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_or) // a | b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_xor) // a ^ b
+
+// comparison and logical operators: the result type is always bool, whatever the operand types are
+#define BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(tag) \
+ template<class Expr, class Args> \
+ struct result_of<Expr, Args, tag> \
+ { \
+ typedef bool type; \
+ };
+
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less) // a < b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater) // a > b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less_equal) // a <= b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater_equal) // a >= b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::equal_to) // a == b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::not_equal_to) // a != b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_and) // a && b
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_or) // a || b
+
+// assignment operator: the result type is the result type of the right-hand side (child 1) expression
+template<class Expr, class Args>
+struct result_of<Expr, Args, proto::tag::assign>
+{
+ typedef typename proto::result_of::child_c<Expr, 0>::type left; // assignment target; not used to compute type
+ typedef typename proto::result_of::child_c<Expr, 1>::type right; // assigned expression
+
+ typedef typename ::boost::compute::lambda::result_of<
+ right, Args, typename proto::tag_of<right>::type
+ >::type type;
+};
+
+// functions: the result type is delegated to the lambda_result nested template of the function object stored in child 0
+template<class Expr, class Args>
+struct result_of<Expr, Args, proto::tag::function>
+{
+ typedef typename proto::result_of::child_c<Expr, 0>::type func_expr; // terminal holding the function object
+ typedef typename proto::result_of::value<func_expr>::type func; // type of the function object itself
+
+ typedef typename func::template lambda_result<Expr, Args>::type type; // func decides the result from the whole call expression
+};
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP
diff --git a/boost/compute/memory.hpp b/boost/compute/memory.hpp
new file mode 100644
index 0000000000..b4dccf4890
--- /dev/null
+++ b/boost/compute/memory.hpp
@@ -0,0 +1,21 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_HPP
+#define BOOST_COMPUTE_MEMORY_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute memory headers.
+
+#include <boost/compute/memory/local_buffer.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+
+#endif // BOOST_COMPUTE_MEMORY_HPP
diff --git a/boost/compute/memory/local_buffer.hpp b/boost/compute/memory/local_buffer.hpp
new file mode 100644
index 0000000000..aaf631317a
--- /dev/null
+++ b/boost/compute/memory/local_buffer.hpp
@@ -0,0 +1,91 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
+#define BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/kernel.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class local_buffer
+/// \brief Represents a local memory buffer on the device.
+///
+/// The local_buffer class represents a block of local memory on a compute
+/// device.
+///
+/// This class is most commonly used to set local memory arguments for compute
+/// kernels:
+/// \code
+/// // set argument to a local buffer with storage for 32 float's
+/// kernel.set_arg(0, local_buffer<float>(32));
+/// \endcode
+///
+/// \see buffer, kernel
+template<class T>
+class local_buffer
+{
+public:
+    /// Constructs a local buffer with room for \p size elements of type \c T.
+    local_buffer(const size_t size)
+        : m_size(size)
+    {
+    }
+
+    /// Copy-constructs a local buffer with the same element count as \p other.
+    local_buffer(const local_buffer &other)
+        : m_size(other.m_size)
+    {
+    }
+
+    /// Assigns the element count of \p other to \c *this.
+    local_buffer& operator=(const local_buffer &other)
+    {
+        // assigning a size_t onto itself is harmless, so self-assignment
+        // needs no special handling
+        m_size = other.m_size;
+        return *this;
+    }
+
+    /// Destroys the local buffer object; nothing needs to be released.
+    ~local_buffer()
+    {
+    }
+
+    /// Returns how many elements the local buffer holds.
+    size_t size() const
+    {
+        return m_size;
+    }
+
+private:
+    size_t m_size; // number of elements
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for local_buffer<T>: passes the size of
+// the buffer in bytes together with a null value pointer
+template<class T>
+struct set_kernel_arg<local_buffer<T> >
+{
+    void operator()(kernel &kernel_, size_t index, const local_buffer<T> &buffer)
+    {
+        // size() counts elements, set_arg() is given bytes
+        const size_t size_in_bytes = buffer.size() * sizeof(T);
+
+        kernel_.set_arg(index, size_in_bytes, 0);
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
diff --git a/boost/compute/memory/svm_ptr.hpp b/boost/compute/memory/svm_ptr.hpp
new file mode 100644
index 0000000000..2dbcb8f635
--- /dev/null
+++ b/boost/compute/memory/svm_ptr.hpp
@@ -0,0 +1,81 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
+#define BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+template<class T>
+class svm_ptr
+{
+public:
+    typedef T value_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef T* pointer;
+    typedef T& reference;
+    typedef std::random_access_iterator_tag iterator_category;
+
+    /// Creates a null svm_ptr.
+    svm_ptr()
+        : m_ptr(0)
+    {
+    }
+
+    /// Creates an svm_ptr that refers to \p ptr, interpreted as a \c T*.
+    explicit svm_ptr(void *ptr)
+        : m_ptr(static_cast<T*>(ptr))
+    {
+    }
+
+    /// Creates an svm_ptr referring to the same address as \p other.
+    svm_ptr(const svm_ptr<T> &other)
+        : m_ptr(other.m_ptr)
+    {
+    }
+
+    /// Makes \c *this refer to the same address as \p other.
+    svm_ptr& operator=(const svm_ptr<T> &other)
+    {
+        m_ptr = other.m_ptr;
+        return *this;
+    }
+
+    /// Destroys the svm_ptr. The memory it refers to is not freed.
+    ~svm_ptr()
+    {
+    }
+
+    /// Returns the stored address as an untyped pointer.
+    void* get() const
+    {
+        return m_ptr;
+    }
+
+    /// Returns an svm_ptr advanced by \p n elements of type \c T.
+    ///
+    /// Const-qualified because it does not modify \c *this, so it can be
+    /// called on const svm_ptr objects as well.
+    svm_ptr<T> operator+(difference_type n) const
+    {
+        return svm_ptr<T>(m_ptr + n);
+    }
+
+    /// Returns the distance, in elements of type \c T, from \p other to
+    /// \c *this.
+    ///
+    /// Const-qualified because it does not modify \c *this, so it can be
+    /// called on const svm_ptr objects as well.
+    difference_type operator-(svm_ptr<T> other) const
+    {
+        return m_ptr - other.m_ptr;
+    }
+
+private:
+    T *m_ptr; // address referred to; 0 for a null svm_ptr
+};
+
+/// \internal_ (is_device_iterator specialization for svm_ptr: derives from true_type for every svm_ptr<T>)
+template<class T>
+struct is_device_iterator<svm_ptr<T> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
diff --git a/boost/compute/memory_object.hpp b/boost/compute/memory_object.hpp
new file mode 100644
index 0000000000..14c4cf4c7e
--- /dev/null
+++ b/boost/compute/memory_object.hpp
@@ -0,0 +1,264 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_OBJECT_HPP
+#define BOOST_COMPUTE_MEMORY_OBJECT_HPP
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class memory_object
+/// \brief Base-class for memory objects.
+///
+/// The memory_object class is the base-class for memory objects on
+/// compute devices.
+///
+/// \see buffer, vector
+class memory_object
+{
+public:
+    /// Flags for the creation of memory objects.
+    enum mem_flags {
+        read_write = CL_MEM_READ_WRITE,
+        read_only = CL_MEM_READ_ONLY,
+        write_only = CL_MEM_WRITE_ONLY,
+        use_host_ptr = CL_MEM_USE_HOST_PTR,
+        alloc_host_ptr = CL_MEM_ALLOC_HOST_PTR,
+        copy_host_ptr = CL_MEM_COPY_HOST_PTR
+        #ifdef CL_VERSION_1_2
+        ,
+        host_write_only = CL_MEM_HOST_WRITE_ONLY,
+        host_read_only = CL_MEM_HOST_READ_ONLY,
+        host_no_access = CL_MEM_HOST_NO_ACCESS
+        #endif
+    };
+
+    /// Symbolic names for the OpenCL address spaces.
+    enum address_space {
+        global_memory,
+        local_memory,
+        private_memory,
+        constant_memory
+    };
+
+    /// Returns the underlying OpenCL memory object.
+    ///
+    /// The handle is returned as a mutable reference even though the
+    /// method is const (constness is cast away).
+    cl_mem& get() const
+    {
+        return const_cast<cl_mem &>(m_mem);
+    }
+
+    /// Returns the size of the memory object in bytes.
+    size_t get_memory_size() const
+    {
+        return get_memory_info<size_t>(CL_MEM_SIZE);
+    }
+
+    /// Returns the type for the memory object.
+    cl_mem_object_type get_memory_type() const
+    {
+        return get_memory_info<cl_mem_object_type>(CL_MEM_TYPE);
+    }
+
+    /// Returns the flags for the memory object.
+    cl_mem_flags get_memory_flags() const
+    {
+        return get_memory_info<cl_mem_flags>(CL_MEM_FLAGS);
+    }
+
+    /// Returns the context for the memory object.
+    context get_context() const
+    {
+        return context(get_memory_info<cl_context>(CL_MEM_CONTEXT));
+    }
+
+    /// Returns the host pointer associated with the memory object.
+    void* get_host_ptr() const
+    {
+        return get_memory_info<void *>(CL_MEM_HOST_PTR);
+    }
+
+    /// Returns the reference count for the memory object.
+    uint_ reference_count() const
+    {
+        return get_memory_info<uint_>(CL_MEM_REFERENCE_COUNT);
+    }
+
+    /// Returns information about the memory object.
+    ///
+    /// \see_opencl_ref{clGetMemObjectInfo}
+    template<class T>
+    T get_memory_info(cl_mem_info info) const
+    {
+        return detail::get_object_info<T>(clGetMemObjectInfo, m_mem, info);
+    }
+
+    #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Registers a function to be called when the memory object is deleted
+    /// and its resources freed.
+    ///
+    /// Throws opencl_error if the callback could not be registered.
+    ///
+    /// \see_opencl_ref{clSetMemObjectDestructorCallback}
+    ///
+    /// \opencl_version_warning{1,1}
+    void set_destructor_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)(
+                                     cl_mem memobj, void *user_data
+                                 ),
+                                 void *user_data = 0)
+    {
+        cl_int ret = clSetMemObjectDestructorCallback(m_mem, callback, user_data);
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+    /// Registers a function to be called when the memory object is deleted
+    /// and its resources freed.
+    ///
+    /// The function specified by \p callback must be invokable with zero
+    /// arguments (e.g. \c callback()).
+    ///
+    /// Throws opencl_error if the callback could not be registered; the
+    /// internal copy of \p callback is freed in that case.
+    ///
+    /// \opencl_version_warning{1,1}
+    template<class Function>
+    void set_destructor_callback(Function callback)
+    {
+        // the heap-allocated copy is owned by destructor_callback_invoker,
+        // which deletes it after running it
+        boost::function<void()> *callback_copy =
+            new boost::function<void()>(callback);
+
+        cl_int ret = clSetMemObjectDestructorCallback(
+            m_mem, destructor_callback_invoker, callback_copy
+        );
+        if(ret != CL_SUCCESS){
+            // registration failed, so the invoker will never run: free the
+            // copy here instead of leaking it
+            delete callback_copy;
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+    #endif // CL_VERSION_1_1
+
+    /// Returns \c true if the memory object is the same as \p other.
+    bool operator==(const memory_object &other) const
+    {
+        return m_mem == other.m_mem;
+    }
+
+    /// Returns \c true if the memory object is different from \p other.
+    bool operator!=(const memory_object &other) const
+    {
+        return m_mem != other.m_mem;
+    }
+
+private:
+    #ifdef CL_VERSION_1_1
+    /// \internal_
+    /// Trampoline registered with OpenCL: runs the boost::function stored
+    /// in \p user_data and then deletes it.
+    static void BOOST_COMPUTE_CL_CALLBACK
+    destructor_callback_invoker(cl_mem, void *user_data)
+    {
+        boost::function<void()> *callback =
+            static_cast<boost::function<void()> *>(user_data);
+
+        (*callback)();
+
+        delete callback;
+    }
+    #endif // CL_VERSION_1_1
+
+protected:
+    /// \internal_
+    /// Creates a null memory object.
+    memory_object()
+        : m_mem(0)
+    {
+    }
+
+    /// \internal_
+    /// Wraps \p mem, incrementing its reference count when \p retain is
+    /// \c true and \p mem is non-null.
+    explicit memory_object(cl_mem mem, bool retain = true)
+        : m_mem(mem)
+    {
+        if(m_mem && retain){
+            clRetainMemObject(m_mem);
+        }
+    }
+
+    /// \internal_
+    /// Shares the handle of \p other and takes a reference on it.
+    memory_object(const memory_object &other)
+        : m_mem(other.m_mem)
+    {
+        if(m_mem){
+            clRetainMemObject(m_mem);
+        }
+    }
+
+    /// \internal_
+    /// Releases the current handle, then shares the handle of \p other.
+    memory_object& operator=(const memory_object &other)
+    {
+        if(this != &other){
+            if(m_mem){
+                clReleaseMemObject(m_mem);
+            }
+
+            m_mem = other.m_mem;
+
+            if(m_mem){
+                clRetainMemObject(m_mem);
+            }
+        }
+
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// \internal_
+    /// Takes over the handle of \p other and leaves \p other null.
+    memory_object(memory_object&& other) BOOST_NOEXCEPT
+        : m_mem(other.m_mem)
+    {
+        other.m_mem = 0;
+    }
+
+    /// \internal_
+    /// Releases the current handle, takes over the handle of \p other and
+    /// leaves \p other null.
+    memory_object& operator=(memory_object&& other) BOOST_NOEXCEPT
+    {
+        if(m_mem){
+            clReleaseMemObject(m_mem);
+        }
+
+        m_mem = other.m_mem;
+        other.m_mem = 0;
+
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// \internal_
+    /// Releases the handle, if any.
+    ~memory_object()
+    {
+        if(m_mem){
+            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+                clReleaseMemObject(m_mem)
+            );
+        }
+    }
+
+protected:
+    cl_mem m_mem; // underlying OpenCL handle; 0 for a null memory object
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for memory_object: passes the underlying
+// cl_mem handle as the kernel argument
+template<>
+struct set_kernel_arg<memory_object>
+{
+    void operator()(kernel &kernel_, size_t index, const memory_object &mem)
+    {
+        cl_mem &handle = mem.get();
+
+        kernel_.set_arg(index, handle);
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_MEMORY_OBJECT_HPP
diff --git a/boost/compute/pipe.hpp b/boost/compute/pipe.hpp
new file mode 100644
index 0000000000..944674e622
--- /dev/null
+++ b/boost/compute/pipe.hpp
@@ -0,0 +1,154 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_PIPE_HPP
+#define BOOST_COMPUTE_PIPE_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/memory_object.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+
+// pipe objects require opencl 2.0
+#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+
+namespace boost {
+namespace compute {
+
+/// \class pipe
+/// \brief A FIFO data pipe
+///
+/// \opencl_version_warning{2,0}
+///
+/// \see memory_object
+class pipe : public memory_object
+{
+public:
+    /// Constructs a null pipe that refers to no OpenCL memory object.
+    pipe()
+        : memory_object()
+    {
+    }
+
+    /// Wraps the existing OpenCL memory object \p mem. When \p retain is
+    /// \c true, the reference count for \p mem is incremented.
+    explicit pipe(cl_mem mem, bool retain = true)
+        : memory_object(mem, retain)
+    {
+    }
+
+    /// Creates a new pipe in \p context by forwarding \p flags,
+    /// \p pipe_packet_size, \p pipe_max_packets and \p properties to
+    /// clCreatePipe. Throws opencl_error if no pipe was created.
+    pipe(const context &context,
+         uint_ pipe_packet_size,
+         uint_ pipe_max_packets,
+         cl_mem_flags flags = read_write,
+         const cl_pipe_properties *properties = 0)
+    {
+        cl_int status = 0;
+        m_mem = clCreatePipe(
+            context, flags, pipe_packet_size, pipe_max_packets, properties, &status
+        );
+
+        if(m_mem == 0){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+    }
+
+    /// Copy-constructs a pipe that shares the memory object of \p other.
+    pipe(const pipe &other)
+        : memory_object(other)
+    {
+    }
+
+    /// Makes \c *this share the memory object of \p other.
+    pipe& operator=(const pipe &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        memory_object::operator=(other);
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a pipe, taking over the memory object of \p other.
+    pipe(pipe&& other) BOOST_NOEXCEPT
+        : memory_object(std::move(other))
+    {
+    }
+
+    /// Move-assigns the memory object of \p other to \c *this.
+    pipe& operator=(pipe&& other) BOOST_NOEXCEPT
+    {
+        memory_object::operator=(std::move(other));
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the pipe object; the base class releases the handle.
+    ~pipe()
+    {
+    }
+
+    /// Returns the packet size (CL_PIPE_PACKET_SIZE).
+    uint_ packet_size() const
+    {
+        return get_info<uint_>(CL_PIPE_PACKET_SIZE);
+    }
+
+    /// Returns the max number of packets (CL_PIPE_MAX_PACKETS).
+    uint_ max_packets() const
+    {
+        return get_info<uint_>(CL_PIPE_MAX_PACKETS);
+    }
+
+    /// Queries clGetPipeInfo for \p info and returns the value as a \c T.
+    ///
+    /// \see_opencl2_ref{clGetPipeInfo}
+    template<class T>
+    T get_info(cl_pipe_info info) const
+    {
+        return detail::get_object_info<T>(clGetPipeInfo, m_mem, info);
+    }
+
+    /// \overload
+    template<int Enum>
+    typename detail::get_object_info_type<pipe, Enum>::type get_info() const;
+};
+
+/// \internal_ define get_info() specializations for pipe (both listed queries are declared with type cl_uint)
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(pipe,
+ ((cl_uint, CL_PIPE_PACKET_SIZE))
+ ((cl_uint, CL_PIPE_MAX_PACKETS))
+)
+
+namespace detail {
+
+// set_kernel_arg specialization for pipe: passes the underlying cl_mem
+// handle of the pipe as the kernel argument
+template<>
+struct set_kernel_arg<pipe>
+{
+    void operator()(kernel &kernel_, size_t index, const pipe &pipe_)
+    {
+        cl_mem &handle = pipe_.get();
+
+        kernel_.set_arg(index, handle);
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // CL_VERSION_2_0
+
+#endif // BOOST_COMPUTE_PIPE_HPP
diff --git a/boost/compute/platform.hpp b/boost/compute/platform.hpp
new file mode 100644
index 0000000000..65fda84e11
--- /dev/null
+++ b/boost/compute/platform.hpp
@@ -0,0 +1,235 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_PLATFORM_HPP
+#define BOOST_COMPUTE_PLATFORM_HPP
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class platform
+/// \brief A compute platform.
+///
+/// The platform class provides an interface to an OpenCL platform.
+///
+/// To obtain a list of all platforms on the system use the
+/// system::platforms() method.
+///
+/// \see device, context
+class platform
+{
+public:
+    /// Constructs a platform object wrapping the platform \p id.
+    explicit platform(cl_platform_id id)
+        : m_platform(id)
+    {
+    }
+
+    /// Constructs a platform referring to the same id as \p other.
+    platform(const platform &other)
+        : m_platform(other.m_platform)
+    {
+    }
+
+    /// Makes \c *this refer to the same platform id as \p other.
+    platform& operator=(const platform &other)
+    {
+        // assigning an id onto itself is harmless, so self-assignment needs
+        // no special handling
+        m_platform = other.m_platform;
+        return *this;
+    }
+
+    /// Destroys the platform object; nothing needs to be released.
+    ~platform()
+    {
+    }
+
+    /// Returns the ID of the platform.
+    cl_platform_id id() const
+    {
+        return m_platform;
+    }
+
+    /// Returns the name of the platform (CL_PLATFORM_NAME).
+    std::string name() const
+    {
+        return get_info<std::string>(CL_PLATFORM_NAME);
+    }
+
+    /// Returns the name of the vendor for the platform
+    /// (CL_PLATFORM_VENDOR).
+    std::string vendor() const
+    {
+        return get_info<std::string>(CL_PLATFORM_VENDOR);
+    }
+
+    /// Returns the profile string for the platform (CL_PLATFORM_PROFILE).
+    std::string profile() const
+    {
+        return get_info<std::string>(CL_PLATFORM_PROFILE);
+    }
+
+    /// Returns the version string for the platform (CL_PLATFORM_VERSION).
+    std::string version() const
+    {
+        return get_info<std::string>(CL_PLATFORM_VERSION);
+    }
+
+    /// Returns the extensions supported by the platform, obtained by
+    /// splitting the CL_PLATFORM_EXTENSIONS string on spaces and tabs.
+    std::vector<std::string> extensions() const
+    {
+        std::string joined = get_info<std::string>(CL_PLATFORM_EXTENSIONS);
+
+        std::vector<std::string> names;
+        boost::split(
+            names, joined, boost::is_any_of("\t "), boost::token_compress_on
+        );
+        return names;
+    }
+
+    /// Returns \c true if \p name is one of the entries returned by
+    /// extensions().
+    bool supports_extension(const std::string &name) const
+    {
+        const std::vector<std::string> supported = this->extensions();
+        const std::vector<std::string>::const_iterator match =
+            std::find(supported.begin(), supported.end(), name);
+
+        return match != supported.end();
+    }
+
+    /// Returns the devices of the given \p type on the platform. Throws
+    /// opencl_error if the device ids cannot be queried.
+    std::vector<device> devices(cl_device_type type = CL_DEVICE_TYPE_ALL) const
+    {
+        const size_t count = device_count(type);
+        if(count == 0){
+            // no devices for this platform
+            return std::vector<device>();
+        }
+
+        std::vector<cl_device_id> ids(count);
+        const cl_int status = clGetDeviceIDs(
+            m_platform, type, static_cast<cl_uint>(count), &ids[0], 0
+        );
+        if(status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        // wrap every raw id in a device object
+        std::vector<device> result;
+        for(std::vector<cl_device_id>::const_iterator it = ids.begin();
+            it != ids.end();
+            ++it){
+            result.push_back(device(*it));
+        }
+
+        return result;
+    }
+
+    /// Returns the number of devices of the given \p type on the platform.
+    /// CL_DEVICE_NOT_FOUND is reported as zero devices; any other failure
+    /// throws opencl_error.
+    size_t device_count(cl_device_type type = CL_DEVICE_TYPE_ALL) const
+    {
+        cl_uint count = 0;
+        const cl_int status = clGetDeviceIDs(m_platform, type, 0, 0, &count);
+
+        if(status == CL_DEVICE_NOT_FOUND){
+            // no devices for this platform
+            return 0;
+        }
+
+        if(status != CL_SUCCESS){
+            // something else went wrong
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return count;
+    }
+
+    /// Queries clGetPlatformInfo for \p info and returns the value as a
+    /// \c T.
+    ///
+    /// \see_opencl_ref{clGetPlatformInfo}
+    template<class T>
+    T get_info(cl_platform_info info) const
+    {
+        return detail::get_object_info<T>(clGetPlatformInfo, m_platform, info);
+    }
+
+    /// \overload
+    template<int Enum>
+    typename detail::get_object_info_type<platform, Enum>::type
+    get_info() const;
+
+    /// Returns the address of the \p function_name extension
+    /// function. Returns \c 0 if \p function_name is invalid.
+    void* get_extension_function_address(const char *function_name) const
+    {
+        #ifdef CL_VERSION_1_2
+        return clGetExtensionFunctionAddressForPlatform(
+            m_platform, function_name
+        );
+        #else
+        return clGetExtensionFunctionAddress(function_name);
+        #endif
+    }
+
+    /// Requests that the platform unload any compiler resources.
+    void unload_compiler()
+    {
+        #ifdef CL_VERSION_1_2
+        clUnloadPlatformCompiler(m_platform);
+        #else
+        clUnloadCompiler();
+        #endif
+    }
+
+    /// Returns \c true if the platform is the same as \p other.
+    bool operator==(const platform &other) const
+    {
+        return m_platform == other.m_platform;
+    }
+
+    /// Returns \c true if the platform is different from \p other.
+    bool operator!=(const platform &other) const
+    {
+        return m_platform != other.m_platform;
+    }
+
+private:
+    cl_platform_id m_platform; // id of the wrapped OpenCL platform
+};
+
+/// \internal_ define get_info() specializations for platform (every listed query is declared with type std::string)
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(platform,
+ ((std::string, CL_PLATFORM_PROFILE))
+ ((std::string, CL_PLATFORM_VERSION))
+ ((std::string, CL_PLATFORM_NAME))
+ ((std::string, CL_PLATFORM_VENDOR))
+ ((std::string, CL_PLATFORM_EXTENSIONS))
+)
+
+// defined here rather than with the rest of device because it needs the
+// complete platform class: wraps the device's CL_DEVICE_PLATFORM id
+inline boost::compute::platform device::platform() const
+{
+    const cl_platform_id platform_id = get_info<CL_DEVICE_PLATFORM>();
+
+    return boost::compute::platform(platform_id);
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_PLATFORM_HPP
diff --git a/boost/compute/program.hpp b/boost/compute/program.hpp
new file mode 100644
index 0000000000..7573aa02e6
--- /dev/null
+++ b/boost/compute/program.hpp
@@ -0,0 +1,650 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_PROGRAM_HPP
+#define BOOST_COMPUTE_PROGRAM_HPP
+
+#include <string>
+#include <vector>
+#include <fstream>
+#include <streambuf>
+
+#ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION
+#include <iostream>
+#endif
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/exception.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+
+#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+#include <sstream>
+#include <boost/optional.hpp>
+#include <boost/compute/platform.hpp>
+#include <boost/compute/detail/getenv.hpp>
+#include <boost/compute/detail/path.hpp>
+#include <boost/compute/detail/sha1.hpp>
+#endif
+
+namespace boost {
+namespace compute {
+
+class kernel;
+
+/// \class program
+/// \brief A compute program.
+///
+/// The program class represents an OpenCL program.
+///
+/// Program objects are created with one of the static \c create_with_*
+/// functions. For example, to create a program from a source string:
+///
+/// \snippet test/test_program.cpp create_with_source
+///
+/// And to create a program from a source file:
+/// \code
+/// boost::compute::program bar_program =
+/// boost::compute::program::create_with_source_file("/path/to/bar.cl", context);
+/// \endcode
+///
+/// Once a program object has been successfully created, it can be compiled
+/// using the \c build() method:
+/// \code
+/// // build the program
+/// foo_program.build();
+/// \endcode
+///
+/// Once the program is built, \ref kernel objects can be created using the
+/// \c create_kernel() method by passing their name:
+/// \code
+/// // create a kernel from the compiled program
+/// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo");
+/// \endcode
+///
+/// \see kernel
+class program
+{
+public:
+ /// Creates a null program object (the stored cl_program handle is 0).
+ program()
+ : m_program(0)
+ {
+ }
+
+    /// Wraps the OpenCL program \p program. The reference count for
+    /// \p program is incremented only when \p retain is \c true and
+    /// \p program is non-null.
+    explicit program(cl_program program, bool retain = true)
+        : m_program(program)
+    {
+        const bool should_retain = retain && (m_program != 0);
+
+        if(should_retain){
+            clRetainProgram(m_program);
+        }
+    }
+
+    /// Copy-constructs a program that shares the handle of \p other; a
+    /// non-null handle has its reference count incremented.
+    program(const program &other)
+        : m_program(other.m_program)
+    {
+        if(m_program != 0){
+            clRetainProgram(m_program);
+        }
+    }
+
+    /// Makes \c *this share the program handle of \p other. Assigning an
+    /// object to itself does nothing.
+    program& operator=(const program &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        // drop the reference currently held, if any
+        if(m_program != 0){
+            clReleaseProgram(m_program);
+        }
+
+        // share other's handle and take a reference on it
+        m_program = other.m_program;
+        if(m_program != 0){
+            clRetainProgram(m_program);
+        }
+
+        return *this;
+    }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new program object from \p other, taking over its handle and leaving \p other null.
+ program(program&& other) BOOST_NOEXCEPT
+ : m_program(other.m_program)
+ {
+ other.m_program = 0; // other gives up the handle, so its destructor will not release it
+ }
+
+ /// Move-assigns the program from \p other to \c *this: the handle held so far is released and \p other is left null.
+ program& operator=(program&& other) BOOST_NOEXCEPT
+ {
+ if(m_program){
+ clReleaseProgram(m_program); // drop the reference held before the assignment
+ }
+
+ m_program = other.m_program; // take over the handle without retaining it again
+ other.m_program = 0; // other gives up the handle
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Destroys the program object, releasing the handle if it is non-null.
+ ~program()
+ {
+ if(m_program){
+ BOOST_COMPUTE_ASSERT_CL_SUCCESS( // the release result is handed to the assertion macro
+ clReleaseProgram(m_program)
+ );
+ }
+ }
+
+ /// Returns the underlying OpenCL program as a mutable reference, even though the method is const.
+ cl_program& get() const
+ {
+ return const_cast<cl_program &>(m_program); // constness of the member is cast away
+ }
+
+ /// Returns the source code for the program (the CL_PROGRAM_SOURCE string).
+ std::string source() const
+ {
+ return get_info<std::string>(CL_PROGRAM_SOURCE);
+ }
+
+    /// Returns the binary for the program.
+    ///
+    /// An empty vector is returned when the reported binary size is zero.
+    /// Throws opencl_error if the binary cannot be retrieved.
+    ///
+    /// NOTE(review): a single size_t is read for CL_PROGRAM_BINARY_SIZES,
+    /// which presumably assumes a program with exactly one device; confirm.
+    std::vector<unsigned char> binary() const
+    {
+        size_t binary_size = get_info<size_t>(CL_PROGRAM_BINARY_SIZES);
+        std::vector<unsigned char> binary(binary_size);
+
+        if(binary.empty()){
+            // nothing to fetch; &binary[0] would be undefined for an empty
+            // vector
+            return binary;
+        }
+
+        // CL_PROGRAM_BINARIES expects an array of destination pointers;
+        // here the array has a single entry pointing at our storage
+        unsigned char *binary_ptr = &binary[0];
+        cl_int error = clGetProgramInfo(m_program,
+                                        CL_PROGRAM_BINARIES,
+                                        sizeof(unsigned char **),
+                                        &binary_ptr,
+                                        0);
+        if(error != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+
+        return binary;
+    }
+
+    /// Returns the devices reported for the program by CL_PROGRAM_DEVICES.
+    std::vector<device> get_devices() const
+    {
+        const std::vector<cl_device_id> ids =
+            get_info<std::vector<cl_device_id> >(CL_PROGRAM_DEVICES);
+
+        // wrap every raw id in a device object
+        std::vector<device> result;
+        for(std::vector<cl_device_id>::const_iterator it = ids.begin();
+            it != ids.end();
+            ++it){
+            result.push_back(device(*it));
+        }
+
+        return result;
+    }
+
+    /// Returns the context reported for the program by CL_PROGRAM_CONTEXT.
+    context get_context() const
+    {
+        const cl_context handle = get_info<cl_context>(CL_PROGRAM_CONTEXT);
+
+        return context(handle);
+    }
+
+ /// Returns information about the program: queries clGetProgramInfo for \p info and returns the value as a \c T.
+ ///
+ /// \see_opencl_ref{clGetProgramInfo}
+ template<class T>
+ T get_info(cl_program_info info) const
+ {
+ return detail::get_object_info<T>(clGetProgramInfo, m_program, info);
+ }
+
+ /// \overload (the query is a template argument; the return type comes from get_object_info_type)
+ template<int Enum>
+ typename detail::get_object_info_type<program, Enum>::type
+ get_info() const; // declaration only at this point
+
+ /// Returns build information about the program for \p device.
+ ///
+ /// For example, this function can be used to retrieve the options used
+ /// to build the program:
+ /// \code
+ /// std::string build_options =
+ /// program.get_build_info<std::string>(CL_PROGRAM_BUILD_OPTIONS, device);
+ /// \endcode
+ ///
+ /// \see_opencl_ref{clGetProgramBuildInfo}
+ template<class T>
+ T get_build_info(cl_program_build_info info, const device &device) const
+ {
+ return detail::get_object_info<T>(clGetProgramBuildInfo, m_program, info, device.id());
+ }
+
+    /// Builds the program with \p options. An empty \p options string is
+    /// passed to OpenCL as a null pointer.
+    ///
+    /// If the program fails to compile, this function will throw an
+    /// opencl_error exception.
+    /// \code
+    /// try {
+    ///     // attempt to compile the program
+    ///     program.build();
+    /// }
+    /// catch(boost::compute::opencl_error &e){
+    ///     // program failed to compile, print out the build log
+    ///     std::cout << program.build_log() << std::endl;
+    /// }
+    /// \endcode
+    ///
+    /// When BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION is defined, the error
+    /// code, the source and the build log are written to std::cerr before
+    /// the exception is thrown.
+    ///
+    /// \see_opencl_ref{clBuildProgram}
+    void build(const std::string &options = std::string())
+    {
+        const char *options_string = options.empty() ? 0 : options.c_str();
+
+        const cl_int ret = clBuildProgram(m_program, 0, 0, options_string, 0, 0);
+
+        if(ret != CL_SUCCESS){
+            #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION
+            // print the error, source code and build log
+            std::cerr << "Boost.Compute: "
+                      << "kernel compilation failed (" << ret << ")\n"
+                      << "--- source ---\n"
+                      << source()
+                      << "\n--- build log ---\n"
+                      << build_log()
+                      << std::endl;
+            #endif
+
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Compiles the program, passing \p options to clCompileProgram().
+    /// Throws opencl_error when the call does not return \c CL_SUCCESS.
+    ///
+    /// \opencl_version_warning{1,2}
+    ///
+    /// \see_opencl_ref{clCompileProgram}
+    void compile(const std::string &options = std::string())
+    {
+        // an empty option string is handed to OpenCL as a null pointer
+        const char *compile_options = options.empty() ? 0 : options.c_str();
+
+        const cl_int status = clCompileProgram(
+            m_program, 0, 0, compile_options, 0, 0, 0, 0, 0
+        );
+
+        if(status == CL_SUCCESS){
+            return;
+        }
+
+        BOOST_THROW_EXCEPTION(opencl_error(status));
+    }
+
+    /// Links the programs in \p programs with \p options in \p context.
+    ///
+    /// Throws opencl_error if clLinkProgram() does not return a program.
+    /// An empty \p programs list is forwarded to OpenCL as a null input
+    /// list with a count of zero.
+    ///
+    /// \opencl_version_warning{1,2}
+    ///
+    /// \see_opencl_ref{clLinkProgram}
+    static program link(const std::vector<program> &programs,
+                        const context &context,
+                        const std::string &options = std::string())
+    {
+        // an empty option string is handed to OpenCL as a null pointer
+        const char *options_string = options.empty() ? 0 : options.c_str();
+
+        // Forming &programs[0] on an empty vector is undefined behaviour,
+        // so pass a null pointer in that case. The cast views the array of
+        // program objects as an array of cl_program handles.
+        const cl_program *input_programs = programs.empty()
+            ? 0
+            : reinterpret_cast<const cl_program*>(&programs[0]);
+
+        cl_int ret = 0;
+        cl_program program_ = clLinkProgram(
+            context.get(),
+            0,
+            0,
+            options_string,
+            static_cast<uint_>(programs.size()),
+            input_programs,
+            0,
+            0,
+            &ret
+        );
+
+        if(!program_){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+
+        return program(program_, false);
+    }
+ #endif // CL_VERSION_1_2
+
+    /// Returns the build log (\c CL_PROGRAM_BUILD_LOG) for the first device
+    /// returned by get_devices().
+    std::string build_log() const
+    {
+        const std::vector<device> devices = get_devices();
+        return get_build_info<std::string>(CL_PROGRAM_BUILD_LOG, devices.front());
+    }
+
+ /// Creates and returns a new kernel object for \p name.
+ ///
+ /// For example, to create the \c "foo" kernel (after the program has been
+ /// created and built):
+ /// \code
+ /// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo");
+ /// \endcode
+ kernel create_kernel(const std::string &name) const;
+
+    /// Returns \c true if the program is the same as \p other, i.e. both
+    /// wrap the same \c cl_program handle.
+    bool operator==(const program &other) const
+    {
+        return m_program == other.m_program;
+    }
+
+    /// Returns \c true if the program wraps a different \c cl_program
+    /// handle than \p other.
+    bool operator!=(const program &other) const
+    {
+        return !(m_program == other.m_program);
+    }
+
+    /// \internal_
+    /// Implicit conversion to the wrapped \c cl_program handle.
+    operator cl_program() const
+    {
+        return m_program;
+    }
+
+    /// Creates a new program from the single source string \p source in
+    /// \p context. Throws opencl_error if no program is returned.
+    ///
+    /// \see_opencl_ref{clCreateProgramWithSource}
+    static program create_with_source(const std::string &source,
+                                      const context &context)
+    {
+        const char *text = source.c_str();
+        cl_int status = 0;
+
+        // a null lengths array is passed, so the string is read up to its
+        // terminating null character
+        cl_program handle =
+            clCreateProgramWithSource(context, uint_(1), &text, 0, &status);
+
+        if(handle == 0){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        return program(handle, false);
+    }
+
+    /// Creates a new program with \p sources in \p context.
+    ///
+    /// Each element of \p sources is passed to OpenCL as one source string.
+    /// Throws opencl_error if no program is returned; an empty \p sources
+    /// list is forwarded as a null string array with a count of zero.
+    ///
+    /// \see_opencl_ref{clCreateProgramWithSource}
+    static program create_with_source(const std::vector<std::string> &sources,
+                                      const context &context)
+    {
+        std::vector<const char*> source_strings(sources.size());
+        for(size_t i = 0; i < sources.size(); i++){
+            source_strings[i] = sources[i].c_str();
+        }
+
+        // &source_strings[0] is undefined behaviour for an empty vector
+        const char **strings = source_strings.empty() ? 0 : &source_strings[0];
+
+        cl_int error = 0;
+        cl_program program_ = clCreateProgramWithSource(context,
+                                                        uint_(sources.size()),
+                                                        strings,
+                                                        0,
+                                                        &error);
+        if(!program_){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+
+        return program(program_, false);
+    }
+
+    /// Reads the whole of \p file as text and creates a new program from
+    /// it in \p context. Throws \c std::ios_base::failure if the stream is
+    /// in a failed state after opening.
+    ///
+    /// \see_opencl_ref{clCreateProgramWithSource}
+    static program create_with_source_file(const std::string &file,
+                                           const context &context)
+    {
+        std::ifstream input(file.c_str());
+        if(input.fail()){
+            BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream."));
+        }
+
+        // slurp the stream contents into a string
+        std::istreambuf_iterator<char> begin(input);
+        std::istreambuf_iterator<char> end;
+        const std::string text(begin, end);
+
+        return create_with_source(text, context);
+    }
+
+    /// Creates a new program from the \p binary_size bytes at \p binary for
+    /// the device returned by \c context.get_device().
+    ///
+    /// Throws opencl_error if no program is returned or if the per-binary
+    /// status reported by OpenCL is not \c CL_SUCCESS.
+    ///
+    /// \see_opencl_ref{clCreateProgramWithBinary}
+    static program create_with_binary(const unsigned char *binary,
+                                      size_t binary_size,
+                                      const context &context)
+    {
+        const cl_device_id target = context.get_device().id();
+
+        cl_int status = 0;
+        cl_int load_status = 0;
+        cl_program handle = clCreateProgramWithBinary(
+            context, uint_(1), &target, &binary_size, &binary,
+            &load_status, &status
+        );
+
+        if(handle == 0){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+        if(load_status != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(load_status));
+        }
+
+        return program(handle, false);
+    }
+
+    /// Creates a new program with \p binary in \p context.
+    ///
+    /// Forwards to the pointer/size overload. An empty \p binary is passed
+    /// on as a null pointer with size zero.
+    ///
+    /// \see_opencl_ref{clCreateProgramWithBinary}
+    static program create_with_binary(const std::vector<unsigned char> &binary,
+                                      const context &context)
+    {
+        // &binary[0] is undefined behaviour for an empty vector
+        const unsigned char *bytes = binary.empty() ? 0 : &binary[0];
+
+        return create_with_binary(bytes, binary.size(), context);
+    }
+
+    /// Creates a new program from the binary stored in \p file in
+    /// \p context.
+    ///
+    /// Throws \c std::ios_base::failure if the stream is in a failed state
+    /// after opening (the same behaviour as create_with_source_file()).
+    /// An empty file is passed on as a null pointer with size zero.
+    ///
+    /// \see_opencl_ref{clCreateProgramWithBinary}
+    static program create_with_binary_file(const std::string &file,
+                                           const context &context)
+    {
+        // open file stream
+        std::ifstream stream(file.c_str(), std::ios::in | std::ios::binary);
+
+        if(stream.fail()){
+            BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream."));
+        }
+
+        // read binary
+        std::vector<unsigned char> binary(
+            (std::istreambuf_iterator<char>(stream)),
+            std::istreambuf_iterator<char>()
+        );
+
+        // &binary[0] is undefined behaviour for an empty vector
+        const unsigned char *bytes = binary.empty() ? 0 : &binary[0];
+
+        // create program
+        return create_with_binary(bytes, binary.size(), context);
+    }
+
+ #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Creates a new program with the built-in kernels listed in
+    /// \p kernel_names for \p devices in \p context.
+    ///
+    /// Throws opencl_error if no program is returned. An empty \p devices
+    /// list is forwarded to OpenCL as a null device list with a count of
+    /// zero.
+    ///
+    /// \opencl_version_warning{1,2}
+    ///
+    /// \see_opencl_ref{clCreateProgramWithBuiltInKernels}
+    static program create_with_builtin_kernels(const context &context,
+                                               const std::vector<device> &devices,
+                                               const std::string &kernel_names)
+    {
+        // Forming &devices[0] on an empty vector is undefined behaviour,
+        // so pass a null pointer in that case. The cast views the array of
+        // device objects as an array of cl_device_id handles.
+        const cl_device_id *device_ids = devices.empty()
+            ? 0
+            : reinterpret_cast<const cl_device_id *>(&devices[0]);
+
+        cl_int error = 0;
+
+        cl_program program_ = clCreateProgramWithBuiltInKernels(
+            context.get(),
+            static_cast<uint_>(devices.size()),
+            device_ids,
+            kernel_names.c_str(),
+            &error
+        );
+
+        if(!program_){
+            BOOST_THROW_EXCEPTION(opencl_error(error));
+        }
+
+        return program(program_, false);
+    }
+ #endif // CL_VERSION_1_2
+
+    /// Creates a new program from \p source in \p context and builds it
+    /// with \p options.
+    /**
+     * When the BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, a cached
+     * binary is tried first and, after a regular build, the resulting
+     * binary is saved for reuse. The offline cache is located in
+     * $HOME/.boost_compute on UNIX-like systems and in
+     * %APPDATA%/boost_compute on Windows.
+     */
+    static program build_with_source(
+        const std::string &source,
+        const context &context,
+        const std::string &options = std::string()
+    )
+    {
+#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+        // The cache key is a hash over platform name and version, device
+        // name, build options and the source text.
+        device target_device = context.get_device();
+        platform target_platform = target_device.platform();
+
+        detail::sha1 hash;
+        hash.process( target_platform.name() )
+            .process( target_platform.version() )
+            .process( target_device.name() )
+            .process( options )
+            .process( source )
+            ;
+
+        // Use the cached binary when one can be loaded and built.
+        try {
+            boost::optional<program> cached = load_program_binary(hash, context);
+
+            if (cached) {
+                cached->build(options);
+                return *cached;
+            }
+        } catch (...) {
+            // Any failure on the cache path is ignored on purpose; fall
+            // back to compiling from source below.
+        }
+#endif
+        const char *text = source.c_str();
+
+        cl_int status = 0;
+        cl_program handle =
+            clCreateProgramWithSource(context, uint_(1), &text, 0, &status);
+        if(handle == 0){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+
+        program built(handle, false);
+        built.build(options);
+
+#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+        // Store the freshly built binary for later runs.
+        save_program_binary(hash, built);
+#endif
+
+        return built;
+    }
+
+private:
+#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+    // Writes the binary of \p prog to the cache file for \p hash: first the
+    // byte count as a raw size_t, then the bytes. Does nothing if the file
+    // cannot be opened.
+    static void save_program_binary(const std::string &hash, const program &prog)
+    {
+        const std::string path = detail::program_binary_path(hash, true) + "kernel";
+        std::ofstream out(path.c_str(), std::ios::binary);
+        if (!out) {
+            return;
+        }
+
+        std::vector<unsigned char> bytes = prog.binary();
+        size_t byte_count = bytes.size();
+
+        out.write((char*)&byte_count, sizeof(size_t));
+        out.write((char*)bytes.data(), byte_count);
+    }
+
+    // Tries to read a program binary from the file cache.
+    //
+    // The file is expected to hold a raw size_t byte count followed by that
+    // many bytes (the layout written by save_program_binary). Returns an
+    // empty optional when the file cannot be opened, is truncated, or holds
+    // a zero-length binary. Exceptions from create_with_binary (or from
+    // allocating the buffer when the stored size is corrupt) propagate to
+    // the caller.
+    static boost::optional<program> load_program_binary(
+        const std::string &hash, const context &ctx
+    )
+    {
+        std::string fname = detail::program_binary_path(hash) + "kernel";
+        std::ifstream bfile(fname.c_str(), std::ios::binary);
+        if (!bfile) return boost::optional<program>();
+
+        // Initialise the size so a failed read can never leave it
+        // indeterminate.
+        size_t binary_size = 0;
+        bfile.read((char*)&binary_size, sizeof(size_t));
+        if (!bfile || binary_size == 0) return boost::optional<program>();
+
+        std::vector<unsigned char> binary(binary_size);
+        bfile.read((char*)binary.data(), binary_size);
+
+        // A short read means the cache entry is truncated; treat as a miss.
+        if (!bfile) return boost::optional<program>();
+
+        return boost::optional<program>(
+            program::create_with_binary(
+                binary.data(), binary_size, ctx
+            )
+        );
+    }
+#endif // BOOST_COMPUTE_USE_OFFLINE_CACHE
+
+private:
+ cl_program m_program;
+};
+
+/// \internal_ define get_info() specializations for program
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program,
+ ((cl_uint, CL_PROGRAM_REFERENCE_COUNT))
+ ((cl_context, CL_PROGRAM_CONTEXT))
+ ((cl_uint, CL_PROGRAM_NUM_DEVICES))
+ ((std::vector<cl_device_id>, CL_PROGRAM_DEVICES))
+ ((std::string, CL_PROGRAM_SOURCE))
+ ((std::vector<size_t>, CL_PROGRAM_BINARY_SIZES))
+ ((std::vector<unsigned char *>, CL_PROGRAM_BINARIES))
+)
+
+#ifdef CL_VERSION_1_2
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program,
+ ((size_t, CL_PROGRAM_NUM_KERNELS))
+ ((std::string, CL_PROGRAM_KERNEL_NAMES))
+)
+#endif // CL_VERSION_1_2
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_PROGRAM_HPP
diff --git a/boost/compute/random.hpp b/boost/compute/random.hpp
new file mode 100644
index 0000000000..1a361fe48b
--- /dev/null
+++ b/boost/compute/random.hpp
@@ -0,0 +1,28 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_HPP
+#define BOOST_COMPUTE_RANDOM_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute random headers.
+
+#include <boost/compute/random/bernoulli_distribution.hpp>
+#include <boost/compute/random/default_random_engine.hpp>
+#include <boost/compute/random/discrete_distribution.hpp>
+#include <boost/compute/random/linear_congruential_engine.hpp>
+#include <boost/compute/random/mersenne_twister_engine.hpp>
+#include <boost/compute/random/threefry_engine.hpp>
+#include <boost/compute/random/normal_distribution.hpp>
+#include <boost/compute/random/uniform_int_distribution.hpp>
+#include <boost/compute/random/uniform_real_distribution.hpp>
+
+#endif // BOOST_COMPUTE_RANDOM_HPP
diff --git a/boost/compute/random/bernoulli_distribution.hpp b/boost/compute/random/bernoulli_distribution.hpp
new file mode 100644
index 0000000000..edd1125090
--- /dev/null
+++ b/boost/compute/random/bernoulli_distribution.hpp
@@ -0,0 +1,92 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP
+#define BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/literal.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// \class bernoulli_distribution
+/// \brief Produces random boolean values according to the following
+/// discrete probability function with parameter p :
+/// P(true | p) = p and P(false | p) = (1 - p)
+///
+/// The following example shows how to setup a bernoulli distribution to
+/// produce random boolean values with parameter p = 0.25
+///
+/// \snippet test/test_bernoulli_distribution.cpp generate
+///
+template<class RealType = float>
+class bernoulli_distribution
+{
+public:
+
+    /// Constructs a bernoulli distribution with parameter \p p
+    /// (\c 0.5 when omitted).
+    bernoulli_distribution(RealType p = 0.5f)
+        : m_p(p)
+    {
+    }
+
+    /// Destroys the bernoulli_distribution object.
+    ~bernoulli_distribution()
+    {
+    }
+
+    /// Returns the parameter p the distribution was constructed with.
+    RealType p() const
+    {
+        return m_p;
+    }
+
+    /// Fills the range [\p first, \p last) with bernoulli distributed
+    /// booleans.
+    ///
+    /// Raw \c uint_ values are first drawn from \p generator into a
+    /// temporary vector; each one is then divided by \c UINT_MAX and
+    /// compared against the parameter p on the device.
+    template<class OutputIterator, class Generator>
+    void generate(OutputIterator first,
+                  OutputIterator last,
+                  Generator &generator,
+                  command_queue &queue)
+    {
+        const size_t n = detail::iterator_range_size(first, last);
+
+        // one raw random number per output element
+        vector<uint_> raw(n, queue.get_context());
+        generator.generate(raw.begin(), raw.end(), queue);
+
+        BOOST_COMPUTE_FUNCTION(bool, scale_random, (const uint_ x),
+        {
+            return (convert_RealType(x) / MAX_RANDOM) < PARAM;
+        });
+
+        // the placeholders used in the function body are supplied as
+        // preprocessor-style definitions
+        const std::string convert_name =
+            std::string("convert_") + type_name<RealType>();
+
+        scale_random.define("PARAM", detail::make_literal(m_p));
+        scale_random.define("MAX_RANDOM", "UINT_MAX");
+        scale_random.define("convert_RealType", convert_name);
+
+        transform(raw.begin(), raw.end(), first, scale_random, queue);
+    }
+
+private:
+    RealType m_p;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP
diff --git a/boost/compute/random/default_random_engine.hpp b/boost/compute/random/default_random_engine.hpp
new file mode 100644
index 0000000000..b34dbd01fa
--- /dev/null
+++ b/boost/compute/random/default_random_engine.hpp
@@ -0,0 +1,24 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP
+#define BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP
+
+#include <boost/compute/random/mersenne_twister_engine.hpp>
+
+namespace boost {
+namespace compute {
+
+typedef mt19937 default_random_engine;
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP
diff --git a/boost/compute/random/discrete_distribution.hpp b/boost/compute/random/discrete_distribution.hpp
new file mode 100644
index 0000000000..3707928f98
--- /dev/null
+++ b/boost/compute/random/discrete_distribution.hpp
@@ -0,0 +1,117 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP
+#define BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/algorithm/accumulate.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/detail/literal.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class discrete_distribution
+/// \brief Produces random integers on the interval [0, n), where
+/// probability of each integer is given by the weight of the ith
+/// integer divided by the sum of all weights.
+///
+/// The following example shows how to setup a discrete distribution to
+/// produce 0 and 1 with equal probability
+///
+/// \snippet test/test_discrete_distribution.cpp generate
+///
+template<class IntType = uint_>
+class discrete_distribution
+{
+public:
+    typedef IntType result_type;
+
+    /// Creates a new discrete distribution with weights given by
+    /// the range [\p first, \p last).
+    ///
+    /// The weights are normalised by their sum and stored as cumulative
+    /// probabilities, so entry \c i holds the probability of drawing a
+    /// value less than or equal to \c i. The range is traversed more than
+    /// once.
+    template<class InputIterator>
+    discrete_distribution(InputIterator first, InputIterator last)
+        : m_n(std::distance(first, last)),
+          m_probabilities(std::distance(first, last))
+    {
+        double sum = 0;
+
+        for(InputIterator iter = first; iter != last; ++iter)
+        {
+            sum += *iter;
+        }
+
+        // Accumulate with a running total rather than reading the previous
+        // vector element: for the first entry there is no previous element,
+        // and indexing with i-1 would be out of bounds.
+        double cumulative = 0;
+        size_t i = 0;
+
+        for(InputIterator iter = first; iter != last; ++iter, ++i)
+        {
+            cumulative += *iter / sum;
+            m_probabilities[i] = cumulative;
+        }
+    }
+
+    /// Destroys the discrete_distribution object.
+    ~discrete_distribution()
+    {
+    }
+
+    /// Returns the value of n (the number of weights).
+    result_type n() const
+    {
+        return m_n;
+    }
+
+    /// Returns the cumulative probabilities computed from the weights.
+    ::std::vector<double> probabilities() const
+    {
+        return m_probabilities;
+    }
+
+    /// Generates integers distributed according to the weights and stores
+    /// them to the range [\p first, \p last).
+    ///
+    /// A device function is generated that scales each raw random number
+    /// to a float and returns the index of the first cumulative
+    /// probability that is not smaller than it; \p generator applies that
+    /// function to its output.
+    template<class OutputIterator, class Generator>
+    void generate(OutputIterator first,
+                  OutputIterator last,
+                  Generator &generator,
+                  command_queue &queue)
+    {
+        std::string source = "inline uint scale_random(uint x)\n";
+
+        source = source +
+            "{\n" +
+            "float rno = convert_float(x) / UINT_MAX;\n";
+
+        // one threshold test per outcome, in increasing order
+        for(size_t i = 0; i < m_n; i++)
+        {
+            source = source +
+                "if(rno <= " + detail::make_literal<float>(m_probabilities[i]) + ")\n" +
+                " return " + detail::make_literal(i) + ";\n";
+        }
+
+        // fall back to the last outcome if no threshold matched
+        source = source +
+            "return " + detail::make_literal(m_n - 1) + ";\n" +
+            "}\n";
+
+        // the macro only declares the function; its body is replaced by the
+        // generated source
+        BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const uint_ x), {});
+
+        scale_random.set_source(source);
+
+        generator.generate(first, last, scale_random, queue);
+    }
+
+private:
+    size_t m_n;
+    ::std::vector<double> m_probabilities;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP
diff --git a/boost/compute/random/linear_congruential_engine.hpp b/boost/compute/random/linear_congruential_engine.hpp
new file mode 100644
index 0000000000..173a6c2881
--- /dev/null
+++ b/boost/compute/random/linear_congruential_engine.hpp
@@ -0,0 +1,238 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP
+#define BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP
+
+#include <algorithm>
+
+#include <boost/compute/types.hpp>
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+
+///
+/// \class linear_congruential_engine
+/// \brief 'Quick and Dirty' linear congruential engine
+///
+/// Quick and dirty linear congruential engine to generate low quality
+/// random numbers very quickly. For uses in which good quality of random
+/// numbers is required(Monte-Carlo Simulations), use other engines like
+/// Mersenne Twister instead.
+///
+template<class T = uint_>
+class linear_congruential_engine
+{
+public:
+    typedef T result_type;
+    static const T default_seed = 1;
+    static const T a = 1099087573;
+    static const size_t threads = 1024;
+
+    /// Creates a new linear_congruential_engine and seeds it with \p value.
+    ///
+    /// Loads the engine's program, stores the seed and fills the
+    /// multiplicand table on the device using \p queue.
+    explicit linear_congruential_engine(command_queue &queue,
+                                        result_type value = default_seed)
+        : m_context(queue.get_context()),
+          m_multiplicands(m_context, threads * sizeof(result_type))
+    {
+        // setup program
+        load_program();
+
+        // seed state
+        seed(value, queue);
+
+        // generate multiplicands
+        generate_multiplicands(queue);
+    }
+
+    /// Creates a new linear_congruential_engine object as a copy of \p other.
+    linear_congruential_engine(const linear_congruential_engine<T> &other)
+        : m_context(other.m_context),
+          m_program(other.m_program),
+          m_seed(other.m_seed),
+          m_multiplicands(other.m_multiplicands)
+    {
+    }
+
+    /// Copies \p other to \c *this.
+    linear_congruential_engine<T>&
+    operator=(const linear_congruential_engine<T> &other)
+    {
+        if(this != &other){
+            m_context = other.m_context;
+            m_program = other.m_program;
+            m_seed = other.m_seed;
+            m_multiplicands = other.m_multiplicands;
+        }
+
+        return *this;
+    }
+
+    /// Destroys the linear_congruential_engine object.
+    ~linear_congruential_engine()
+    {
+    }
+
+    /// Seeds the random number generator with \p value.
+    ///
+    /// \param value seed value for the random-number generator
+    /// \param queue command queue to perform the operation
+    ///
+    /// If no seed value is provided, \c default_seed is used.
+    void seed(result_type value, command_queue &queue)
+    {
+        // the seed is kept on the host; no device work is needed here
+        (void) queue;
+
+        m_seed = value;
+    }
+
+    /// \overload
+    void seed(command_queue &queue)
+    {
+        seed(default_seed, queue);
+    }
+
+    /// Generates random numbers and stores them to the range [\p first, \p last).
+    ///
+    /// The range is filled in chunks of at most \c threads values; the seed
+    /// is advanced between chunks. An empty range is a no-op.
+    template<class OutputIterator>
+    void generate(OutputIterator first, OutputIterator last, command_queue &queue)
+    {
+        const size_t size = detail::iterator_range_size(first, last);
+
+        // nothing to do; avoids enqueueing a kernel over zero work-items
+        if(size == 0){
+            return;
+        }
+
+        kernel fill_kernel(m_program, "fill");
+        fill_kernel.set_arg(1, m_multiplicands);
+        fill_kernel.set_arg(2, first.get_buffer());
+
+        size_t offset = 0;
+
+        for(;;){
+            // one work-item per value, limited to the multiplicand table size
+            const size_t count =
+                (std::min)(static_cast<size_t>(threads), size - offset);
+
+            fill_kernel.set_arg(0, static_cast<const uint_>(m_seed));
+            fill_kernel.set_arg(3, static_cast<const uint_>(offset));
+            queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0);
+
+            offset += count;
+
+            if(offset >= size){
+                break;
+            }
+
+            // NOTE(review): the seed is only advanced between chunks, not
+            // after the final one, so a following generate() call starts
+            // from the same seed - confirm whether that is intended.
+            update_seed(queue);
+        }
+    }
+
+    /// \internal_
+    /// Advances the seed as if the values in [\p first, \p last) had been
+    /// generated, without storing them.
+    void generate(discard_iterator first, discard_iterator last, command_queue &queue)
+    {
+        size_t size = detail::iterator_range_size(first, last);
+
+        // skip whole blocks of `threads` values with the last multiplicand
+        if(size >= threads){
+            const uint_ max_mult =
+                detail::read_single_value<T>(m_multiplicands, threads-1, queue);
+            while(size >= threads) {
+                m_seed *= max_mult;
+                size -= threads;
+            }
+        }
+
+        // Skip the remainder. When nothing is left there is no multiplicand
+        // to apply; reading index size-1 would wrap around and access the
+        // buffer out of bounds.
+        if(size > 0){
+            m_seed *=
+                detail::read_single_value<T>(m_multiplicands, size-1, queue);
+        }
+    }
+
+    /// Generates random numbers, transforms them with \p op, and then stores
+    /// them to the range [\p first, \p last).
+    template<class OutputIterator, class Function>
+    void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
+    {
+        vector<T> tmp(std::distance(first, last), queue.get_context());
+        generate(tmp.begin(), tmp.end(), queue);
+        transform(tmp.begin(), tmp.end(), first, op, queue);
+    }
+
+    /// Generates \p z random numbers and discards them.
+    void discard(size_t z, command_queue &queue)
+    {
+        generate(discard_iterator(0), discard_iterator(z), queue);
+    }
+
+private:
+    /// \internal_
+    /// Generates the multiplicands for each thread
+    void generate_multiplicands(command_queue &queue)
+    {
+        kernel multiplicand_kernel =
+            m_program.create_kernel("multiplicand");
+        multiplicand_kernel.set_arg(0, m_multiplicands);
+
+        queue.enqueue_task(multiplicand_kernel);
+    }
+
+    /// \internal_
+    /// Multiplies the seed by the last entry of the multiplicand table.
+    void update_seed(command_queue &queue)
+    {
+        m_seed *=
+            detail::read_single_value<T>(m_multiplicands, threads-1, queue);
+    }
+
+    /// \internal_
+    /// Fetches the engine's program from the global program cache for the
+    /// context, building it from the embedded source if necessary.
+    void load_program()
+    {
+        boost::shared_ptr<program_cache> cache =
+            program_cache::get_global_cache(m_context);
+
+        std::string cache_key =
+            std::string("__boost_linear_congruential_engine_") + type_name<T>();
+
+        const char source[] =
+            "__kernel void multiplicand(__global uint *multiplicands)\n"
+            "{\n"
+            " uint a = 1099087573;\n"
+            " multiplicands[0] = a;\n"
+            " for(uint i = 1; i < 1024; i++){\n"
+            " multiplicands[i] = a * multiplicands[i-1];\n"
+            " }\n"
+            "}\n"
+
+            "__kernel void fill(const uint seed,\n"
+            " __global uint *multiplicands,\n"
+            " __global uint *result,"
+            " const uint offset)\n"
+            "{\n"
+            " const uint i = get_global_id(0);\n"
+            " result[offset+i] = seed * multiplicands[i];\n"
+            "}\n";
+
+        m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
+    }
+
+private:
+    context m_context;
+    program m_program;
+    T m_seed;
+    buffer m_multiplicands;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP
diff --git a/boost/compute/random/mersenne_twister_engine.hpp b/boost/compute/random/mersenne_twister_engine.hpp
new file mode 100644
index 0000000000..db8560e53d
--- /dev/null
+++ b/boost/compute/random/mersenne_twister_engine.hpp
@@ -0,0 +1,254 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP
+#define BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP
+
+#include <algorithm>
+
+#include <boost/compute/types.hpp>
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class mersenne_twister_engine
+/// \brief Mersenne twister pseudorandom number generator.
+template<class T>
+class mersenne_twister_engine
+{
+public:
+    typedef T result_type;
+    static const T default_seed = 5489U;
+    static const T n = 624;
+    static const T m = 397;
+
+    /// Creates a new mersenne_twister_engine and seeds it with \p value.
+    explicit mersenne_twister_engine(command_queue &queue,
+                                     result_type value = default_seed)
+        : m_context(queue.get_context()),
+          m_state_buffer(m_context, n * sizeof(result_type))
+    {
+        // setup program
+        load_program();
+
+        // seed state
+        seed(value, queue);
+    }
+
+    /// Creates a new mersenne_twister_engine object as a copy of \p other.
+    mersenne_twister_engine(const mersenne_twister_engine<T> &other)
+        : m_context(other.m_context),
+          m_state_index(other.m_state_index),
+          m_program(other.m_program),
+          m_state_buffer(other.m_state_buffer)
+    {
+    }
+
+    /// Copies \p other to \c *this.
+    mersenne_twister_engine<T>& operator=(const mersenne_twister_engine<T> &other)
+    {
+        if(this != &other){
+            m_context = other.m_context;
+            m_state_index = other.m_state_index;
+            m_program = other.m_program;
+            m_state_buffer = other.m_state_buffer;
+        }
+
+        return *this;
+    }
+
+    /// Destroys the mersenne_twister_engine object.
+    ~mersenne_twister_engine()
+    {
+    }
+
+    /// Seeds the random number generator with \p value.
+    ///
+    /// \param value seed value for the random-number generator
+    /// \param queue command queue to perform the operation
+    ///
+    /// If no seed value is provided, \c default_seed is used.
+    void seed(result_type value, command_queue &queue)
+    {
+        kernel seed_kernel = m_program.create_kernel("seed");
+        seed_kernel.set_arg(0, value);
+        seed_kernel.set_arg(1, m_state_buffer);
+
+        queue.enqueue_task(seed_kernel);
+
+        // the seed kernel leaves a freshly generated state block behind
+        m_state_index = 0;
+    }
+
+    /// \overload
+    void seed(command_queue &queue)
+    {
+        seed(default_seed, queue);
+    }
+
+    /// Generates random numbers and stores them to the range [\p first, \p last).
+    ///
+    /// Values are taken from the current state block starting at the
+    /// engine's state index. Whenever the block is exhausted a new one is
+    /// generated, so no chunk ever reads past the \c n state words.
+    template<class OutputIterator>
+    void generate(OutputIterator first, OutputIterator last, command_queue &queue)
+    {
+        const size_t size = detail::iterator_range_size(first, last);
+        const size_t state_size = static_cast<size_t>(n);
+
+        kernel fill_kernel(m_program, "fill");
+        fill_kernel.set_arg(0, m_state_buffer);
+        fill_kernel.set_arg(2, first.get_buffer());
+
+        size_t offset = 0;
+        size_t &p = m_state_index;
+
+        while(offset < size){
+            // refill the state once every word of the block has been used
+            if(p >= state_size){
+                generate_state(queue);
+                p = 0;
+            }
+
+            // never take more than what is left in the current block
+            const size_t count = (std::min)(state_size - p, size - offset);
+
+            fill_kernel.set_arg(1, static_cast<const uint_>(p));
+            fill_kernel.set_arg(3, static_cast<const uint_>(offset));
+            queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0);
+
+            p += count;
+            offset += count;
+        }
+    }
+
+    /// \internal_
+    /// Advances the engine past the values in [\p first, \p last) without
+    /// storing them, generating new state blocks as they are crossed.
+    void generate(discard_iterator first, discard_iterator last, command_queue &queue)
+    {
+        const size_t state_size = static_cast<size_t>(n);
+        size_t remaining = static_cast<size_t>(std::distance(first, last));
+
+        while(remaining > 0){
+            if(m_state_index >= state_size){
+                generate_state(queue);
+                m_state_index = 0;
+            }
+
+            const size_t step =
+                (std::min)(state_size - m_state_index, remaining);
+
+            m_state_index += step;
+            remaining -= step;
+        }
+    }
+
+    /// Generates random numbers, transforms them with \p op, and then stores
+    /// them to the range [\p first, \p last).
+    template<class OutputIterator, class Function>
+    void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
+    {
+        vector<T> tmp(std::distance(first, last), queue.get_context());
+        generate(tmp.begin(), tmp.end(), queue);
+        transform(tmp.begin(), tmp.end(), first, op, queue);
+    }
+
+    /// Generates \p z random numbers and discards them.
+    void discard(size_t z, command_queue &queue)
+    {
+        generate(discard_iterator(0), discard_iterator(z), queue);
+    }
+
+    /// \internal_ (deprecated)
+    template<class OutputIterator>
+    void fill(OutputIterator first, OutputIterator last, command_queue &queue)
+    {
+        generate(first, last, queue);
+    }
+
+private:
+    /// \internal_
+    /// Runs the "generate_state" kernel on the state buffer.
+    void generate_state(command_queue &queue)
+    {
+        kernel generate_state_kernel =
+            m_program.create_kernel("generate_state");
+        generate_state_kernel.set_arg(0, m_state_buffer);
+        queue.enqueue_task(generate_state_kernel);
+    }
+
+    /// \internal_
+    /// Fetches the engine's program from the global program cache for the
+    /// context, building it from the embedded source if necessary.
+    void load_program()
+    {
+        boost::shared_ptr<program_cache> cache =
+            program_cache::get_global_cache(m_context);
+
+        std::string cache_key =
+            std::string("__boost_mersenne_twister_engine_") + type_name<T>();
+
+        const char source[] =
+            "static uint twiddle(uint u, uint v)\n"
+            "{\n"
+            " return (((u & 0x80000000U) | (v & 0x7FFFFFFFU)) >> 1) ^\n"
+            " ((v & 1U) ? 0x9908B0DFU : 0x0U);\n"
+            "}\n"
+
+            "__kernel void generate_state(__global uint *state)\n"
+            "{\n"
+            " const uint n = 624;\n"
+            " const uint m = 397;\n"
+            " for(uint i = 0; i < (n - m); i++)\n"
+            " state[i] = state[i+m] ^ twiddle(state[i], state[i+1]);\n"
+            " for(uint i = n - m; i < (n - 1); i++)\n"
+            " state[i] = state[i+m-n] ^ twiddle(state[i], state[i+1]);\n"
+            " state[n-1] = state[m-1] ^ twiddle(state[n-1], state[0]);\n"
+            "}\n"
+
+            "__kernel void seed(const uint s, __global uint *state)\n"
+            "{\n"
+            " const uint n = 624;\n"
+            " state[0] = s & 0xFFFFFFFFU;\n"
+            " for(uint i = 1; i < n; i++){\n"
+            " state[i] = 1812433253U * (state[i-1] ^ (state[i-1] >> 30)) + i;\n"
+            " state[i] &= 0xFFFFFFFFU;\n"
+            " }\n"
+            " generate_state(state);\n"
+            "}\n"
+
+            "static uint random_number(__global uint *state, const uint p)\n"
+            "{\n"
+            " uint x = state[p];\n"
+            " x ^= (x >> 11);\n"
+            " x ^= (x << 7) & 0x9D2C5680U;\n"
+            " x ^= (x << 15) & 0xEFC60000U;\n"
+            " return x ^ (x >> 18);\n"
+            "}\n"
+
+            "__kernel void fill(__global uint *state,\n"
+            " const uint state_index,\n"
+            " __global uint *vector,\n"
+            " const uint offset)\n"
+            "{\n"
+            " const uint i = get_global_id(0);\n"
+            " vector[offset+i] = random_number(state, state_index + i);\n"
+            "}\n";
+
+        m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
+    }
+
+private:
+    context m_context;
+    size_t m_state_index;
+    program m_program;
+    buffer m_state_buffer;
+};
+
+typedef mersenne_twister_engine<uint_> mt19937;
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP
diff --git a/boost/compute/random/normal_distribution.hpp b/boost/compute/random/normal_distribution.hpp
new file mode 100644
index 0000000000..d025faeb2e
--- /dev/null
+++ b/boost/compute/random/normal_distribution.hpp
@@ -0,0 +1,124 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP
+#define BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP
+
+#include <limits>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/type_traits/make_vector_type.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class normal_distribution
+/// \brief Produces random, normally-distributed floating-point numbers.
+///
+/// The following example shows how to setup a normal distribution to
+/// produce random \c float values centered at \c 5:
+///
+/// \snippet test/test_normal_distribution.cpp generate
+///
+/// \see default_random_engine, uniform_real_distribution
+template<class RealType = float>
+class normal_distribution
+{
+public:
+    typedef RealType result_type;
+
+    /// Creates a new normal distribution producing numbers with the given
+    /// \p mean and \p stddev.
+    normal_distribution(RealType mean = 0.f, RealType stddev = 1.f)
+        : m_mean(mean),
+          m_stddev(stddev)
+    {
+    }
+
+    /// Destroys the normal distribution object.
+    ~normal_distribution()
+    {
+    }
+
+    /// Returns the mean value of the distribution.
+    result_type mean() const
+    {
+        return m_mean;
+    }
+
+    /// Returns the standard-deviation of the distribution.
+    result_type stddev() const
+    {
+        return m_stddev;
+    }
+
+    /// Returns the minimum value of the distribution.
+    result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const
+    {
+        return -std::numeric_limits<RealType>::infinity();
+    }
+
+    /// Returns the maximum value of the distribution.
+    result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const
+    {
+        return std::numeric_limits<RealType>::infinity();
+    }
+
+    /// Generates normally-distributed floating-point numbers and stores
+    /// them to the range [\p first, \p last).
+    ///
+    /// One raw \c uint_ is drawn from \p generator per requested value; the
+    /// raw values are then consumed two at a time by a Box-Muller transform
+    /// which emits two normal deviates per pair.
+    ///
+    /// \param first start of the destination range
+    /// \param last end of the destination range
+    /// \param generator engine providing \c generate(first, last, queue)
+    /// \param queue command queue used for all device work
+    ///
+    /// NOTE(review): only \c count/2 pairs are transformed, so the last
+    /// element of an odd-sized range is left untouched, and the results are
+    /// written starting at offset 0 of \c first.get_buffer() rather than at
+    /// the position of \p first itself -- confirm this is what callers expect.
+    template<class OutputIterator, class Generator>
+    void generate(OutputIterator first,
+                  OutputIterator last,
+                  Generator &generator,
+                  command_queue &queue)
+    {
+        typedef typename make_vector_type<RealType, 2>::type RealType2;
+
+        size_t count = detail::iterator_range_size(first, last);
+
+        vector<uint_> tmp(count, queue.get_context());
+        generator.generate(tmp.begin(), tmp.end(), queue);
+
+        // Box-Muller: z = sqrt(-2 ln(u1)) * {cos, sin}(2 pi u2).
+        //  * the natural logarithm is required (log), not the base-2 one;
+        //  * u1 is mapped into (0, 1] so that log(u1) is finite and never
+        //    positive, which keeps the sqrt() argument non-negative.
+        BOOST_COMPUTE_FUNCTION(RealType2, box_muller, (const uint2_ x),
+        {
+            const RealType x1 = ((RealType) x.x + (RealType) 1) / ((RealType) UINT_MAX + (RealType) 1);
+            const RealType x2 = x.y / (RealType) UINT_MAX;
+
+            const RealType rho = sqrt(-2.f * log(x1));
+
+            const RealType z1 = rho * cos(2.f * M_PI_F * x2);
+            const RealType z2 = rho * sin(2.f * M_PI_F * x2);
+
+            return (RealType2)(MEAN, MEAN) + (RealType2)(z1, z2) * (RealType2)(STDDEV, STDDEV);
+        });
+
+        box_muller.define("MEAN", boost::lexical_cast<std::string>(m_mean));
+        box_muller.define("STDDEV", boost::lexical_cast<std::string>(m_stddev));
+        box_muller.define("RealType", type_name<RealType>());
+        box_muller.define("RealType2", type_name<RealType2>());
+
+        transform(
+            make_buffer_iterator<uint2_>(tmp.get_buffer(), 0),
+            make_buffer_iterator<uint2_>(tmp.get_buffer(), count / 2),
+            make_buffer_iterator<RealType2>(first.get_buffer(), 0),
+            box_muller,
+            queue
+        );
+    }
+
+private:
+    RealType m_mean;
+    RealType m_stddev;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP
diff --git a/boost/compute/random/threefry_engine.hpp b/boost/compute/random/threefry_engine.hpp
new file mode 100644
index 0000000000..917bb72c06
--- /dev/null
+++ b/boost/compute/random/threefry_engine.hpp
@@ -0,0 +1,311 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
+#define BOOST_COMPUTE_RANDOM_THREEFRY_HPP
+
+#include <algorithm>
+
+#include <boost/compute/types.hpp>
+#include <boost/compute/buffer.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class threefry_engine
+/// \brief Threefry pseudorandom number generator.
+template<class T = uint_>
+class threefry_engine
+{
+public:
+ static const size_t threads = 1024;
+ typedef T result_type;
+
+ /// Creates a new threefry_engine for the context of \p queue and loads
+ /// its OpenCL program. No seed value is taken by this constructor.
+ explicit threefry_engine(command_queue &queue)
+ : m_context(queue.get_context())
+ {
+ // setup program
+ load_program();
+ }
+
+ /// Creates a new threefry_engine object as a copy of \p other.
+ ///
+ /// The context and the program object are copied from \p other;
+ /// load_program() is not called again.
+ threefry_engine(const threefry_engine<T> &other)
+ : m_context(other.m_context),
+ m_program(other.m_program)
+ {
+ }
+
+    /// Assigns the context and program of \p other to \c *this.
+    ///
+    /// Self-assignment is detected and leaves the object untouched.
+    ///
+    /// \return a reference to \c *this
+    threefry_engine<T>& operator=(const threefry_engine<T> &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_context = other.m_context;
+        m_program = other.m_program;
+
+        return *this;
+    }
+
+ /// Destroys the threefry_engine object.
+ ///
+ /// The body is intentionally empty; no explicit cleanup is performed here.
+ ~threefry_engine()
+ {
+ }
+
+private:
+ /// \internal_
+ /// Obtains the Threefry-2x32 OpenCL program for m_context through the
+ /// global program cache (key "threefry_engine_32x2") and stores it in
+ /// m_program. The OpenCL C source is embedded below as a string literal;
+ /// presumably it is only compiled when the cache has no entry for the key.
+ void load_program()
+ {
+ boost::shared_ptr<program_cache> cache =
+ program_cache::get_global_cache(m_context);
+ std::string cache_key =
+ std::string("threefry_engine_32x2");
+
+ // Copyright 2010-2012, D. E. Shaw Research.
+ // All rights reserved.
+
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted provided that the following conditions are
+ // met:
+
+ // * Redistributions of source code must retain the above copyright
+ // notice, this list of conditions, and the following disclaimer.
+
+ // * Redistributions in binary form must reproduce the above copyright
+ // notice, this list of conditions, and the following disclaimer in the
+ // documentation and/or other materials provided with the distribution.
+
+ // * Neither the name of D. E. Shaw Research nor the names of its
+ // contributors may be used to endorse or promote products derived from
+ // this software without specific prior written permission.
+
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ const char source[] =
+ "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
+ "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
+
+ "enum r123_enum_threefry32x2 {\n"
+ " R_32x2_0_0=13,\n"
+ " R_32x2_1_0=15,\n"
+ " R_32x2_2_0=26,\n"
+ " R_32x2_3_0= 6,\n"
+ " R_32x2_4_0=17,\n"
+ " R_32x2_5_0=29,\n"
+ " R_32x2_6_0=16,\n"
+ " R_32x2_7_0=24\n"
+ "};\n"
+
+ "static uint RotL_32(uint x, uint N)\n"
+ "{\n"
+ " return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
+ "}\n"
+
+ "struct r123array2x32 {\n"
+ " uint v[2];\n"
+ "};\n"
+ "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
+ "typedef struct r123array2x32 threefry2x32_key_t;\n"
+
+ // threefry2x32_R: mixes the two counter words with the key schedule
+ // ks[0..2]; a key word plus a running constant is injected after
+ // every fourth round, for up to 32 rounds (selected by Nrounds).
+ "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
+ "{\n"
+ " threefry2x32_ctr_t X;\n"
+ " uint ks[3];\n"
+ " uint i; \n"
+ " ks[2] = SKEIN_KS_PARITY_32;\n"
+ " for (i=0;i < 2; i++) {\n"
+ " ks[i] = k.v[i];\n"
+ " X.v[i] = in.v[i];\n"
+ " ks[2] ^= k.v[i];\n"
+ " }\n"
+ " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
+ " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>3){\n"
+ " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
+ " X.v[1] += 1;\n"
+ " }\n"
+ " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>7){\n"
+ " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
+ " X.v[1] += 2;\n"
+ " }\n"
+ " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>11){\n"
+ " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
+ " X.v[1] += 3;\n"
+ " }\n"
+ " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>15){\n"
+ " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
+ " X.v[1] += 4;\n"
+ " }\n"
+ " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>19){\n"
+ " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
+ " X.v[1] += 5;\n"
+ " }\n"
+ " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>23){\n"
+ " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
+ " X.v[1] += 6;\n"
+ " }\n"
+ " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>27){\n"
+ " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
+ " X.v[1] += 7;\n"
+ " }\n"
+ " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
+ " if(Nrounds>31){\n"
+ " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
+ " X.v[1] += 8;\n"
+ " }\n"
+ " return X;\n"
+ "}\n"
+
+ // generate_rng: work-item i loads the two-word counter and key at
+ // pair index (offset + i), runs 20 rounds (hard-coded here; the
+ // THREEFRY2x32_DEFAULT_ROUNDS macro above is not referenced) and
+ // overwrites the counter pair with the result.
+ "__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint offset) {\n"
+ " threefry2x32_ctr_t in;\n"
+ " threefry2x32_key_t k;\n"
+ " const uint i = get_global_id(0);\n"
+ " in.v[0] = ctr[2 * (offset + i)];\n"
+ " in.v[1] = ctr[2 * (offset + i) + 1];\n"
+ " k.v[0] = key[2 * (offset + i)];\n"
+ " k.v[1] = key[2 * (offset + i) + 1];\n"
+ " in = threefry2x32_R(20, in, k);\n"
+ " ctr[2 * (offset + i)] = in.v[0];\n"
+ " ctr[2 * (offset + i) + 1] = in.v[1];\n"
+ "}\n";
+
+ m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
+ }
+
+public:
+
+
+    /// Generates Threefry random numbers using both the counter and key values, and then stores
+    /// them to the range [\p first_ctr, \p last_ctr).
+    ///
+    /// The counter and key ranges are processed as pairs of 32-bit words:
+    /// pair \c i of the counter buffer is encrypted with pair \c i of the key
+    /// buffer and overwritten in place. The work is submitted in batches of
+    /// at most \c threads work-items.
+    ///
+    /// Nothing is done when either range is empty, when the two ranges differ
+    /// in size, or when the counter range holds fewer than two words (no
+    /// complete pair; launching a kernel with zero work-items is rejected by
+    /// OpenCL). A trailing unpaired word of an odd-sized range is left as is.
+    ///
+    /// NOTE(review): the kernel is bound to \c first_ctr.get_buffer() and
+    /// \c first_key.get_buffer(); any offset carried by the iterators
+    /// themselves is not forwarded -- confirm callers always pass iterators
+    /// to the start of the buffer.
+    template<class OutputIterator>
+    void generate(OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key, command_queue &queue) {
+        const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr);
+        const size_t size_key = detail::iterator_range_size(first_key, last_key);
+        if(!size_ctr || !size_key || (size_ctr != size_key)) {
+            return;
+        }
+
+        // one work-item per (ctr, key) pair of words
+        const size_t pairs = size_ctr / 2;
+        if(pairs == 0) {
+            return;
+        }
+
+        kernel rng_kernel = m_program.create_kernel("generate_rng");
+
+        rng_kernel.set_arg(0, first_ctr.get_buffer());
+        rng_kernel.set_arg(1, first_key.get_buffer());
+
+        size_t offset = 0;
+        while(offset < pairs){
+            // the cast keeps `threads` from being bound by reference inside
+            // std::min (the constant has no out-of-class definition here)
+            const size_t count =
+                (std::min)(static_cast<size_t>(threads), pairs - offset);
+
+            rng_kernel.set_arg(2, static_cast<const uint_>(offset));
+            queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0);
+
+            offset += count;
+        }
+    }
+
+    /// Generates Threefry random numbers from the counter values alone and
+    /// stores them to the range [\p first_ctr, \p last_ctr).
+    ///
+    /// A temporary key buffer of the same size, filled with zeros, is used
+    /// as the key. The counter range is processed as pairs of 32-bit words
+    /// which are overwritten in place, in batches of at most \c threads
+    /// work-items.
+    ///
+    /// Nothing is done when the range holds fewer than two words (no
+    /// complete pair; launching a kernel with zero work-items is rejected by
+    /// OpenCL). A trailing unpaired word of an odd-sized range is left as is.
+    ///
+    /// NOTE(review): the kernel is bound to \c first_ctr.get_buffer(); any
+    /// offset carried by the iterator itself is not forwarded -- confirm
+    /// callers always pass an iterator to the start of the buffer.
+    template<class OutputIterator>
+    void generate(OutputIterator first_ctr, OutputIterator last_ctr, command_queue &queue) {
+        const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr);
+
+        // one work-item per pair of counter words
+        const size_t pairs = size_ctr / 2;
+        if(pairs == 0) {
+            return;
+        }
+
+        // all-zero key, one key word per counter word
+        boost::compute::vector<uint_> vector_key(size_ctr, m_context);
+        vector_key.assign(size_ctr, 0, queue);
+
+        kernel rng_kernel = m_program.create_kernel("generate_rng");
+
+        rng_kernel.set_arg(0, first_ctr.get_buffer());
+        rng_kernel.set_arg(1, vector_key);
+
+        size_t offset = 0;
+        while(offset < pairs){
+            // the cast keeps `threads` from being bound by reference inside
+            // std::min (the constant has no out-of-class definition here)
+            const size_t count =
+                (std::min)(static_cast<size_t>(threads), pairs - offset);
+
+            rng_kernel.set_arg(2, static_cast<const uint_>(offset));
+            queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0);
+
+            offset += count;
+        }
+    }
+private:
+ context m_context;
+ program m_program;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP
diff --git a/boost/compute/random/uniform_int_distribution.hpp b/boost/compute/random/uniform_int_distribution.hpp
new file mode 100644
index 0000000000..92e8b3305f
--- /dev/null
+++ b/boost/compute/random/uniform_int_distribution.hpp
@@ -0,0 +1,111 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP
+#define BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP
+
+#include <limits>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class uniform_int_distribution
+/// \brief Produces uniformly distributed random integers
+///
+/// The following example shows how to setup a uniform int distribution to
+/// produce random integers 0 and 1.
+///
+/// \snippet test/test_uniform_int_distribution.cpp generate
+///
+template<class IntType = uint_>
+class uniform_int_distribution
+{
+public:
+    typedef IntType result_type;
+
+    /// Creates a new uniform distribution producing numbers in the range
+    /// [\p a, \p b].
+    explicit uniform_int_distribution(IntType a = 0,
+                                      IntType b = (std::numeric_limits<IntType>::max)())
+        : m_a(a),
+          m_b(b)
+    {
+    }
+
+    /// Destroys the uniform_int_distribution object.
+    ~uniform_int_distribution()
+    {
+    }
+
+    /// Returns the minimum value of the distribution.
+    result_type a() const
+    {
+        return m_a;
+    }
+
+    /// Returns the maximum value of the distribution.
+    result_type b() const
+    {
+        return m_b;
+    }
+
+    /// Generates uniformly distributed integers and stores
+    /// them to the range [\p first, \p last).
+    ///
+    /// Raw values are drawn from \p generator and filtered by rejection
+    /// sampling: only values below the largest multiple of the span
+    /// (b - a + 1) that fits in a \c uint_ are kept, so that the final
+    /// modulo reduction does not favour small results. Rejected values are
+    /// replaced by further draws until the whole range is filled.
+    ///
+    /// When the span covers every \c uint_ value (for example the default
+    /// [0, max] with \c uint_), the span wraps to zero; in that case no
+    /// rejection or modulo is needed and the raw values are only shifted
+    /// by \c a.
+    ///
+    /// \param first start of the destination range
+    /// \param last end of the destination range
+    /// \param generator engine providing \c result_type and
+    ///        \c generate(first, last, queue)
+    /// \param queue command queue used for all device work
+    template<class OutputIterator, class Generator>
+    void generate(OutputIterator first,
+                  OutputIterator last,
+                  Generator &generator,
+                  command_queue &queue)
+    {
+        typedef typename Generator::result_type g_result_type;
+
+        const size_t size = std::distance(first, last);
+        if(size == 0){
+            return;
+        }
+
+        vector<g_result_type> tmp(size, queue.get_context());
+        vector<g_result_type> tmp2(size, queue.get_context());
+
+        // number of distinct results, computed modulo 2^32; zero means the
+        // span covers the complete uint_ range
+        const uint_ range =
+            static_cast<uint_>(m_b) - static_cast<uint_>(m_a) + 1u;
+
+        if(range == 0){
+            // every raw value is acceptable; avoid dividing by zero on the
+            // host and taking a remainder by zero on the device
+            generator.generate(tmp2.begin(), tmp2.end(), queue);
+
+            BOOST_COMPUTE_FUNCTION(IntType, shift_random, (const g_result_type x),
+            {
+                return LO + x;
+            });
+
+            shift_random.define("LO", boost::lexical_cast<std::string>(m_a));
+
+            transform(tmp2.begin(), tmp2.end(), first, shift_random, queue);
+            return;
+        }
+
+        // largest multiple of `range` representable in a uint_; raw values
+        // in [0, bound) map onto every result equally often
+        const uint_ bound = (uint_(-1) / range) * range;
+
+        // accepted values are appended after the ones kept in earlier
+        // rounds; each round only draws as many values as are still missing
+        buffer_iterator<g_result_type> accepted_end = tmp2.begin();
+        size_t remaining = size;
+
+        while(remaining > 0)
+        {
+            generator.generate(tmp.begin(), tmp.begin() + remaining, queue);
+            accepted_end = copy_if(tmp.begin(), tmp.begin() + remaining,
+                                   accepted_end, _1 < bound, queue);
+            remaining = std::distance(accepted_end, tmp2.end());
+        }
+
+        BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const g_result_type x),
+        {
+            return LO + (x % (HI-LO+1));
+        });
+
+        scale_random.define("LO", boost::lexical_cast<std::string>(m_a));
+        scale_random.define("HI", boost::lexical_cast<std::string>(m_b));
+
+        transform(tmp2.begin(), tmp2.end(), first, scale_random, queue);
+    }
+
+private:
+    IntType m_a;
+    IntType m_b;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP
diff --git a/boost/compute/random/uniform_real_distribution.hpp b/boost/compute/random/uniform_real_distribution.hpp
new file mode 100644
index 0000000000..231b0dba01
--- /dev/null
+++ b/boost/compute/random/uniform_real_distribution.hpp
@@ -0,0 +1,105 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP
+#define BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/function.hpp>
+#include <boost/compute/detail/literal.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class uniform_real_distribution
+/// \brief Produces uniformly distributed random floating-point numbers.
+///
+/// The following example shows how to setup a uniform real distribution to
+/// produce random \c float values between \c 1 and \c 100.
+///
+/// \snippet test/test_uniform_real_distribution.cpp generate
+///
+/// \see default_random_engine, normal_distribution
+template<class RealType = float>
+class uniform_real_distribution
+{
+public:
+    typedef RealType result_type;
+
+    /// Creates a new uniform distribution producing numbers in the range
+    /// [\p a, \p b).
+    uniform_real_distribution(RealType a = 0.f, RealType b = 1.f)
+        : m_a(a),
+          m_b(b)
+    {
+    }
+
+    /// Destroys the uniform_real_distribution object.
+    ~uniform_real_distribution()
+    {
+    }
+
+    /// Returns the minimum value of the distribution.
+    result_type a() const
+    {
+        return m_a;
+    }
+
+    /// Returns the maximum value of the distribution.
+    result_type b() const
+    {
+        return m_b;
+    }
+
+    /// Generates uniformly distributed floating-point numbers and stores
+    /// them to the range [\p first, \p last).
+    ///
+    /// Each raw \c uint_ produced by \p generator is divided by \c UINT_MAX
+    /// and scaled into the interval between \c a and \c b. The scaled value
+    /// is then moved one representable step towards \c a with nextafter(),
+    /// so that a raw value equal to (or rounding up to) \c UINT_MAX cannot
+    /// yield \c b itself and the documented half-open range [a, b) holds.
+    ///
+    /// \param first start of the destination range
+    /// \param last end of the destination range
+    /// \param generator engine providing
+    ///        \c generate(first, last, function, queue)
+    /// \param queue command queue used for all device work
+    template<class OutputIterator, class Generator>
+    void generate(OutputIterator first,
+                  OutputIterator last,
+                  Generator &generator,
+                  command_queue &queue)
+    {
+        BOOST_COMPUTE_FUNCTION(RealType, scale_random, (const uint_ x),
+        {
+            return nextafter(LO + (convert_RealType(x) / MAX_RANDOM) * (HI - LO), (RealType) LO);
+        });
+
+        scale_random.define("LO", detail::make_literal(m_a));
+        scale_random.define("HI", detail::make_literal(m_b));
+        scale_random.define("MAX_RANDOM", "UINT_MAX");
+        scale_random.define(
+            "convert_RealType", std::string("convert_") + type_name<RealType>()
+        );
+        scale_random.define("RealType", type_name<RealType>());
+
+        generator.generate(
+            first, last, scale_random, queue
+        );
+    }
+
+    /// \internal_ (deprecated)
+    /// Forwards to generate() with the same arguments.
+    template<class OutputIterator, class Generator>
+    void fill(OutputIterator first,
+              OutputIterator last,
+              Generator &g,
+              command_queue &queue)
+    {
+        generate(first, last, g, queue);
+    }
+
+private:
+    RealType m_a;
+    RealType m_b;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP
diff --git a/boost/compute/source.hpp b/boost/compute/source.hpp
new file mode 100644
index 0000000000..844dfa49ea
--- /dev/null
+++ b/boost/compute/source.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/utility/source.hpp> instead
+#include <boost/compute/utility/source.hpp>
diff --git a/boost/compute/svm.hpp b/boost/compute/svm.hpp
new file mode 100644
index 0000000000..d03c8d9079
--- /dev/null
+++ b/boost/compute/svm.hpp
@@ -0,0 +1,62 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_SVM_HPP
+#define BOOST_COMPUTE_SVM_HPP
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+
+// svm functions require opencl 2.0
+#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+
+namespace boost {
+namespace compute {
+
+/// Allocates a shared virtual memory (SVM) buffer.
+///
+/// \param context context passed to clSVMAlloc()
+/// \param size number of elements of type \c T; the byte size requested is
+///        \c size * sizeof(T)
+/// \param flags memory flags forwarded to clSVMAlloc()
+/// \param alignment alignment forwarded to clSVMAlloc()
+///
+/// \return an svm_ptr wrapping the pointer returned by clSVMAlloc()
+///
+/// \throws opencl_error with \c CL_MEM_OBJECT_ALLOCATION_FAILURE if the
+///         resulting pointer is null
+///
+/// \opencl_version_warning{2,0}
+///
+/// \see_opencl2_ref{clSVMAlloc}
+///
+/// \see svm_free()
+template<class T>
+inline svm_ptr<T> svm_alloc(const context &context,
+ size_t size,
+ cl_svm_mem_flags flags = CL_MEM_READ_WRITE,
+ unsigned int alignment = 0)
+{
+ svm_ptr<T> ptr(clSVMAlloc(context.get(), flags, size * sizeof(T), alignment));
+ if(!ptr.get()){
+ BOOST_THROW_EXCEPTION(opencl_error(CL_MEM_OBJECT_ALLOCATION_FAILURE));
+ }
+ return ptr;
+}
+
+/// Deallocates a shared virtual memory (SVM) buffer.
+///
+/// \param context context passed to clSVMFree()
+/// \param ptr SVM pointer whose raw pointer (\c ptr.get()) is passed to
+///        clSVMFree()
+///
+/// \opencl_version_warning{2,0}
+///
+/// \see_opencl2_ref{clSVMFree}
+///
+/// \see svm_alloc(), command_queue::enqueue_svm_free()
+template<class T>
+inline void svm_free(const context &context, svm_ptr<T> ptr)
+{
+ clSVMFree(context.get(), ptr.get());
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // CL_VERSION_2_0
+
+#endif // BOOST_COMPUTE_SVM_HPP
diff --git a/boost/compute/system.hpp b/boost/compute/system.hpp
new file mode 100644
index 0000000000..f205fece7d
--- /dev/null
+++ b/boost/compute/system.hpp
@@ -0,0 +1,278 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_SYSTEM_HPP
+#define BOOST_COMPUTE_SYSTEM_HPP
+
+#include <string>
+#include <vector>
+#include <cstdlib>
+
+#include <boost/throw_exception.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/device.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/platform.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/getenv.hpp>
+#include <boost/compute/exception/no_device_found.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class system
+/// \brief Provides access to platforms and devices on the system.
+///
+/// The system class contains a set of static functions which provide access to
+/// the OpenCL platforms and compute devices on the host system.
+///
+/// The default_device() convenience method automatically selects and returns
+/// the "best" compute device for the system following a set of heuristics and
+/// environment variables. This simplifies setup of the OpenCL environment.
+///
+/// \see platform, device, context
+class system
+{
+public:
+    /// Returns the default compute device for the system.
+    ///
+    /// The device is chosen by find_default_device(), whose choice can be
+    /// steered with the following environment variables:
+    ///
+    /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE -
+    ///        name of the compute device (e.g. "GTX TITAN")
+    /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE_TYPE
+    ///        type of the compute device (e.g. "GPU" or "CPU")
+    /// \li \c BOOST_COMPUTE_DEFAULT_PLATFORM -
+    ///        name of the platform (e.g. "NVIDIA CUDA")
+    /// \li \c BOOST_COMPUTE_DEFAULT_VENDOR -
+    ///        name of the device vendor (e.g. "NVIDIA")
+    ///
+    /// The selection is made once, the first time this function is called,
+    /// and kept in a function-local static; later calls return that same
+    /// device.
+    ///
+    /// If no OpenCL device is found on the system, a no_device_found exception
+    /// is thrown.
+    ///
+    /// For example, to print the name of the default compute device on the
+    /// system:
+    /// \code
+    /// // get the default compute device
+    /// boost::compute::device device = boost::compute::system::default_device();
+    ///
+    /// // print the name of the device
+    /// std::cout << "default device: " << device.name() << std::endl;
+    /// \endcode
+    static device default_device()
+    {
+        static device selected_device = find_default_device();
+        return selected_device;
+    }
+
+    /// Looks up a device by its exact name.
+    ///
+    /// Every device returned by devices() is inspected in order and the
+    /// first one whose name() equals \p name is returned.
+    ///
+    /// \throws no_device_found if no device with \p name is found.
+    static device find_device(const std::string &name)
+    {
+        typedef std::vector<device>::const_iterator device_iterator;
+
+        const std::vector<device> all_devices = system::devices();
+        for(device_iterator it = all_devices.begin(); it != all_devices.end(); ++it){
+            if(it->name() == name){
+                return *it;
+            }
+        }
+
+        BOOST_THROW_EXCEPTION(no_device_found());
+    }
+
+    /// Collects the compute devices of every platform on the system.
+    ///
+    /// The devices are listed platform by platform, in the order given by
+    /// platforms().
+    ///
+    /// For example, to print out the name of each OpenCL-capable device
+    /// available on the system:
+    /// \code
+    /// for(const auto &device : boost::compute::system::devices()){
+    ///     std::cout << device.name() << std::endl;
+    /// }
+    /// \endcode
+    static std::vector<device> devices()
+    {
+        typedef std::vector<platform>::const_iterator platform_iterator;
+
+        std::vector<device> all_devices;
+
+        const std::vector<platform> all_platforms = system::platforms();
+        for(platform_iterator it = all_platforms.begin(); it != all_platforms.end(); ++it){
+            const std::vector<device> found = it->devices();
+            all_devices.insert(all_devices.end(), found.begin(), found.end());
+        }
+
+        return all_devices;
+    }
+
+    /// Returns the number of compute devices on the system, i.e. the sum of
+    /// device_count() over every platform returned by platforms().
+    static size_t device_count()
+    {
+        typedef std::vector<platform>::const_iterator platform_iterator;
+
+        const std::vector<platform> all_platforms = system::platforms();
+
+        size_t total = 0;
+        for(platform_iterator it = all_platforms.begin(); it != all_platforms.end(); ++it){
+            total += it->device_count();
+        }
+
+        return total;
+    }
+
+    /// Returns the default context for the system.
+    ///
+    /// The context is constructed from the device returned by
+    /// default_device().
+    ///
+    /// Construction happens once, the first time this function is called;
+    /// the context is kept in a function-local static and later calls
+    /// return that same object.
+    static context default_context()
+    {
+        static context shared_context(default_device());
+        return shared_context;
+    }
+
+    /// Returns the default command queue for the system.
+    ///
+    /// The queue is constructed once from default_context() and
+    /// default_device() and returned by reference on every call.
+    static command_queue& default_queue()
+    {
+        static command_queue shared_queue(default_context(), default_device());
+        return shared_queue;
+    }
+
+    /// Waits for the default command queue by calling finish() on it.
+    ///
+    /// This is equivalent to:
+    /// \code
+    /// system::default_queue().finish();
+    /// \endcode
+    static void finish()
+    {
+        command_queue &queue = default_queue();
+        queue.finish();
+    }
+
+    /// Returns a vector containing each of the OpenCL platforms on the system.
+    ///
+    /// An empty vector is returned when clGetPlatformIDs() reports an error
+    /// or no platform at all; the second query is only issued for a
+    /// non-zero count, so the id buffer is never indexed while empty.
+    ///
+    /// For example, to print out the name of each OpenCL platform present on
+    /// the system:
+    /// \code
+    /// for(const auto &platform : boost::compute::system::platforms()){
+    ///     std::cout << platform.name() << std::endl;
+    /// }
+    /// \endcode
+    static std::vector<platform> platforms()
+    {
+        std::vector<platform> platforms;
+
+        // first call: ask only for the number of platforms
+        cl_uint count = 0;
+        if(clGetPlatformIDs(0, 0, &count) != CL_SUCCESS || count == 0){
+            return platforms;
+        }
+
+        // second call: fetch the ids themselves
+        std::vector<cl_platform_id> platform_ids(count);
+        if(clGetPlatformIDs(count, &platform_ids[0], 0) != CL_SUCCESS){
+            return platforms;
+        }
+
+        platforms.reserve(platform_ids.size());
+        for(size_t i = 0; i < platform_ids.size(); i++){
+            platforms.push_back(platform(platform_ids[i]));
+        }
+
+        return platforms;
+    }
+
+    /// Returns the number of compute platforms on the system, as reported
+    /// by clGetPlatformIDs() (zero if the call leaves the count untouched).
+    static size_t platform_count()
+    {
+        cl_uint reported = 0;
+        clGetPlatformIDs(0, 0, &reported);
+
+        const size_t total = static_cast<size_t>(reported);
+        return total;
+    }
+
+private:
+    /// \internal_
+    /// Picks the default device from the list returned by devices().
+    ///
+    /// Selection order:
+    ///  1. if any of the BOOST_COMPUTE_DEFAULT_DEVICE, _DEVICE_TYPE,
+    ///     _PLATFORM or _VENDOR environment variables is set, the first
+    ///     device satisfying all of the set filters;
+    ///  2. otherwise (or if nothing satisfied the filters) the first GPU;
+    ///  3. then the first CPU;
+    ///  4. finally the first device of the list.
+    ///
+    /// \throws no_device_found if the system has no device at all.
+    static device find_default_device()
+    {
+        // every device known to the system
+        const std::vector<device> candidates = devices();
+        if(candidates.empty()){
+            BOOST_THROW_EXCEPTION(no_device_found());
+        }
+
+        // optional filters taken from the environment
+        const char *wanted_name = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE");
+        const char *wanted_type = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE_TYPE");
+        const char *wanted_platform = detail::getenv("BOOST_COMPUTE_DEFAULT_PLATFORM");
+        const char *wanted_vendor = detail::getenv("BOOST_COMPUTE_DEFAULT_VENDOR");
+
+        const bool has_filter =
+            wanted_name || wanted_type || wanted_platform || wanted_vendor;
+
+        if(has_filter){
+            for(size_t i = 0; i < candidates.size(); i++){
+                const device& candidate = candidates[i];
+
+                if(wanted_name && !matches(candidate.name(), wanted_name)){
+                    continue;
+                }
+
+                // the requested type is looked for inside the literal "GPU"
+                if(wanted_type && matches(std::string("GPU"), wanted_type)
+                        && !(candidate.type() & device::gpu)){
+                    continue;
+                }
+
+                // ... and likewise inside the literal "CPU"
+                if(wanted_type && matches(std::string("CPU"), wanted_type)
+                        && !(candidate.type() & device::cpu)){
+                    continue;
+                }
+
+                if(wanted_platform && !matches(candidate.platform().name(), wanted_platform)){
+                    continue;
+                }
+
+                if(wanted_vendor && !matches(candidate.vendor(), wanted_vendor)){
+                    continue;
+                }
+
+                return candidate;
+            }
+        }
+
+        typedef std::vector<device>::const_iterator device_iterator;
+
+        // prefer the first gpu device
+        for(device_iterator it = candidates.begin(); it != candidates.end(); ++it){
+            if(it->type() & device::gpu){
+                return *it;
+            }
+        }
+
+        // then the first cpu device
+        for(device_iterator it = candidates.begin(); it != candidates.end(); ++it){
+            if(it->type() & device::cpu){
+                return *it;
+            }
+        }
+
+        // otherwise whatever comes first
+        return candidates.front();
+    }
+
+    /// \internal_
+    /// Returns \c true if \p pattern occurs anywhere inside \p str
+    /// (plain substring search, no wildcards).
+    static bool matches(const std::string &str, const std::string &pattern)
+    {
+        const std::string::size_type position = str.find(pattern);
+        return std::string::npos != position;
+    }
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_SYSTEM_HPP
diff --git a/boost/compute/type_traits.hpp b/boost/compute/type_traits.hpp
new file mode 100644
index 0000000000..9ba98d9c2c
--- /dev/null
+++ b/boost/compute/type_traits.hpp
@@ -0,0 +1,25 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_HPP
+
+#include <boost/compute/type_traits/common_type.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+#include <boost/compute/type_traits/is_fundamental.hpp>
+#include <boost/compute/type_traits/is_vector_type.hpp>
+#include <boost/compute/type_traits/make_vector_type.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+#include <boost/compute/type_traits/scalar_type.hpp>
+#include <boost/compute/type_traits/type_definition.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_HPP
diff --git a/boost/compute/type_traits/common_type.hpp b/boost/compute/type_traits/common_type.hpp
new file mode 100644
index 0000000000..49a81ff3fd
--- /dev/null
+++ b/boost/compute/type_traits/common_type.hpp
@@ -0,0 +1,55 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP
+
+#include <boost/type_traits/common_type.hpp>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+
+/// \internal_ Specializes common_type for a scalar paired with its size-component vector type, in both argument orders; the result is always the vector type.
+#define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, size) \
+ template<> \
+ struct common_type<BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size), \
+ BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar)> \
+ { \
+ typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \
+ }; \
+ template<> \
+ struct common_type<BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar), \
+ BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size)> \
+ { \
+ typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \
+ };
+
+/// \internal_ Expands the scalar/vector common_type specializations of \p scalar for every vector width (2, 4, 8 and 16).
+#define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(scalar) \
+    BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 2) \
+    BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 4) \
+    BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 8) \
+    BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 16)
+
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(char) // one family of specializations per scalar type
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uchar)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(short)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ushort)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(int)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uint)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(long)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ulong)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(float)
+BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(double)
+
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP
diff --git a/boost/compute/type_traits/detail/capture_traits.hpp b/boost/compute/type_traits/detail/capture_traits.hpp
new file mode 100644
index 0000000000..e790f80696
--- /dev/null
+++ b/boost/compute/type_traits/detail/capture_traits.hpp
@@ -0,0 +1,33 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP
+
+#include <boost/compute/type_traits/type_name.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Primary capture_traits template: exposes the OpenCL type name of \c T.
+template<typename T>
+struct capture_traits
+{
+    // forwards to the free function ::boost::compute::type_name<T>()
+    static std::string type_name()
+    { return std::string(::boost::compute::type_name<T>()); }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP
diff --git a/boost/compute/type_traits/is_device_iterator.hpp b/boost/compute/type_traits/is_device_iterator.hpp
new file mode 100644
index 0000000000..5e7021f2e8
--- /dev/null
+++ b/boost/compute/type_traits/is_device_iterator.hpp
@@ -0,0 +1,39 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP
+
+#include <boost/type_traits/integral_constant.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Trait telling whether \c Iterator is a device-iterator.
+///
+/// The primary template derives from \c boost::false_type, so the answer
+/// is \c false unless a device iterator type (such as buffer_iterator)
+/// specializes this trait to report \c true.
+///
+/// \code
+/// is_device_iterator<buffer_iterator<int>>::value == true
+/// is_device_iterator<std::vector<int>::iterator>::value == false
+/// \endcode
+template<typename Iterator>
+struct is_device_iterator : public boost::false_type {};
+
+/// \internal_ A const-qualified iterator type answers exactly like the unqualified one.
+template<typename It>
+struct is_device_iterator<const It> : public is_device_iterator<It> {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP
diff --git a/boost/compute/type_traits/is_fundamental.hpp b/boost/compute/type_traits/is_fundamental.hpp
new file mode 100644
index 0000000000..6386f1184d
--- /dev/null
+++ b/boost/compute/type_traits/is_fundamental.hpp
@@ -0,0 +1,58 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Trait that is \c true when \p T is a fundamental (i.e. built-in) type.
+/// The primary template answers \c false; specializations opt types in.
+///
+/// For example,
+/// \code
+/// is_fundamental<float>::value == true
+/// is_fundamental<std::pair<int, float>>::value == false
+/// \endcode
+template<typename T>
+struct is_fundamental : boost::false_type {};
+
+/// \internal_ For a scalar name \p type, specializes is_fundamental to true_type for type_, for type2_/type4_/type8_/type16_ and for cl_type2/cl_type4/cl_type8/cl_type16.
+#define BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(type) \
+ template<> struct is_fundamental<BOOST_PP_CAT(type, _)> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 2))> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 4))> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 8))> : boost::true_type {}; \
+ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 16))> : boost::true_type {};
+
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(char)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uchar)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(short)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ushort)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(int)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uint)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(long)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ulong)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(float)
+BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(double)
+
+#undef BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL // helper macro is not left defined after this header
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP
diff --git a/boost/compute/type_traits/is_vector_type.hpp b/boost/compute/type_traits/is_vector_type.hpp
new file mode 100644
index 0000000000..f4382f0e2b
--- /dev/null
+++ b/boost/compute/type_traits/is_vector_type.hpp
@@ -0,0 +1,38 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP
+
+#include <boost/mpl/bool.hpp>
+
+#include <boost/compute/type_traits/vector_size.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Trait that is \c true when \p T is a vector type, that is, when
+/// vector_size<T>::value differs from \c 1.
+///
+/// For example,
+/// \code
+/// is_vector_type<int>::value == false
+/// is_vector_type<float4_>::value == true
+/// \endcode
+///
+/// \see make_vector_type, vector_size
+template<typename T>
+struct is_vector_type
+    : public boost::mpl::bool_<(vector_size<T>::value != 1)> {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP
diff --git a/boost/compute/type_traits/make_vector_type.hpp b/boost/compute/type_traits/make_vector_type.hpp
new file mode 100644
index 0000000000..6494ff267f
--- /dev/null
+++ b/boost/compute/type_traits/make_vector_type.hpp
@@ -0,0 +1,71 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP
+
+#include <boost/preprocessor/cat.hpp>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Meta-function which returns a vector type for \p Scalar with \p Size
+/// components. The primary template is empty, so \c type only exists for
+/// the combinations that have a specialization.
+///
+/// For example,
+/// \code
+/// make_vector_type<int, 2>::type == int2_
+/// make_vector_type<float, 4>::type == float4_
+/// \endcode
+///
+/// \see is_vector_type
+template<typename Scalar, size_t Size>
+struct make_vector_type {};
+
+/// \internal_
+template<typename Scalar>
+struct make_vector_type<Scalar, 1>
+{
+    typedef Scalar type; // a size of one leaves the scalar type unchanged
+};
+
+/// \internal_ Specializes make_vector_type<scalar_, size> so that its nested type is the token-pasted name scalar{size}_ (for example int and 2 give int2_).
+#define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, size) \
+ template<> \
+ struct make_vector_type<BOOST_PP_CAT(scalar, _), size> \
+ { \
+ typedef BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _) type; \
+ };
+
+/// \internal_ Expands the specialization above for the sizes 2, 4, 8 and 16 of one scalar name.
+#define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(scalar) \
+ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 2) \
+ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 4) \
+ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 8) \
+ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 16)
+
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(char) // one family of specializations per scalar name
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uchar)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(short)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ushort)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(int)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uint)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(long)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ulong)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(float)
+BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(double)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP
diff --git a/boost/compute/type_traits/result_of.hpp b/boost/compute/type_traits/result_of.hpp
new file mode 100644
index 0000000000..3f475a35da
--- /dev/null
+++ b/boost/compute/type_traits/result_of.hpp
@@ -0,0 +1,39 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP
+
+#include <boost/utility/result_of.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Computes the result type of the call described by \c Signature, which
+/// is written in the form <tt>Function(Args...)</tt>.
+///
+/// For example,
+/// \code
+/// // int + int = int
+/// result_of<plus(int, int)>::type == int
+/// \endcode
+template<typename Signature>
+struct result_of
+{
+    // Resolved through the TR1-style result_of protocol on purpose: the C++11
+    // decltype operator would give the host C++ type of the "invocation",
+    // whereas the type wanted here is the one produced on an OpenCL device.
+    typedef typename ::boost::tr1_result_of<Signature>::type type;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP
diff --git a/boost/compute/type_traits/scalar_type.hpp b/boost/compute/type_traits/scalar_type.hpp
new file mode 100644
index 0000000000..c40682fd27
--- /dev/null
+++ b/boost/compute/type_traits/scalar_type.hpp
@@ -0,0 +1,72 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP
+
+#include <boost/preprocessor/cat.hpp>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Maps a vector type onto the scalar type of its components. A type
+/// without a specialization uses the primary template, whose \c type is \c void.
+///
+/// For example,
+/// \code
+/// scalar_type<float4_>::type == float
+/// \endcode
+template<typename Vector>
+struct scalar_type
+{
+    typedef void type; ///< \internal_
+};
+
+/// \internal_ Specializes scalar_type for the scalar scalar_ itself: a scalar maps onto itself.
+#define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \
+ template<> \
+ struct scalar_type<BOOST_PP_CAT(scalar, _)> \
+ { \
+ typedef BOOST_PP_CAT(scalar, _) type; \
+ };
+
+/// \internal_ Specializes scalar_type for the vector name scalar{size}_ so that its nested type is scalar_.
+#define BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, size) \
+ template<> \
+ struct scalar_type<BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)> \
+ { \
+ typedef BOOST_PP_CAT(scalar, _) type; \
+ };
+
+/// \internal_ Expands the scalar specialization plus the vector specializations of sizes 2, 4, 8 and 16 for one scalar name.
+#define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(scalar) \
+ BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 2) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 4) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 8) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 16)
+
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(char) // one family of specializations per scalar name
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uchar)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(short)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ushort)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(int)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uint)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(long)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ulong)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(float)
+BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(double)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP
diff --git a/boost/compute/type_traits/type_definition.hpp b/boost/compute/type_traits/type_definition.hpp
new file mode 100644
index 0000000000..de9095fbd2
--- /dev/null
+++ b/boost/compute/type_traits/type_definition.hpp
@@ -0,0 +1,39 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP
+
+#include <string>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// only declared in this header; type_definition() calls value() on it
+template<typename T> struct type_definition_trait;
+
+} // end detail namespace
+
+/// Returns the OpenCL type definition for \c T.
+///
+/// \return a string containing the type definition for \c T
+/// \see type_name<T>()
+template<typename T>
+inline std::string type_definition()
+{
+    typedef detail::type_definition_trait<T> definition_source;
+    return definition_source::value();
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP
diff --git a/boost/compute/type_traits/type_name.hpp b/boost/compute/type_traits/type_name.hpp
new file mode 100644
index 0000000000..86ac7bc8bc
--- /dev/null
+++ b/boost/compute/type_traits/type_name.hpp
@@ -0,0 +1,124 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP
+
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/stringize.hpp>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class T>
+struct type_name_trait; // declared only here; the specializations below supply a static value()
+
+/// \internal_ Specializes type_name_trait for type_ so that value() returns the name type as a string, without the trailing underscore.
+#define BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(type) \
+ template<> \
+ struct type_name_trait<BOOST_PP_CAT(type, _)> \
+ { \
+ static const char* value() \
+ { \
+ return BOOST_PP_STRINGIZE(type); \
+ } \
+ };
+
+/// \internal_ Specializes type_name_trait for scalar{n}_ so that value() returns the pasted name scalar{n} as a string (for example float4).
+#define BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, n) \
+ template<> \
+ struct type_name_trait<BOOST_PP_CAT(BOOST_PP_CAT(scalar, n), _)> \
+ { \
+ static const char* value() \
+ { \
+ return BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, n)); \
+ } \
+ };
+
+/// \internal_ Expands the scalar name specialization plus the vector ones of sizes 2, 4, 8 and 16 for one scalar name.
+#define BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(scalar) \
+ BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(scalar) \
+ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 2) \
+ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 4) \
+ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 8) \
+ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 16)
+
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(char) // one family of specializations per scalar name
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uchar)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(short)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ushort)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(int)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uint)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(long)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ulong)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(float)
+BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(double)
+
+/// \internal_ Specializes type_name_trait for the C++ type \p type itself; value() returns the type's own spelling, stringized with #type.
+#define BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(type) \
+ template<> \
+ struct type_name_trait<type> \
+ { \
+ static const char* value() \
+ { \
+ return #type; \
+ } \
+ };
+
+BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(bool) // names returned: "bool", "char" and "void"
+BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(char)
+BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(void)
+
+} // end detail namespace
+
+/// Returns the OpenCL type name for the type \c T as a C string, as
+/// supplied by detail::type_name_trait<T>::value().
+///
+/// For example:
+/// \code
+/// type_name<float>() == "float"
+/// type_name<float4_>() == "float4"
+/// \endcode
+///
+/// \see type_definition<T>()
+template<typename T>
+inline const char* type_name()
+{
+    typedef detail::type_name_trait<T> name_source;
+    return name_source::value();
+}
+
+} // end compute namespace
+} // end boost namespace
+
+/// Registers the OpenCL type for the C++ \p type to \p name by explicitly
+/// specializing type_name<type>() to return \p name as a string literal.
+///
+/// For example, the following will allow Eigen's \c Vector2f type
+/// to be used with Boost.Compute algorithms and containers as the
+/// built-in \c float2 type.
+/// \code
+/// BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2)
+/// \endcode
+///
+/// The macro opens namespace boost::compute itself, so it should be
+/// invoked in the global namespace. \see type_name()
+#define BOOST_COMPUTE_TYPE_NAME(type, name) \
+ namespace boost { namespace compute { \
+ template<> \
+ inline const char* type_name<type>() \
+ { \
+ return #name; \
+ }}}
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP
diff --git a/boost/compute/type_traits/vector_size.hpp b/boost/compute/type_traits/vector_size.hpp
new file mode 100644
index 0000000000..c207a6fedd
--- /dev/null
+++ b/boost/compute/type_traits/vector_size.hpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP
+#define BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP
+
+#include <boost/preprocessor/cat.hpp>
+
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+
+/// Meta-function giving the number of components of the vector type \p T.
+/// Any type without a specialization, scalar types included, reports \c 1.
+///
+/// For example,
+/// \code
+/// vector_size<float>::value == 1
+/// vector_size<float4_>::value == 4
+/// \endcode
+template<typename T>
+struct vector_size
+{
+    /// \internal_
+    BOOST_STATIC_CONSTANT(size_t, value = 1);
+};
+
+/// \internal_ Specializes vector_size for the vector name scalar{size}_ so that value equals size.
+#define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, size) \
+ template<> \
+ struct vector_size<BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)> \
+ { \
+ BOOST_STATIC_CONSTANT(size_t, value = size); \
+ };
+
+/// \internal_ Expands the specialization above for the sizes 2, 4, 8 and 16 of one scalar name.
+#define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(scalar) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 2) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 4) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 8) \
+ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 16)
+
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(char) // one family of specializations per scalar name
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uchar)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(short)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ushort)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(int)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uint)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(long)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ulong)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(float)
+BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(double)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP
diff --git a/boost/compute/types.hpp b/boost/compute/types.hpp
new file mode 100644
index 0000000000..3d9120b65c
--- /dev/null
+++ b/boost/compute/types.hpp
@@ -0,0 +1,24 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPES_HPP
+#define BOOST_COMPUTE_TYPES_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute types headers.
+
+#include <boost/compute/types/complex.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/types/pair.hpp>
+#include <boost/compute/types/struct.hpp>
+#include <boost/compute/types/tuple.hpp>
+
+#endif // BOOST_COMPUTE_TYPES_HPP
diff --git a/boost/compute/types/builtin.hpp b/boost/compute/types/builtin.hpp
new file mode 100644
index 0000000000..dc30f584eb
--- /dev/null
+++ b/boost/compute/types/builtin.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/types/fundamental.hpp> instead
+#include <boost/compute/types/fundamental.hpp>
diff --git a/boost/compute/types/complex.hpp b/boost/compute/types/complex.hpp
new file mode 100644
index 0000000000..1d60cef9e5
--- /dev/null
+++ b/boost/compute/types/complex.hpp
@@ -0,0 +1,196 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPES_COMPLEX_HPP
+#define BOOST_COMPUTE_TYPES_COMPLEX_HPP
+
+#include <complex>
+
+#include <boost/compute/functional.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/type_traits/make_vector_type.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Streams the host-side value \p x to the meta_kernel as a cast
+// expression of the form (type)(real, imag).
+template<typename T>
+meta_kernel& operator<<(meta_kernel &kernel, const std::complex<T> &x)
+{
+    return kernel
+        << "(" << type_name< std::complex<T> >() << ")"
+        << "(" << x.real() << ", " << x.imag()
+        << ")";
+}
+
+// get<N>() applied to a std::complex<T> yields the component type T
+template<size_t N, typename T>
+struct get_result_type<N, std::complex<T> >
+{
+    typedef T type; // same for both components
+};
+
+// get<N>() specialization for std::complex<>: N == 0 appends ".x", N == 1 appends ".y"
+template<size_t N, typename Arg, typename T>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_get<N, Arg, std::complex<T> > &expr)
+{
+    BOOST_STATIC_ASSERT(N < 2);
+
+    return kernel << expr.m_arg << (N != 0 ? ".y" : ".x");
+}
+
+} // end detail namespace
+
+// function object selecting the real part (component 0) of a complex<T>
+template<typename T>
+struct real
+{
+    typedef T result_type;
+
+    template<typename Arg>
+    detail::invoked_get<0, Arg, std::complex<T> > operator()(const Arg &x) const
+    {
+        typedef detail::invoked_get<0, Arg, std::complex<T> > real_part;
+        return real_part(x);
+    }
+};
+
+// function object selecting the imaginary part (component 1) of a complex<T>
+template<typename T>
+struct imag
+{
+    typedef T result_type;
+
+    template<typename Arg>
+    detail::invoked_get<1, Arg, std::complex<T> > operator()(const Arg &x) const
+    {
+        typedef detail::invoked_get<1, Arg, std::complex<T> > imaginary_part;
+        return imaginary_part(x);
+    }
+};
+
+namespace detail {
+
+// expression object recording the two operands of a complex multiplication
+template<typename Arg1, typename Arg2, typename T>
+struct invoked_complex_multiplies
+{
+    typedef std::complex<T> result_type;
+
+    // keeps a copy of each operand
+    invoked_complex_multiplies(const Arg1 &x, const Arg2 &y)
+        : m_x(x), m_y(y)
+    {}
+
+    Arg1 m_x; // first operand
+    Arg2 m_y; // second operand
+};
+
+// Emits the complex product x*y as (type)(re, im), where
+//   re = x.x*y.x - x.y*y.y
+//   im = x.y*y.x + x.x*y.y
+template<typename Arg1, typename Arg2, typename T>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_complex_multiplies<Arg1, Arg2, T> &expr)
+{
+    return kernel
+        << "(" << type_name< std::complex<T> >() << ")"
+        << "("
+        << expr.m_x << ".x*" << expr.m_y << ".x-" << expr.m_x << ".y*" << expr.m_y << ".y,"
+        << expr.m_x << ".y*" << expr.m_y << ".x+" << expr.m_x << ".x*" << expr.m_y << ".y"
+        << ")";
+}
+
+// expression object recording the operand of a complex conjugation
+template<typename Arg, typename T>
+struct invoked_complex_conj
+{
+    typedef std::complex<T> result_type;
+
+    // keeps a copy of the operand
+    invoked_complex_conj(const Arg &arg)
+        : m_arg(arg) {}
+
+    Arg m_arg; // expression whose conjugate is taken
+};
+
+// Emits the conjugate as (type)(arg.x, -arg.y): only the second component is negated.
+template<typename Arg, typename T>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+                               const invoked_complex_conj<Arg, T> &expr)
+{
+    return kernel
+        << "(" << type_name< std::complex<T> >() << ")"
+        << "("
+        << expr.m_arg << ".x" << ", -" << expr.m_arg << ".y"
+        << ")";
+}
+
+} // end detail namespace
+
+// multiplies<> specialized for std::complex<T>; registered under the name "complex_multiplies"
+template<typename T>
+class multiplies<std::complex<T> >
+    : public function<std::complex<T> (std::complex<T>, std::complex<T>)>
+{
+public:
+    multiplies()
+        : function<std::complex<T> (std::complex<T>, std::complex<T>)>(
+              "complex_multiplies")
+    {}
+
+    // builds the expression object that is later streamed as the complex product
+    template<typename Arg1, typename Arg2>
+    detail::invoked_complex_multiplies<Arg1, Arg2, T>
+    operator()(const Arg1 &x, const Arg2 &y) const
+    {
+        typedef detail::invoked_complex_multiplies<Arg1, Arg2, T> product;
+        return product(x, y);
+    }
+};
+
+// function object producing the complex conjugate of a complex<T>
+template<typename T>
+struct conj
+{
+    typedef std::complex<T> result_type;
+
+    template<typename Arg>
+    detail::invoked_complex_conj<Arg, T> operator()(const Arg &x) const
+    {
+        typedef detail::invoked_complex_conj<Arg, T> conjugate;
+        return conjugate(x);
+    }
+};
+
+namespace detail {
+
+// type_name() specialization for std::complex: std::complex<T> takes the
+// name of the two-component vector type built from T
+template<typename T>
+struct type_name_trait<std::complex<T> >
+{
+    static const char* value()
+    {
+        // looks the name up through make_vector_type<T, 2>::type
+        return type_name<typename make_vector_type<T, 2>::type>();
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPES_COMPLEX_HPP
diff --git a/boost/compute/types/fundamental.hpp b/boost/compute/types/fundamental.hpp
new file mode 100644
index 0000000000..c1502e327e
--- /dev/null
+++ b/boost/compute/types/fundamental.hpp
@@ -0,0 +1,172 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP
+#define BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP
+
+#include <cstring>
+#include <ostream>
+
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/comma.hpp>
+#include <boost/preprocessor/repetition.hpp>
+#include <boost/preprocessor/stringize.hpp>
+
+#include <boost/compute/cl.hpp>
+
+namespace boost {
+namespace compute {
+
+// scalar data types
+typedef cl_char char_;
+typedef cl_uchar uchar_;
+typedef cl_short short_;
+typedef cl_ushort ushort_;
+typedef cl_int int_;
+typedef cl_uint uint_;
+typedef cl_long long_;
+typedef cl_ulong ulong_;
+typedef cl_float float_;
+typedef cl_double double_;
+
+// converts uchar to ::boost::compute::uchar_
+#define BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar) \
+ BOOST_PP_CAT(::boost::compute::scalar, _)
+
+// converts float, 4 to ::boost::compute::float4_
+#define BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) \
+ BOOST_PP_CAT(BOOST_PP_CAT(::boost::compute::scalar, size), _)
+
+// vector data types
+template<class Scalar, size_t N>
+class vector_type
+{
+public:
+ typedef Scalar scalar_type;
+
+ vector_type()
+ {
+
+ }
+
+ explicit vector_type(const Scalar scalar)
+ {
+ for(size_t i = 0; i < N; i++)
+ m_value[i] = scalar;
+ }
+
+ vector_type(const vector_type<Scalar, N> &other)
+ {
+ std::memcpy(m_value, other.m_value, sizeof(m_value));
+ }
+
+ vector_type<Scalar, N>&
+ operator=(const vector_type<Scalar, N> &other)
+ {
+ std::memcpy(m_value, other.m_value, sizeof(m_value));
+ return *this;
+ }
+
+ size_t size() const
+ {
+ return N;
+ }
+
+ Scalar& operator[](size_t i)
+ {
+ return m_value[i];
+ }
+
+ Scalar operator[](size_t i) const
+ {
+ return m_value[i];
+ }
+
+ bool operator==(const vector_type<Scalar, N> &other) const
+ {
+ return std::memcmp(m_value, other.m_value, sizeof(m_value)) == 0;
+ }
+
+ bool operator!=(const vector_type<Scalar, N> &other) const
+ {
+ return !(*this == other);
+ }
+
+protected:
+ scalar_type m_value[N];
+};
+
+#define BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION(z, i, _) \
+ BOOST_PP_COMMA_IF(i) scalar_type BOOST_PP_CAT(arg, i)
+#define BOOST_COMPUTE_VECTOR_TYPE_DECLARE_CTOR_ARGS(scalar, size) \
+ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _)
+#define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG(z, i, _) \
+ m_value[i] = BOOST_PP_CAT(arg, i);
+#define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG(z, i, _) \
+ m_value[i] = arg;
+
+#define BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(cl_scalar, size, class_name) \
+ class class_name : public vector_type<cl_scalar, size> \
+ { \
+ public: \
+ class_name() { } \
+ explicit class_name( scalar_type arg ) \
+ { \
+ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG, _) \
+ } \
+ class_name( \
+ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _) \
+ ) \
+ { \
+ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG, _) \
+ } \
+ };
+
+#define BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, size) \
+ BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(BOOST_PP_CAT(cl_, scalar), \
+ size, \
+ BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)) \
+ \
+ inline std::ostream& operator<<( \
+ std::ostream &s, \
+ const BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) &v) \
+ { \
+ s << BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, size)) << "("; \
+ for(size_t i = 0; i < size; i++){\
+ s << v[i]; \
+ if(i != size - 1){\
+ s << ", "; \
+ } \
+ } \
+ s << ")"; \
+ return s; \
+ }
+
+#define BOOST_COMPUTE_DECLARE_VECTOR_TYPES(scalar) \
+ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 2) \
+ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 4) \
+ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 8) \
+ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 16) \
+
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(char)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uchar)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(short)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ushort)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(int)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uint)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(long)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ulong)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(float)
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(double)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP
diff --git a/boost/compute/types/pair.hpp b/boost/compute/types/pair.hpp
new file mode 100644
index 0000000000..96db6539f1
--- /dev/null
+++ b/boost/compute/types/pair.hpp
@@ -0,0 +1,117 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPES_PAIR_HPP
+#define BOOST_COMPUTE_TYPES_PAIR_HPP
+
+#include <string>
+#include <utility>
+
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/type_traits/type_definition.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// meta_kernel operator for std::pair literals
+template<class T1, class T2>
+inline meta_kernel&
+operator<<(meta_kernel &kernel, const std::pair<T1, T2> &x)
+{
+ kernel << "(" << type_name<std::pair<T1, T2> >() << ")"
+ << "{" << kernel.make_lit(x.first) << ", "
+ << kernel.make_lit(x.second) << "}";
+
+ return kernel;
+}
+
+// inject_type() specialization for std::pair
+template<class T1, class T2>
+struct inject_type_impl<std::pair<T1, T2> >
+{
+ void operator()(meta_kernel &kernel)
+ {
+ typedef std::pair<T1, T2> pair_type;
+
+ kernel.inject_type<T1>();
+ kernel.inject_type<T2>();
+
+ kernel.add_type_declaration<pair_type>(type_definition<pair_type>());
+ }
+};
+
+// get<N>() result type specialization for std::pair<>
+template<class T1, class T2>
+struct get_result_type<0, std::pair<T1, T2> >
+{
+ typedef T1 type;
+};
+
+template<class T1, class T2>
+struct get_result_type<1, std::pair<T1, T2> >
+{
+ typedef T2 type;
+};
+
+// get<N>() specialization for std::pair<>
+template<size_t N, class Arg, class T1, class T2>
+inline meta_kernel& operator<<(meta_kernel &kernel,
+ const invoked_get<N, Arg, std::pair<T1, T2> > &expr)
+{
+ kernel.inject_type<std::pair<T1, T2> >();
+
+ return kernel << expr.m_arg << (N == 0 ? ".first" : ".second");
+}
+
+} // end detail namespace
+
+namespace detail {
+
+// type_name() specialization for std::pair
+template<class T1, class T2>
+struct type_name_trait<std::pair<T1, T2> >
+{
+ static const char* value()
+ {
+ static std::string name =
+ std::string("_pair_") +
+ type_name<T1>() + "_" + type_name<T2>() +
+ "_t";
+
+ return name.c_str();
+ }
+};
+
+// type_definition() specialization for std::pair
+template<class T1, class T2>
+struct type_definition_trait<std::pair<T1, T2> >
+{
+ static std::string value()
+ {
+ typedef std::pair<T1, T2> pair_type;
+
+ std::stringstream declaration;
+ declaration << "typedef struct {\n"
+ << " " << type_name<T1>() << " first;\n"
+ << " " << type_name<T2>() << " second;\n"
+ << "} " << type_name<pair_type>() << ";\n";
+
+ return declaration.str();
+ }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPES_PAIR_HPP
diff --git a/boost/compute/types/struct.hpp b/boost/compute/types/struct.hpp
new file mode 100644
index 0000000000..92aeaedf22
--- /dev/null
+++ b/boost/compute/types/struct.hpp
@@ -0,0 +1,173 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPES_STRUCT_HPP
+#define BOOST_COMPUTE_TYPES_STRUCT_HPP
+
+#include <sstream>
+
+#include <boost/static_assert.hpp>
+
+#include <boost/preprocessor/expr_if.hpp>
+#include <boost/preprocessor/stringize.hpp>
+#include <boost/preprocessor/seq/fold_left.hpp>
+#include <boost/preprocessor/seq/for_each.hpp>
+#include <boost/preprocessor/seq/transform.hpp>
+
+#include <boost/compute/type_traits/type_definition.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/variadic_macros.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+template<class Struct, class T>
+inline std::string adapt_struct_insert_member(T Struct::*, const char *name)
+{
+ std::stringstream s;
+ s << " " << type_name<T>() << " " << name << ";\n";
+ return s.str();
+}
+
+
+template<class Struct, class T, int N>
+inline std::string adapt_struct_insert_member(T (Struct::*)[N], const char *name)
+{
+ std::stringstream s;
+ s << " " << type_name<T>() << " " << name << "[" << N << "]" << ";\n";
+ return s.str();
+}
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+/// \internal_
+#define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER(r, type, member) \
+ << ::boost::compute::detail::adapt_struct_insert_member( \
+ &type::member, BOOST_PP_STRINGIZE(member) \
+ )
+
+/// \internal_
+#define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER(r, data, i, elem) \
+ BOOST_PP_EXPR_IF(i, << ", ") << data.elem
+
+/// \internal_
+#define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE(s, struct_, member_) \
+ sizeof(((struct_ *)0)->member_)
+
+/// \internal_
+#define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD(s, x, y) (x+y)
+
+/// \internal_
+#define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_) \
+ BOOST_PP_SEQ_FOLD_LEFT( \
+ BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD, \
+ 0, \
+ BOOST_PP_SEQ_TRANSFORM( \
+ BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE, struct_, members_ \
+ ) \
+ )
+
+/// \internal_
+///
+/// Returns true if struct_ contains no internal padding bytes (i.e. it is
+/// packed). members_ is a sequence of the names of the struct members.
+#define BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(struct_, members_) \
+ (sizeof(struct_) == BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_))
+
+/// The BOOST_COMPUTE_ADAPT_STRUCT() macro makes a C++ struct/class available
+/// to OpenCL kernels.
+///
+/// \param type The C++ type.
+/// \param name The OpenCL name.
+/// \param members A tuple of the struct's members.
+///
+/// For example, to adapt a 2D particle struct with position (x, y) and
+/// velocity (dx, dy):
+/// \code
+/// // c++ struct definition
+/// struct Particle
+/// {
+/// float x, y;
+/// float dx, dy;
+/// };
+///
+/// // adapt struct for OpenCL
+/// BOOST_COMPUTE_ADAPT_STRUCT(Particle, Particle, (x, y, dx, dy))
+/// \endcode
+///
+/// After adapting the struct it can be used in Boost.Compute containers
+/// and with Boost.Compute algorithms:
+/// \code
+/// // create vector of particles
+/// boost::compute::vector<Particle> particles = ...
+///
+/// // function to compare particles by their x-coordinate
+/// BOOST_COMPUTE_FUNCTION(bool, sort_by_x, (Particle a, Particle b),
+/// {
+/// return a.x < b.x;
+/// });
+///
+/// // sort particles by their x-coordinate
+/// boost::compute::sort(
+/// particles.begin(), particles.end(), sort_by_x, queue
+/// );
+/// \endcode
+///
+/// Due to differences in struct padding between the host compiler and the
+/// device compiler, the \c BOOST_COMPUTE_ADAPT_STRUCT() macro requires that
+/// the adapted struct is packed (i.e. no padding bytes between members).
+///
+/// \see type_name()
+#define BOOST_COMPUTE_ADAPT_STRUCT(type, name, members) \
+ BOOST_STATIC_ASSERT_MSG( \
+ BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(type, BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members)), \
+ "BOOST_COMPUTE_ADAPT_STRUCT() does not support structs with internal padding." \
+ ); \
+ BOOST_COMPUTE_TYPE_NAME(type, name) \
+ namespace boost { namespace compute { \
+ template<> \
+ inline std::string type_definition<type>() \
+ { \
+ std::stringstream declaration; \
+ declaration << "typedef struct __attribute__((packed)) {\n" \
+ BOOST_PP_SEQ_FOR_EACH( \
+ BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER, \
+ type, \
+ BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \
+ ) \
+ << "} " << type_name<type>() << ";\n"; \
+ return declaration.str(); \
+ } \
+ namespace detail { \
+ template<> \
+ struct inject_type_impl<type> \
+ { \
+ void operator()(meta_kernel &kernel) \
+ { \
+ kernel.add_type_declaration<type>(type_definition<type>()); \
+ } \
+ }; \
+ inline meta_kernel& operator<<(meta_kernel &k, type s) \
+ { \
+ return k << "(" << #name << "){" \
+ BOOST_PP_SEQ_FOR_EACH_I( \
+ BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER, \
+ s, \
+ BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \
+ ) \
+ << "}"; \
+ } \
+ }}}
+
+#endif // BOOST_COMPUTE_TYPES_STRUCT_HPP
diff --git a/boost/compute/types/tuple.hpp b/boost/compute/types/tuple.hpp
new file mode 100644
index 0000000000..095bd95448
--- /dev/null
+++ b/boost/compute/types/tuple.hpp
@@ -0,0 +1,220 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_TYPES_TUPLE_HPP
+#define BOOST_COMPUTE_TYPES_TUPLE_HPP
+
+#include <string>
+#include <utility>
+
+#include <boost/preprocessor/enum.hpp>
+#include <boost/preprocessor/expr_if.hpp>
+#include <boost/preprocessor/repetition.hpp>
+#include <boost/tuple/tuple.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/functional/get.hpp>
+#include <boost/compute/type_traits/type_name.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+
+#ifndef BOOST_COMPUTE_NO_STD_TUPLE
+#include <tuple>
+#endif
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// meta_kernel operators for boost::tuple literals
+#define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \
+ BOOST_PP_EXPR_IF(n, << ", ") \
+ << kernel.make_lit(boost::get<n>(x))
+
+#define BOOST_COMPUTE_PRINT_TUPLE(z, n, unused) \
+template<BOOST_PP_ENUM_PARAMS(n, class T)> \
+inline meta_kernel& \
+operator<<(meta_kernel &kernel, \
+ const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &x) \
+{ \
+ return kernel \
+ << "(" \
+ << type_name<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> >() \
+ << ")" \
+ << "{" \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \
+ << "}"; \
+}
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TUPLE, ~)
+
+#undef BOOST_COMPUTE_PRINT_TUPLE
+#undef BOOST_COMPUTE_PRINT_ELEM
+
+// inject_type() specializations for boost::tuple
+#define BOOST_COMPUTE_INJECT_TYPE(z, n, unused) \
+ kernel.inject_type<T ## n>();
+
+#define BOOST_COMPUTE_INJECT_DECL(z, n, unused) \
+ << " " << type_name<T ## n>() << " v" #n ";\n"
+
+#define BOOST_COMPUTE_INJECT_IMPL(z, n, unused) \
+template<BOOST_PP_ENUM_PARAMS(n, class T)> \
+struct inject_type_impl<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \
+{ \
+ void operator()(meta_kernel &kernel) \
+ { \
+ typedef boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> tuple_type; \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_TYPE, ~) \
+ std::stringstream declaration; \
+ declaration << "typedef struct {\n" \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_DECL, ~) \
+ << "} " << type_name<tuple_type>() << ";\n"; \
+ kernel.add_type_declaration<tuple_type>(declaration.str()); \
+ } \
+};
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_INJECT_IMPL, ~)
+
+#undef BOOST_COMPUTE_INJECT_IMPL
+#undef BOOST_COMPUTE_INJECT_DECL
+#undef BOOST_COMPUTE_INJECT_TYPE
+
+#ifdef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+// type_name() specializations for boost::tuple (without variadic templates)
+#define BOOST_COMPUTE_PRINT_TYPE(z, n, unused) \
+ + type_name<T ## n>() + "_"
+
+#define BOOST_COMPUTE_PRINT_TYPE_NAME(z, n, unused) \
+template<BOOST_PP_ENUM_PARAMS(n, class T)> \
+struct type_name_trait<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \
+{ \
+ static const char* value() \
+ { \
+ static std::string name = \
+ std::string("boost_tuple_") \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_TYPE, ~) \
+ "t"; \
+ return name.c_str(); \
+ } \
+};
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TYPE_NAME, ~)
+
+#undef BOOST_COMPUTE_PRINT_TYPE_NAME
+#undef BOOST_COMPUTE_PRINT_TYPE
+
+#else
+template<size_t N, class T, class... Rest>
+struct write_tuple_type_names
+{
+ void operator()(std::ostream &os)
+ {
+ os << type_name<T>() << "_";
+ write_tuple_type_names<N-1, Rest...>()(os);
+ }
+};
+
+template<class T, class... Rest>
+struct write_tuple_type_names<1, T, Rest...>
+{
+ void operator()(std::ostream &os)
+ {
+ os << type_name<T>();
+ }
+};
+
+// type_name<> specialization for boost::tuple<...> (with variadic templates)
+template<class... T>
+struct type_name_trait<boost::tuple<T...>>
+{
+ static const char* value()
+ {
+ static std::string str = make_type_name();
+
+ return str.c_str();
+ }
+
+ static std::string make_type_name()
+ {
+ typedef typename boost::tuple<T...> tuple_type;
+
+ std::stringstream s;
+ s << "boost_tuple_";
+ write_tuple_type_names<
+ boost::tuples::length<tuple_type>::value, T...
+ >()(s);
+ s << "_t";
+ return s.str();
+ }
+};
+#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+#ifndef BOOST_COMPUTE_NO_STD_TUPLE
+// type_name<> specialization for std::tuple<T...>
+template<class... T>
+struct type_name_trait<std::tuple<T...>>
+{
+ static const char* value()
+ {
+ static std::string str = make_type_name();
+
+ return str.c_str();
+ }
+
+ static std::string make_type_name()
+ {
+ typedef typename std::tuple<T...> tuple_type;
+
+ std::stringstream s;
+ s << "std_tuple_";
+ write_tuple_type_names<
+ std::tuple_size<tuple_type>::value, T...
+ >()(s);
+ s << "_t";
+ return s.str();
+ }
+};
+#endif // BOOST_COMPUTE_NO_STD_TUPLE
+
+// get<N>() result type specialization for boost::tuple<>
+#define BOOST_COMPUTE_GET_RESULT_TYPE(z, n, unused) \
+template<size_t N, BOOST_PP_ENUM_PARAMS(n, class T)> \
+struct get_result_type<N, boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \
+{ \
+ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \
+ typedef typename boost::tuples::element<N, T>::type type; \
+};
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_RESULT_TYPE, ~)
+
+#undef BOOST_COMPUTE_GET_RESULT_TYPE
+
+
+// get<N>() specialization for boost::tuple<>
+#define BOOST_COMPUTE_GET_N(z, n, unused) \
+template<size_t N, class Arg, BOOST_PP_ENUM_PARAMS(n, class T)> \
+inline meta_kernel& operator<<(meta_kernel &kernel, \
+ const invoked_get<N, Arg, boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > &expr) \
+{ \
+ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \
+ BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<T>::value)); \
+ kernel.inject_type<T>(); \
+ return kernel << expr.m_arg << ".v" << uint_(N); \
+}
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_N, ~)
+
+#undef BOOST_COMPUTE_GET_N
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_TYPES_TUPLE_HPP
diff --git a/boost/compute/user_event.hpp b/boost/compute/user_event.hpp
new file mode 100644
index 0000000000..a3fdba033e
--- /dev/null
+++ b/boost/compute/user_event.hpp
@@ -0,0 +1,88 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_USER_EVENT_HPP
+#define BOOST_COMPUTE_USER_EVENT_HPP
+
+#include <boost/compute/event.hpp>
+#include <boost/compute/context.hpp>
+
+namespace boost {
+namespace compute {
+
+#if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+/// \class user_event
+/// \brief A user-created event.
+///
+/// \opencl_version_warning{1,1}
+///
+/// \see event
+class user_event : public event
+{
+public:
+ /// Creates a new user-event object.
+ ///
+ /// \see_opencl_ref{clCreateUserEvent}
+ explicit user_event(const context &context)
+ {
+ cl_int error;
+ m_event = clCreateUserEvent(context.get(), &error);
+ if(!m_event){
+ BOOST_THROW_EXCEPTION(opencl_error(error));
+ }
+ }
+
+ /// Creates a new user-event from \p other.
+ user_event(const user_event &other)
+ : event(other)
+ {
+ }
+
+ /// Copies the user-event from \p other to \c *this.
+ user_event& operator=(const user_event &other)
+ {
+ event::operator=(other);
+
+ return *this;
+ }
+
+ #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+ /// Move-constructs a new user event object from \p other.
+ user_event(user_event&& other) BOOST_NOEXCEPT
+ : event(std::move(other))
+ {
+ }
+
+ /// Move-assigns the user event from \p other to \c *this.
+ user_event& operator=(user_event&& other) BOOST_NOEXCEPT
+ {
+ event::operator=(std::move(other));
+
+ return *this;
+ }
+ #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+ /// Sets the execution status for the user-event.
+ ///
+ /// \see_opencl_ref{clSetUserEventStatus}
+ void set_status(cl_int execution_status)
+ {
+ cl_int ret = clSetUserEventStatus(m_event, execution_status);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
+ }
+};
+#endif // CL_VERSION_1_1
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_USER_EVENT_HPP
diff --git a/boost/compute/utility.hpp b/boost/compute/utility.hpp
new file mode 100644
index 0000000000..e6d1f6eab6
--- /dev/null
+++ b/boost/compute/utility.hpp
@@ -0,0 +1,21 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_HPP
+#define BOOST_COMPUTE_UTILITY_HPP
+
+#include <boost/compute/utility/dim.hpp>
+#include <boost/compute/utility/extents.hpp>
+#include <boost/compute/utility/invoke.hpp>
+#include <boost/compute/utility/program_cache.hpp>
+#include <boost/compute/utility/source.hpp>
+#include <boost/compute/utility/wait_list.hpp>
+
+#endif // BOOST_COMPUTE_UTILITY_HPP
diff --git a/boost/compute/utility/dim.hpp b/boost/compute/utility/dim.hpp
new file mode 100644
index 0000000000..210c09cf6e
--- /dev/null
+++ b/boost/compute/utility/dim.hpp
@@ -0,0 +1,76 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_DIM_HPP
+#define BOOST_COMPUTE_UTILITY_DIM_HPP
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/utility/extents.hpp>
+
+namespace boost {
+namespace compute {
+
+#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+/// The variadic \c dim() function provides a concise syntax for creating
+/// \ref extents objects.
+///
+/// For example,
+/// \code
+/// extents<2> region = dim(640, 480); // region == (640, 480)
+/// \endcode
+///
+/// \see \ref extents "extents<N>"
+template<class... Args>
+inline extents<sizeof...(Args)> dim(Args... args)
+{
+ return extents<sizeof...(Args)>({ static_cast<size_t>(args)... });
+}
+
+#if BOOST_WORKAROUND(BOOST_MSVC, <= 1800)
+// For some inexplicable reason, passing a single argument to the variadic 'dim'
+// template generates a compile error on MSVC 2013 Update 4.
+template<class T>
+inline extents<1> dim(T arg)
+{
+ return extents<1>(static_cast<size_t>(arg));
+}
+#endif // BOOST_WORKAROUND(BOOST_MSVC, <= 1800)
+
+#else
+// dim() function definitions for non-c++11 compilers
+#define BOOST_COMPUTE_DETAIL_ASSIGN_DIM(z, n, var) \
+ var[n] = BOOST_PP_CAT(e, n);
+
+#define BOOST_COMPUTE_DETAIL_DEFINE_DIM(z, n, var) \
+ inline extents<n> dim(BOOST_PP_ENUM_PARAMS(n, size_t e)) \
+ { \
+ extents<n> exts; \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_ASSIGN_DIM, exts) \
+ return exts; \
+ }
+
+BOOST_PP_REPEAT(BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_DIM, ~)
+
+#undef BOOST_COMPUTE_DETAIL_ASSIGN_DIM
+#undef BOOST_COMPUTE_DETAIL_DEFINE_DIM
+
+#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
+
+/// \internal_
+template<size_t N>
+inline extents<N> dim()
+{
+ return extents<N>();
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_UTILITY_DIM_HPP
diff --git a/boost/compute/utility/extents.hpp b/boost/compute/utility/extents.hpp
new file mode 100644
index 0000000000..9666d47abc
--- /dev/null
+++ b/boost/compute/utility/extents.hpp
@@ -0,0 +1,164 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_EXTENTS_HPP
+#define BOOST_COMPUTE_UTILITY_EXTENTS_HPP
+
+#include <functional>
+#include <numeric>
+
+#include <boost/compute/config.hpp>
+
+#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+#include <initializer_list>
+#endif
+
+#include <boost/array.hpp>
+
+namespace boost {
+namespace compute {
+
+/// The extents class contains an array of n-dimensional extents.
+///
+/// \see dim()
+template<size_t N>
+class extents
+{
+public:
+ typedef size_t size_type;
+ static const size_type static_size = N;
+ typedef boost::array<size_t, N> array_type;
+ typedef typename array_type::iterator iterator;
+ typedef typename array_type::const_iterator const_iterator;
+
+ /// Creates an extents object with each component set to zero.
+ ///
+ /// For example:
+ /// \code
+ /// extents<3> exts; // (0, 0, 0)
+ /// \endcode
+ extents()
+ {
+ m_extents.fill(0);
+ }
+
+ /// Creates an extents object with each component set to \p value.
+ ///
+ /// For example:
+ /// \code
+ /// extents<3> exts(1); // (1, 1, 1)
+ /// \endcode
+ explicit extents(size_t value)
+ {
+ m_extents.fill(value);
+ }
+
+ #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+ /// Creates an extents object with \p values.
+ extents(std::initializer_list<size_t> values)
+ {
+ BOOST_ASSERT(values.size() == N);
+
+ std::copy(values.begin(), values.end(), m_extents.begin());
+ }
+ #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
+
+ /// Returns the size (i.e. dimensionality) of the extents array.
+ size_type size() const
+ {
+ return N;
+ }
+
+ /// Returns the linear size of the extents, i.e. the product of the extents
+ /// of all dimensions, accumulated as \c size_type (an \c int seed truncates).
+ size_type linear() const
+ {
+ return std::accumulate(
+ m_extents.begin(), m_extents.end(), size_type(1), std::multiplies<size_type>()
+ );
+ }
+
+ /// Returns a pointer to the extents data array.
+ ///
+ /// This is useful for passing the extents data to OpenCL APIs which
+ /// expect an array of \c size_t.
+ size_t* data()
+ {
+ return m_extents.data();
+ }
+
+ /// \overload
+ const size_t* data() const
+ {
+ return m_extents.data();
+ }
+
+ iterator begin()
+ {
+ return m_extents.begin();
+ }
+
+ const_iterator begin() const
+ {
+ return m_extents.begin();
+ }
+
+ const_iterator cbegin() const
+ {
+ return m_extents.cbegin();
+ }
+
+ iterator end()
+ {
+ return m_extents.end();
+ }
+
+ const_iterator end() const
+ {
+ return m_extents.end();
+ }
+
+ const_iterator cend() const
+ {
+ return m_extents.cend();
+ }
+
+ /// Returns a reference to the extent at \p index.
+ size_t& operator[](size_t index)
+ {
+ return m_extents[index];
+ }
+
+ /// \overload
+ const size_t& operator[](size_t index) const
+ {
+ return m_extents[index];
+ }
+
+ /// Returns \c true if the extents in \c *this are the same as \p other.
+ bool operator==(const extents &other) const
+ {
+ return m_extents == other.m_extents;
+ }
+
+ /// Returns \c true if the extents in \c *this are not the same as \p other.
+ bool operator!=(const extents &other) const
+ {
+ return m_extents != other.m_extents;
+ }
+
+private:
+ array_type m_extents;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_UTILITY_EXTENTS_HPP
diff --git a/boost/compute/utility/invoke.hpp b/boost/compute/utility/invoke.hpp
new file mode 100644
index 0000000000..b03162aa2b
--- /dev/null
+++ b/boost/compute/utility/invoke.hpp
@@ -0,0 +1,71 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://kylelutz.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_INVOKE_HPP
+#define BOOST_COMPUTE_UTILITY_INVOKE_HPP
+
+#include <boost/preprocessor/enum.hpp>
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/container/detail/scalar.hpp>
+#include <boost/compute/type_traits/result_of.hpp>
+
+namespace boost {
+namespace compute {
+
+#define BOOST_COMPUTE_DETAIL_INVOKE_ARG(z, n, unused) \
+ BOOST_PP_COMMA_IF(n) k.var<BOOST_PP_CAT(T, n)>("arg" BOOST_PP_STRINGIZE(n))
+
+#define BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG(z, n, unused) \
+ k.add_set_arg("arg" BOOST_PP_STRINGIZE(n), BOOST_PP_CAT(arg, n));
+
+#define BOOST_COMPUTE_DETAIL_DEFINE_INVOKE(z, n, unused) \
+template<class Function, BOOST_PP_ENUM_PARAMS(n, class T)> \
+inline typename result_of<Function(BOOST_PP_ENUM_PARAMS(n, T))>::type \
+invoke(const Function& function, command_queue& queue, BOOST_PP_ENUM_BINARY_PARAMS(n, const T, &arg)) \
+{ \
+ typedef typename result_of<Function(BOOST_PP_ENUM_PARAMS(n, T))>::type result_type; \
+ detail::meta_kernel k("invoke"); \
+ detail::scalar<result_type> result(queue.get_context()); \
+ const size_t result_arg = k.add_arg<result_type *>(memory_object::global_memory, "result"); \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG, ~) \
+ k << "*result = " << function( \
+ BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ARG, ~) \
+ ) << ";"; \
+ k.set_arg(result_arg, result.get_buffer()); \
+ k.exec(queue); \
+ return result.read(queue); \
+}
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_INVOKE, ~)
+
+#undef BOOST_COMPUTE_DETAIL_INVOKE_ARG
+#undef BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG
+#undef BOOST_COMPUTE_DETAIL_DEFINE_INVOKE
+
+#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
+/// Invokes \p function with \p args on \p queue.
+///
+/// For example, to invoke the builtin abs() function:
+/// \code
+/// int result = invoke(abs<int>(), queue, -10); // returns 10
+/// \endcode
+template<class Function, class... Args>
+inline typename result_of<Function(Args...)>::type
+invoke(const Function& function, command_queue& queue, const Args&... args);
+#endif // BOOST_COMPUTE_DOXYGEN_INVOKED
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_UTILITY_INVOKE_HPP
diff --git a/boost/compute/utility/program_cache.hpp b/boost/compute/utility/program_cache.hpp
new file mode 100644
index 0000000000..c80e1a3b1e
--- /dev/null
+++ b/boost/compute/utility/program_cache.hpp
@@ -0,0 +1,172 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP
+#define BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP
+
+#include <string>
+#include <utility>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/make_shared.hpp>
+#include <boost/noncopyable.hpp>
+
+#include <boost/compute/context.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/detail/lru_cache.hpp>
+#include <boost/compute/detail/global_static.hpp>
+
+namespace boost {
+namespace compute {
+
+/// The program_cache class keeps \ref program objects in a LRU cache.
+///
+/// OpenCL compiles kernels at run-time, which can be expensive. Keeping
+/// frequently used programs in a cache means they only have to be compiled
+/// the first time they are needed.
+///
+/// Programs are stored and retrieved using a user-defined cache key combined
+/// with the options the program was built with (if any).
+///
+/// For example, to insert a program into the cache:
+/// \code
+/// cache.insert("foo", foo_program);
+/// \endcode
+///
+/// And to retrieve the program later:
+/// \code
+/// boost::optional<program> p = cache.get("foo");
+/// if(p){
+///     // program found in cache
+/// }
+/// \endcode
+///
+/// \see program
+class program_cache : boost::noncopyable
+{
+public:
+    /// Creates an empty program cache which can hold up to \p capacity
+    /// program objects.
+    program_cache(size_t capacity)
+        : m_cache(capacity)
+    {
+    }
+
+    /// Destroys the program cache.
+    ~program_cache()
+    {
+    }
+
+    /// Returns how many program objects are currently held by the cache.
+    size_t size() const
+    {
+        return m_cache.size();
+    }
+
+    /// Returns the maximum number of program objects the cache can hold.
+    size_t capacity() const
+    {
+        return m_cache.capacity();
+    }
+
+    /// Removes every program object from the cache.
+    void clear()
+    {
+        m_cache.clear();
+    }
+
+    /// Looks up the program stored under \p key with empty build options.
+    /// Returns a null optional if the cache holds no such program.
+    boost::optional<program> get(const std::string &key)
+    {
+        return get(key, std::string());
+    }
+
+    /// Looks up the program stored under \p key and \p options. Returns a
+    /// null optional if the cache holds no such program.
+    boost::optional<program> get(const std::string &key, const std::string &options)
+    {
+        return m_cache.get(std::make_pair(key, options));
+    }
+
+    /// Stores \p program in the cache under \p key with empty build options.
+    void insert(const std::string &key, const program &program)
+    {
+        m_cache.insert(std::make_pair(key, std::string()), program);
+    }
+
+    /// Stores \p program in the cache under \p key and \p options.
+    void insert(const std::string &key, const std::string &options, const program &program)
+    {
+        m_cache.insert(std::make_pair(key, options), program);
+    }
+
+    /// Returns the program cached under \p key and \p options if there is
+    /// one. Otherwise a new program is built from \p source with \p options
+    /// for \p context, stored in the cache and returned.
+    ///
+    /// This wraps the common "look up, build on miss, then store" pattern
+    /// in a single call.
+    ///
+    /// Equivalent to:
+    /// \code
+    /// boost::optional<program> p = get(key, options);
+    /// if(!p){
+    ///     p = program::create_with_source(source, context);
+    ///     p->build(options);
+    ///     insert(key, options, *p);
+    /// }
+    /// return *p;
+    /// \endcode
+    program get_or_build(const std::string &key,
+                         const std::string &options,
+                         const std::string &source,
+                         const context &context)
+    {
+        boost::optional<program> cached = get(key, options);
+        if(cached){
+            // cache hit: nothing to build
+            return *cached;
+        }
+
+        // cache miss: build the program and remember it for next time
+        program built = program::build_with_source(source, context, options);
+        insert(key, options, built);
+        return built;
+    }
+
+    /// Returns the global program cache associated with \p context.
+    ///
+    /// Boost.Compute uses this global cache internally to keep the compiled
+    /// programs needed by its algorithms. Every Boost.Compute program is
+    /// stored under a cache key which starts with \c "__boost". User programs
+    /// should not use that prefix so that their keys cannot collide.
+    static boost::shared_ptr<program_cache> get_global_cache(const context &context)
+    {
+        typedef boost::shared_ptr<program_cache> cache_ptr;
+        typedef detail::lru_cache<cl_context, cache_ptr> cache_map;
+
+        // per-context caches, themselves kept in a LRU map of capacity 8
+        BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, (8));
+
+        boost::optional<cache_ptr> existing = caches.get(context.get());
+        if(existing){
+            return *existing;
+        }
+
+        // first request for this context: create a cache for 64 programs
+        cache_ptr created = boost::make_shared<program_cache>(64);
+        caches.insert(context.get(), created);
+        return created;
+    }
+
+private:
+    // keyed by (cache key, build options)
+    detail::lru_cache<std::pair<std::string, std::string>, program> m_cache;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP
diff --git a/boost/compute/utility/source.hpp b/boost/compute/utility/source.hpp
new file mode 100644
index 0000000000..3b073fbc82
--- /dev/null
+++ b/boost/compute/utility/source.hpp
@@ -0,0 +1,39 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_SOURCE_HPP
+#define BOOST_COMPUTE_UTILITY_SOURCE_HPP
+
+/// Stringizes OpenCL source code.
+///
+/// The macro arguments are converted into a single string literal by the
+/// preprocessor. The macro is variadic, so source text which contains commas
+/// can be passed directly.
+///
+/// For example, to create a simple kernel which squares each input value:
+/// \code
+/// const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
+///     __kernel void square(__global const float *input, __global float *output)
+///     {
+///         const uint i = get_global_id(0);
+///         const float x = input[i];
+///         output[i] = x * x;
+///     }
+/// );
+///
+/// // create and build square program
+/// program square_program = program::build_with_source(source, context);
+///
+/// // create square kernel
+/// kernel square_kernel(square_program, "square");
+/// \endcode
+#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
+#define BOOST_COMPUTE_STRINGIZE_SOURCE(source)
+#else
+#define BOOST_COMPUTE_STRINGIZE_SOURCE(...) #__VA_ARGS__
+#endif
+
+#endif // BOOST_COMPUTE_UTILITY_SOURCE_HPP
diff --git a/boost/compute/utility/wait_list.hpp b/boost/compute/utility/wait_list.hpp
new file mode 100644
index 0000000000..9a7e74bac0
--- /dev/null
+++ b/boost/compute/utility/wait_list.hpp
@@ -0,0 +1,203 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP
+#define BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP
+
+#include <vector>
+
+#include <boost/compute/event.hpp>
+
+namespace boost {
+namespace compute {
+
+// Forward declaration of future<T>; it is referenced by the
+// wait_list::insert(const future<T>&) overload declared below.
+template<class T> class future;
+
+/// \class wait_list
+/// \brief Stores a list of events.
+///
+/// A wait_list holds zero or more event objects. It can be used to express
+/// the dependencies of an OpenCL operation, or to block on the host until
+/// every event it contains has completed.
+///
+/// The class also offers convenience functions for working with OpenCL APIs,
+/// which typically take event dependencies as a \c cl_uint count followed by
+/// a \c cl_event* pointer. For example:
+/// \code
+/// wait_list events = ...;
+///
+/// clEnqueueNDRangeKernel(..., events.size(), events.get_event_ptr(), ...);
+/// \endcode
+///
+/// \see event, \ref future "future<T>"
+class wait_list
+{
+public:
+    typedef std::vector<event>::iterator iterator;
+    typedef std::vector<event>::const_iterator const_iterator;
+
+    /// Creates a wait-list which contains no events.
+    wait_list()
+    {
+    }
+
+    /// Creates a wait-list whose only element is \p event.
+    wait_list(const event &event)
+    {
+        m_events.push_back(event);
+    }
+
+    /// Creates a wait-list holding a copy of the events in \p other.
+    wait_list(const wait_list &other)
+        : m_events(other.m_events)
+    {
+    }
+
+    /// Replaces the events in this wait-list with copies of those in
+    /// \p other.
+    wait_list& operator=(const wait_list &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        m_events = other.m_events;
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a wait-list, taking over the events of \p other.
+    wait_list(wait_list&& other)
+        : m_events(std::move(other.m_events))
+    {
+    }
+
+    /// Move-assigns the events of \p other to \c *this.
+    wait_list& operator=(wait_list&& other)
+    {
+        m_events = std::move(other.m_events);
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the wait-list.
+    ~wait_list()
+    {
+    }
+
+    /// Returns \c true if the wait-list contains no events.
+    bool empty() const
+    {
+        return m_events.empty();
+    }
+
+    /// Returns the number of events held by the wait-list.
+    uint_ size() const
+    {
+        const std::vector<event>::size_type count = m_events.size();
+        return static_cast<uint_>(count);
+    }
+
+    /// Removes every event from the wait-list.
+    void clear()
+    {
+        m_events.clear();
+    }
+
+    /// Returns a \c cl_event pointer to the first event in the wait-list,
+    /// or \c 0 if the wait-list is empty.
+    ///
+    /// The pointer can be passed to OpenCL functions which expect a
+    /// \c cl_event pointer referring to a list of events.
+    const cl_event* get_event_ptr() const
+    {
+        return m_events.empty()
+            ? 0
+            : reinterpret_cast<const cl_event *>(&m_events[0]);
+    }
+
+    /// Reserves storage for at least \p new_capacity events.
+    void reserve(size_t new_capacity)
+    {
+        m_events.reserve(new_capacity);
+    }
+
+    /// Appends \p event to the wait-list.
+    void insert(const event &event)
+    {
+        m_events.push_back(event);
+    }
+
+    /// Appends the event associated with \p future to the wait-list.
+    template<class T>
+    void insert(const future<T> &future)
+    {
+        insert(future.get_event());
+    }
+
+    /// Blocks until every event in the wait-list has completed.
+    ///
+    /// Does nothing if the wait-list is empty.
+    void wait() const
+    {
+        if(m_events.empty()){
+            return;
+        }
+
+        BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+            clWaitForEvents(size(), get_event_ptr())
+        );
+    }
+
+    /// Returns a const reference to the event at position \p pos.
+    const event& operator[](size_t pos) const
+    {
+        return m_events[pos];
+    }
+
+    /// Returns a reference to the event at position \p pos.
+    event& operator[](size_t pos)
+    {
+        return m_events[pos];
+    }
+
+    /// Returns an iterator to the first event in the wait-list.
+    iterator begin()
+    {
+        return m_events.begin();
+    }
+
+    /// Returns a const iterator to the first event in the wait-list.
+    const_iterator begin() const
+    {
+        return m_events.begin();
+    }
+
+    /// Returns a const iterator to the first event in the wait-list.
+    const_iterator cbegin() const
+    {
+        return begin();
+    }
+
+    /// Returns an iterator to the position following the last event in
+    /// the wait-list.
+    iterator end()
+    {
+        return m_events.end();
+    }
+
+    /// Returns a const iterator to the position following the last event
+    /// in the wait-list.
+    const_iterator end() const
+    {
+        return m_events.end();
+    }
+
+    /// Returns a const iterator to the position following the last event
+    /// in the wait-list.
+    const_iterator cend() const
+    {
+        return end();
+    }
+
+private:
+    // events in insertion order
+    std::vector<event> m_events;
+};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP
diff --git a/boost/compute/version.hpp b/boost/compute/version.hpp
new file mode 100644
index 0000000000..fdf6caeeaf
--- /dev/null
+++ b/boost/compute/version.hpp
@@ -0,0 +1,18 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_VERSION_HPP
+#define BOOST_COMPUTE_VERSION_HPP
+
+// Components of the Boost.Compute library version, in MAJOR.MINOR.PATCH
+// order. As defined here the version is 0.5.0.
+#define BOOST_COMPUTE_VERSION_MAJOR 0
+#define BOOST_COMPUTE_VERSION_MINOR 5
+#define BOOST_COMPUTE_VERSION_PATCH 0
+
+#endif // BOOST_COMPUTE_VERSION_HPP
diff --git a/boost/compute/wait_list.hpp b/boost/compute/wait_list.hpp
new file mode 100644
index 0000000000..a6bda45991
--- /dev/null
+++ b/boost/compute/wait_list.hpp
@@ -0,0 +1,12 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// deprecated, use <boost/compute/utility/wait_list.hpp> instead
+#include <boost/compute/utility/wait_list.hpp>