//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline size_t count_if_with_ballot(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { size_t count = iterator_range_size(first, last); size_t block_size = 32; size_t block_count = count / block_size; if(block_count * block_size != count){ block_count++; } const ::boost::compute::context &context = queue.get_context(); ::boost::compute::vector counts(block_count, context); ::boost::compute::detail::nvidia_popcount popc; ::boost::compute::detail::nvidia_ballot ballot; meta_kernel k("count_if_with_ballot"); k << "const uint gid = get_global_id(0);\n" << "bool value = false;\n" << "if(gid < count)\n" << " value = " << predicate(first[k.var("gid")]) << ";\n" << "uint bits = " << ballot(k.var("value")) << ";\n" << "if(get_local_id(0) == 0)\n" << counts.begin()[k.var("get_group_id(0)") ] << " = " << popc(k.var("bits")) << ";\n"; k.add_set_arg("count", count); k.exec_1d(queue, 0, block_size * block_count, block_size); uint_ result; ::boost::compute::reduce( counts.begin(), counts.end(), &result, queue ); return result; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP