diff options
Diffstat (limited to 'boost/move/algo/adaptive_merge.hpp')
-rw-r--r-- | boost/move/algo/adaptive_merge.hpp | 253 |
1 files changed, 253 insertions, 0 deletions
diff --git a/boost/move/algo/adaptive_merge.hpp b/boost/move/algo/adaptive_merge.hpp index 0233b232e3..0040fda065 100644 --- a/boost/move/algo/adaptive_merge.hpp +++ b/boost/move/algo/adaptive_merge.hpp @@ -18,6 +18,259 @@ namespace boost { namespace movelib { +///@cond +namespace detail_adaptive { + +template<class RandIt, class Compare, class XBuf> +inline void adaptive_merge_combine_blocks( RandIt first + , typename iterator_traits<RandIt>::size_type len1 + , typename iterator_traits<RandIt>::size_type len2 + , typename iterator_traits<RandIt>::size_type collected + , typename iterator_traits<RandIt>::size_type n_keys + , typename iterator_traits<RandIt>::size_type l_block + , bool use_internal_buf + , bool xbuf_used + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const len = len1+len2; + size_type const l_combine = len-collected; + size_type const l_combine1 = len1-collected; + + if(n_keys){ + RandIt const first_data = first+collected; + RandIt const keys = first; + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); + if(xbuf_used){ + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } + BOOST_ASSERT(xbuf.size() >= l_block); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, comp, l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + op_merge_blocks_with_buf + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len); + } + else{ + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, comp, l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + if(use_internal_buf){ + op_merge_blocks_with_buf + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len); + } + else{ + merge_blocks_bufferless + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len); + } + } + } + else{ + xbuf.shrink_to_fit(l_block); + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } + size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( uint_keys, less(), l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); + BOOST_ASSERT(xbuf.size() >= l_block); + op_merge_blocks_with_buf + (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); + xbuf.clear(); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len); + } +} + +template<class RandIt, class Compare, class XBuf> +inline void adaptive_merge_final_merge( RandIt first + , typename iterator_traits<RandIt>::size_type len1 + , typename iterator_traits<RandIt>::size_type len2 + , typename iterator_traits<RandIt>::size_type collected + , typename iterator_traits<RandIt>::size_type l_intbuf + , typename iterator_traits<RandIt>::size_type l_block + , bool use_internal_buf + , bool xbuf_used + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + (void)l_block; + size_type n_keys = collected-l_intbuf; + size_type len = len1+len2; + if(use_internal_buf){ + if(xbuf_used){ + xbuf.clear(); + //Nothing to do + if(n_keys){ + unstable_sort(first, first+n_keys, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len); + } + } + else{ + xbuf.clear(); + unstable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); + stable_merge(first, first+collected, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); + } + } + else{ + xbuf.clear(); + unstable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); + stable_merge(first, first+collected, first+len1+len2, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len); +} + +template<class SizeType, class Xbuf> +inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) +{ + typedef SizeType size_type; + size_type l_block = rl_block; + size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; + + while(xbuf.capacity() >= l_block*2){ + l_block *= 2; + } + + //This is the minimum number of keys to implement the ideal algorithm + size_type n_keys = len1/l_block+len2/l_block; + while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ + --n_keys; + } + ++n_keys; + BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); + + if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){ + n_keys = 0u; + } + l_intbuf_inout = l_intbuf; + rl_block = l_block; + return n_keys; +} + +// Main explanation of the merge algorithm. +// +// csqrtlen = ceil(sqrt(len)); +// +// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect +// unique elements are extracted from elements to be sorted and placed in the beginning of the range. +// +// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements +// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. +// +// Explanation of the "combine_blocks" step: +// +// * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. +// Remaining elements that can't form a group are grouped in front of those elements. +// * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. +// Remaining elements that can't form a group are grouped in the back of those elements. +// * In parallel the following two steps are performed: +// * Groups are selection-sorted by first or last element (depending whether they are going +// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). +// +// * In the final merge step leading "to_collect" elements are merged with rotations +// with the rest of merged elements in the "combine_blocks" step. +// +// Corner cases: +// +// * If no "to_collect" elements can be extracted: +// +// * If more than a minimum number of elements is extracted +// then reduces the number of elements used as buffer and keys in the +// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction +// then uses a rotation based smart merge. +// +// * If the minimum number of keys can't be extracted, a rotation-based merge is performed. +// +// * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. +// +// * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. +// +// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), +// then no csqrtlen need to be extracted and "combine_blocks" will use integral +// keys to combine blocks. +template<class RandIt, class Compare, class XBuf> +void adaptive_merge_impl + ( RandIt first + , typename iterator_traits<RandIt>::size_type len1 + , typename iterator_traits<RandIt>::size_type len2 + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + + if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ + buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); + } + else{ + const size_type len = len1+len2; + //Calculate ideal parameters and try to collect needed unique keys + size_type l_block = size_type(ceil_sqrt(len)); + + //One range is not big enough to extract keys and the internal buffer so a + //rotation-based based merge will do just fine + if(len1 <= l_block*2 || len2 <= l_block*2){ + merge_bufferless(first, first+len1, first+len1+len2, comp); + return; + } + + //Detail the number of keys and internal buffer. If xbuf has enough memory, no + //internal buffer is needed so l_intbuf will remain 0. + size_type l_intbuf = 0; + size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); + size_type const to_collect = l_intbuf+n_keys; + //Try to extract needed unique values from the first range + size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len); + + //Not the minimum number of keys is not available on the first range, so fallback to rotations + if(collected != to_collect && collected < 4){ + merge_bufferless(first, first+collected, first+len1, comp); + merge_bufferless(first, first + len1, first + len1 + len2, comp); + return; + } + + //If not enough keys but more than minimum, adjust the internal buffer and key count + bool use_internal_buf = collected == to_collect; + if (!use_internal_buf){ + l_intbuf = 0u; + n_keys = collected; + l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); + //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used + l_intbuf = use_internal_buf ? l_block : 0u; + } + + bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; + //Merge trailing elements using smart merges + adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); + //Merge buffer and keys with the rest of the values + adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); + } +} + +} //namespace detail_adaptive { + +///@endcond + //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last) //! into one sorted range [first, last) according to the given comparison function comp. //! The algorithm is stable (if there are equivalent elements in the original two ranges, |