diff options
Diffstat (limited to 'boost/move')
-rw-r--r-- | boost/move/adl_move_swap.hpp | 6 | ||||
-rw-r--r-- | boost/move/algo/adaptive_merge.hpp | 253 | ||||
-rw-r--r-- | boost/move/algo/adaptive_sort.hpp | 552 | ||||
-rw-r--r-- | boost/move/algo/detail/adaptive_sort_merge.hpp | 1208 | ||||
-rw-r--r-- | boost/move/algo/detail/heap_sort.hpp | 111 | ||||
-rw-r--r-- | boost/move/algo/detail/insertion_sort.hpp | 10 | ||||
-rw-r--r-- | boost/move/algo/detail/is_sorted.hpp | 55 | ||||
-rw-r--r-- | boost/move/algo/detail/merge.hpp | 126 | ||||
-rw-r--r-- | boost/move/algo/detail/pdqsort.hpp | 334 | ||||
-rw-r--r-- | boost/move/algo/detail/set_difference.hpp | 207 | ||||
-rw-r--r-- | boost/move/detail/type_traits.hpp | 9 |
11 files changed, 1793 insertions, 1078 deletions
diff --git a/boost/move/adl_move_swap.hpp b/boost/move/adl_move_swap.hpp index d6906a483f..d9096e36c3 100644 --- a/boost/move/adl_move_swap.hpp +++ b/boost/move/adl_move_swap.hpp @@ -261,6 +261,12 @@ BidirIt2 adl_move_swap_ranges_backward(BidirIt1 first1, BidirIt1 last1, BidirIt2 return last2; } +template<class ForwardIt1, class ForwardIt2> +void adl_move_iter_swap(ForwardIt1 a, ForwardIt2 b) +{ + boost::adl_move_swap(*a, *b); +} + } //namespace boost{ #endif //#ifndef BOOST_MOVE_ADL_MOVE_SWAP_HPP diff --git a/boost/move/algo/adaptive_merge.hpp b/boost/move/algo/adaptive_merge.hpp index 0233b232e3..0040fda065 100644 --- a/boost/move/algo/adaptive_merge.hpp +++ b/boost/move/algo/adaptive_merge.hpp @@ -18,6 +18,259 @@ namespace boost { namespace movelib { +///@cond +namespace detail_adaptive { + +template<class RandIt, class Compare, class XBuf> +inline void adaptive_merge_combine_blocks( RandIt first + , typename iterator_traits<RandIt>::size_type len1 + , typename iterator_traits<RandIt>::size_type len2 + , typename iterator_traits<RandIt>::size_type collected + , typename iterator_traits<RandIt>::size_type n_keys + , typename iterator_traits<RandIt>::size_type l_block + , bool use_internal_buf + , bool xbuf_used + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const len = len1+len2; + size_type const l_combine = len-collected; + size_type const l_combine1 = len1-collected; + + if(n_keys){ + RandIt const first_data = first+collected; + RandIt const keys = first; + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); + if(xbuf_used){ + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } + BOOST_ASSERT(xbuf.size() >= l_block); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, comp, l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + op_merge_blocks_with_buf + (keys, comp, first_data, l_block, l_irreg1, 
n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len); + } + else{ + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, comp, l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + if(use_internal_buf){ + op_merge_blocks_with_buf + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len); + } + else{ + merge_blocks_bufferless + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len); + } + } + } + else{ + xbuf.shrink_to_fit(l_block); + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } + size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( uint_keys, less(), l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); + BOOST_ASSERT(xbuf.size() >= l_block); + op_merge_blocks_with_buf + (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); + xbuf.clear(); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len); + } +} + +template<class RandIt, class Compare, class XBuf> +inline void adaptive_merge_final_merge( RandIt first + , typename iterator_traits<RandIt>::size_type len1 + , typename iterator_traits<RandIt>::size_type len2 + , typename iterator_traits<RandIt>::size_type collected + , typename iterator_traits<RandIt>::size_type l_intbuf + , typename iterator_traits<RandIt>::size_type l_block + , bool use_internal_buf + , bool xbuf_used + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + (void)l_block; + 
size_type n_keys = collected-l_intbuf; + size_type len = len1+len2; + if(use_internal_buf){ + if(xbuf_used){ + xbuf.clear(); + //Nothing to do + if(n_keys){ + unstable_sort(first, first+n_keys, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len); + } + } + else{ + xbuf.clear(); + unstable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); + stable_merge(first, first+collected, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); + } + } + else{ + xbuf.clear(); + unstable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); + stable_merge(first, first+collected, first+len1+len2, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len); +} + +template<class SizeType, class Xbuf> +inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) +{ + typedef SizeType size_type; + size_type l_block = rl_block; + size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; + + while(xbuf.capacity() >= l_block*2){ + l_block *= 2; + } + + //This is the minimum number of keys to implement the ideal algorithm + size_type n_keys = len1/l_block+len2/l_block; + while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ + --n_keys; + } + ++n_keys; + BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); + + if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){ + n_keys = 0u; + } + l_intbuf_inout = l_intbuf; + rl_block = l_block; + return n_keys; +} + +// Main explanation of the merge algorithm. 
+// +// csqrtlen = ceil(sqrt(len)); +// +// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect +// unique elements are extracted from elements to be sorted and placed in the beginning of the range. +// +// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements +// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. +// +// Explanation of the "combine_blocks" step: +// +// * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. +// Remaining elements that can't form a group are grouped in front of those elements. +// * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. +// Remaining elements that can't form a group are grouped in the back of those elements. +// * In parallel the following two steps are performed: +// * Groups are selection-sorted by first or last element (depending whether they are going +// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). +// +// * In the final merge step leading "to_collect" elements are merged with rotations +// with the rest of merged elements in the "combine_blocks" step. +// +// Corner cases: +// +// * If no "to_collect" elements can be extracted: +// +// * If more than a minimum number of elements is extracted +// then reduces the number of elements used as buffer and keys in the +// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction +// then uses a rotation based smart merge. +// +// * If the minimum number of keys can't be extracted, a rotation-based merge is performed. +// +// * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. 
+// +// * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. +// +// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), +// then no csqrtlen need to be extracted and "combine_blocks" will use integral +// keys to combine blocks. +template<class RandIt, class Compare, class XBuf> +void adaptive_merge_impl + ( RandIt first + , typename iterator_traits<RandIt>::size_type len1 + , typename iterator_traits<RandIt>::size_type len2 + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + + if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ + buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); + } + else{ + const size_type len = len1+len2; + //Calculate ideal parameters and try to collect needed unique keys + size_type l_block = size_type(ceil_sqrt(len)); + + //One range is not big enough to extract keys and the internal buffer so a + //rotation-based based merge will do just fine + if(len1 <= l_block*2 || len2 <= l_block*2){ + merge_bufferless(first, first+len1, first+len1+len2, comp); + return; + } + + //Detail the number of keys and internal buffer. If xbuf has enough memory, no + //internal buffer is needed so l_intbuf will remain 0. 
+ size_type l_intbuf = 0; + size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); + size_type const to_collect = l_intbuf+n_keys; + //Try to extract needed unique values from the first range + size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len); + + //Not the minimum number of keys is not available on the first range, so fallback to rotations + if(collected != to_collect && collected < 4){ + merge_bufferless(first, first+collected, first+len1, comp); + merge_bufferless(first, first + len1, first + len1 + len2, comp); + return; + } + + //If not enough keys but more than minimum, adjust the internal buffer and key count + bool use_internal_buf = collected == to_collect; + if (!use_internal_buf){ + l_intbuf = 0u; + n_keys = collected; + l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); + //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used + l_intbuf = use_internal_buf ? l_block : 0u; + } + + bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; + //Merge trailing elements using smart merges + adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); + //Merge buffer and keys with the rest of the values + adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); + } +} + +} //namespace detail_adaptive { + +///@endcond + //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last) //! into one sorted range [first, last) according to the given comparison function comp. //! 
The algorithm is stable (if there are equivalent elements in the original two ranges, diff --git a/boost/move/algo/adaptive_sort.hpp b/boost/move/algo/adaptive_sort.hpp index c96ab2d78b..2026f9c1b5 100644 --- a/boost/move/algo/adaptive_sort.hpp +++ b/boost/move/algo/adaptive_sort.hpp @@ -18,6 +18,558 @@ namespace boost { namespace movelib { +///@cond +namespace detail_adaptive { + +template<class RandIt> +void move_data_backward( RandIt cur_pos + , typename iterator_traits<RandIt>::size_type const l_data + , RandIt new_pos + , bool const xbuf_used) +{ + //Move buffer to the total combination right + if(xbuf_used){ + boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data); + } + else{ + boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data); + //Rotate does less moves but it seems slower due to cache issues + //rotate_gcd(first-l_block, first+len-l_block, first+len); + } +} + +template<class RandIt> +void move_data_forward( RandIt cur_pos + , typename iterator_traits<RandIt>::size_type const l_data + , RandIt new_pos + , bool const xbuf_used) +{ + //Move buffer to the total combination right + if(xbuf_used){ + boost::move(cur_pos, cur_pos+l_data, new_pos); + } + else{ + boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); + //Rotate does less moves but it seems slower due to cache issues + //rotate_gcd(first-l_block, first+len-l_block, first+len); + } +} + +// build blocks of length 2*l_build_buf. l_build_buf is power of two +// input: [0, l_build_buf) elements are buffer, rest unsorted elements +// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted +// +// First elements are merged from right to left until elements start +// at first. All old elements [first, first + l_build_buf) are placed at the end +// [first+len-l_build_buf, first+len). To achieve this: +// - If we have external memory to merge, we save elements from the buffer +// so that a non-swapping merge is used. 
Buffer elements are restored +// at the end of the buffer from the external memory. +// +// - When the external memory is not available or it is insufficient +// for a merge operation, left swap merging is used. +// +// Once elements are merged left to right in blocks of l_build_buf, then a single left +// to right merge step is performed to achieve merged blocks of size 2K. +// If external memory is available, usual merge is used, swap merging otherwise. +// +// As a last step, if auxiliary memory is available in-place merge is performed. +// until all is merged or auxiliary memory is not large enough. +template<class RandIt, class Compare, class XBuf> +typename iterator_traits<RandIt>::size_type + adaptive_sort_build_blocks + ( RandIt const first + , typename iterator_traits<RandIt>::size_type const len + , typename iterator_traits<RandIt>::size_type const l_base + , typename iterator_traits<RandIt>::size_type const l_build_buf + , XBuf & xbuf + , Compare comp) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + BOOST_ASSERT(l_build_buf <= len); + BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); + + //Place the start pointer after the buffer + RandIt first_block = first + l_build_buf; + size_type const elements_in_blocks = len - l_build_buf; + + ////////////////////////////////// + // Start of merge to left step + ////////////////////////////////// + size_type l_merged = 0u; + + BOOST_ASSERT(l_build_buf); + //If there is no enough buffer for the insertion sort step, just avoid the external buffer + size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); + kbuf = kbuf < l_base ? 0 : kbuf; + + if(kbuf){ + //Backup internal buffer values in external buffer so they can be overwritten + xbuf.move_assign(first+l_build_buf-kbuf, kbuf); + l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); + + //Now combine them using the buffer. 
Elements from buffer can be + //overwritten since they've been saved to xbuf + l_merged = op_merge_left_step_multiple + ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); + + //Restore internal buffer from external buffer unless kbuf was l_build_buf, + //in that case restoration will happen later + if(kbuf != l_build_buf){ + boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); + } + } + else{ + l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); + rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); + } + + //Now combine elements using the buffer. Elements from buffer can't be + //overwritten since xbuf was not big enough, so merge swapping elements. + l_merged = op_merge_left_step_multiple + (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); + + BOOST_ASSERT(l_merged == l_build_buf); + + ////////////////////////////////// + // Start of merge to right step + ////////////////////////////////// + + //If kbuf is l_build_buf then we can merge right without swapping + //Saved data is still in xbuf + if(kbuf && kbuf == l_build_buf){ + op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op()); + //Restore internal buffer from external buffer if kbuf was l_build_buf. + //as this operation was previously delayed. 
+ boost::move(xbuf.data(), xbuf.data() + kbuf, first); + } + else{ + op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op()); + } + xbuf.clear(); + //2*l_build_buf or total already merged + return min_value(elements_in_blocks, 2*l_build_buf); +} + +template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf> +void adaptive_sort_combine_blocks + ( RandItKeys const keys + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const len + , typename iterator_traits<RandIt>::size_type const l_prev_merged + , typename iterator_traits<RandIt>::size_type const l_block + , bool const use_buf + , bool const xbuf_used + , XBuf & xbuf + , Compare comp + , bool merge_left) +{ + (void)xbuf; + typedef typename iterator_traits<RandIt>::size_type size_type; + + size_type const l_reg_combined = 2*l_prev_merged; + size_type l_irreg_combined = 0; + size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); + size_type const n_reg_combined = len/l_reg_combined; + RandIt combined_first = first; + + (void)l_total_combined; + BOOST_ASSERT(l_total_combined <= len); + + size_type const max_i = n_reg_combined + (l_irreg_combined != 0); + + if(merge_left || !use_buf) { + for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { + //Now merge blocks + bool const is_last = combined_i==n_reg_combined; + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? 
(combined_first-l_block) : combined_first, combined_first); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, key_comp, l_cur_combined + , l_prev_merged, l_block, rbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); + if(!use_buf){ + merge_blocks_bufferless + (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp); + } + else{ + merge_blocks_left + (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block); + } + } + else{ + combined_first += l_reg_combined*(max_i-1); + for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { + bool const is_last = combined_i==n_reg_combined; + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + RandIt const combined_last(combined_first+l_cur_combined); + range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? 
(combined_last+l_block) : combined_last); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, key_comp, l_cur_combined + , l_prev_merged, l_block, rbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); + merge_blocks_right + (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block); + } + } +} + +//Returns true if buffer is placed in +//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is +//[buffer,buffer+l_intbuf) +template<class RandIt, class Compare, class XBuf> +bool adaptive_sort_combine_all_blocks + ( RandIt keys + , typename iterator_traits<RandIt>::size_type &n_keys + , RandIt const buffer + , typename iterator_traits<RandIt>::size_type const l_buf_plus_data + , typename iterator_traits<RandIt>::size_type l_merged + , typename iterator_traits<RandIt>::size_type &l_intbuf + , XBuf & xbuf + , Compare comp) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + RandIt const first = buffer + l_intbuf; + size_type const l_data = l_buf_plus_data - l_intbuf; + size_type const l_unique = l_intbuf+n_keys; + //Backup data to external buffer once if possible + bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); + if(common_xbuf){ + xbuf.move_assign(buffer, l_intbuf); + } + + bool prev_merge_left = true; + size_type l_prev_total_combined = l_merged, l_prev_block = 0; + bool prev_use_internal_buf = true; + + for( size_type n = 0; l_data > l_merged + ; l_merged*=2 + , ++n){ + //If l_intbuf is non-zero, use that internal 
buffer. + // Implies l_block == l_intbuf && use_internal_buf == true + //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer, + // Implies l_block == n_keys/2 && use_internal_buf == true + //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false) + bool use_internal_buf = false; + size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf); + BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); + BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); + BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); + + bool const is_merge_left = (n&1) == 0; + size_type const l_total_combined = calculate_total_combined(l_data, l_merged); + if(n && prev_use_internal_buf && prev_merge_left){ + if(is_merge_left || !use_internal_buf){ + move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); + } + else{ + //Put the buffer just after l_total_combined + RandIt const buf_end = first+l_prev_total_combined; + RandIt const buf_beg = buf_end-l_block; + if(l_prev_total_combined > l_total_combined){ + size_type const l_diff = l_prev_total_combined - l_total_combined; + move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); + } + else if(l_prev_total_combined < l_total_combined){ + size_type const l_diff = l_total_combined - l_prev_total_combined; + move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); + } + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf); + } + + //Combine to form l_merged*2 segments + if(n_keys){ + adaptive_sort_combine_blocks + ( keys, comp, !use_internal_buf || is_merge_left ? 
first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + else{ + size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); + adaptive_sort_combine_blocks + ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf); + prev_merge_left = is_merge_left; + l_prev_total_combined = l_total_combined; + l_prev_block = l_block; + prev_use_internal_buf = use_internal_buf; + } + BOOST_ASSERT(l_prev_total_combined == l_data); + bool const buffer_right = prev_use_internal_buf && prev_merge_left; + + l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; + n_keys = l_unique - l_intbuf; + //Restore data from to external common buffer if used + if(common_xbuf){ + if(buffer_right){ + boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); + } + else{ + boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); + } + } + return buffer_right; +} + + +template<class RandIt, class Compare, class XBuf> +void adaptive_sort_final_merge( bool buffer_right + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_intbuf + , typename iterator_traits<RandIt>::size_type const n_keys + , typename iterator_traits<RandIt>::size_type const len + , XBuf & xbuf + , Compare comp) +{ + //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); + xbuf.clear(); + + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const n_key_plus_buf = l_intbuf+n_keys; + if(buffer_right){ + //Use stable sort as some buffer elements might not be unique (see non_unique_buf) + stable_sort(first+len-l_intbuf, first+len, comp, xbuf); + stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf); + unstable_sort(first, first+n_keys, comp, 
xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + } + else{ + //Use stable sort as some buffer elements might not be unique (see non_unique_buf) + stable_sort(first, first+n_key_plus_buf, comp, xbuf); + if(xbuf.capacity() >= n_key_plus_buf){ + buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); + } + else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){ + stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + } + else{ + stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); + } + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len); +} + +template<class RandIt, class Compare, class Unsigned, class XBuf> +bool adaptive_sort_build_params + (RandIt first, Unsigned const len, Compare comp + , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf + , XBuf & xbuf + ) +{ + typedef Unsigned size_type; + + //Calculate ideal parameters and try to collect needed unique keys + l_base = 0u; + + //Try to find a value near sqrt(len) that is 2^N*l_base where + //l_base <= AdaptiveSortInsertionSortThreshold. This property is important + //as build_blocks merges to the left iteratively duplicating the + //merged size and all the buffer must be used just before the final + //merge to right step. This guarantees "build_blocks" produces + //segments of size l_build_buf*2, maximizing the classic merge phase. 
+ l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); + + //The internal buffer can be expanded if there is enough external memory + while(xbuf.capacity() >= l_intbuf*2){ + l_intbuf *= 2; + } + + //This is the minimum number of keys to implement the ideal algorithm + // + //l_intbuf is used as buffer plus the key count + size_type n_min_ideal_keys = l_intbuf-1; + while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ + --n_min_ideal_keys; + } + n_min_ideal_keys += 1; + BOOST_ASSERT(n_min_ideal_keys <= l_intbuf); + + if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){ + n_keys = 0u; + l_build_buf = l_intbuf; + } + else{ + //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that + //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half + //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin. + // + //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, + //(to be used for keys in combine_all_blocks) as the whole l_build_buf + //will be backuped in the buffer during build_blocks. + bool const non_unique_buf = xbuf.capacity() >= l_intbuf; + size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2; + size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); + + //If available memory is 2*sqrt(l), then for "build_params" + //the situation is the same as if 2*l_intbuf were collected. + if(non_unique_buf && collected == n_min_ideal_keys){ + l_build_buf = l_intbuf; + n_keys = n_min_ideal_keys; + } + else if(collected == 2*l_intbuf){ + //l_intbuf*2 elements found. Use all of them in the build phase + l_build_buf = l_intbuf*2; + n_keys = l_intbuf; + } + else if(collected == (n_min_ideal_keys+l_intbuf)){ + l_build_buf = l_intbuf; + n_keys = n_min_ideal_keys; + } + //If collected keys are not enough, try to fix n_keys and l_intbuf. 
If no fix + //is possible (due to very low unique keys), then go to a slow sort based on rotations. + else{ + BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf)); + if(collected < 4){ //No combination possible with less that 4 keys + return false; + } + n_keys = l_intbuf; + while(n_keys&(n_keys-1)){ + n_keys &= n_keys-1; // make it power or 2 + } + while(n_keys > collected){ + n_keys/=2; + } + //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two + l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold); + l_intbuf = 0; + l_build_buf = n_keys; + } + BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf); + } + + return true; +} + +// Main explanation of the sort algorithm. +// +// csqrtlen = ceil(sqrt(len)); +// +// * First, 2*csqrtlen unique elements elements are extracted from elements to be +// sorted and placed in the beginning of the range. +// +// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements +// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are +// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step +// 2*csqrtlen unique elements are again the leading elements of the whole range. +// +// * Step "combine_blocks": pairs of previously formed blocks are merged with a different +// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the +// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen +// elements, etc) of until all trailing (len-2*csqrtlen) elements are merged. +// +// In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to +// know if elements belong to the first or second block to be merged and another +// leading csqrtlen elements are used as buffer. 
Explanation of the "combine_blocks" step: +// +// Iteratively until all trailing (len-2*csqrtlen) elements are merged: +// Iteratively for each pair of previously merged block: +// * Blocks are divided groups of csqrtlen elements and +// 2*merged_block/csqrtlen keys are sorted to be used as markers +// * Groups are selection-sorted by first or last element (depending whether they are going +// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). +// +// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with +// rotations with the rest of sorted elements in the "combine_blocks" step. +// +// Corner cases: +// +// * If no 2*csqrtlen elements can be extracted: +// +// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used +// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This +// means that an additional "combine_blocks" step will be needed to merge all elements. +// +// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum, +// then reduces the number of elements used as buffer and keys in the "build_blocks" +// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction +// then uses a rotation based smart merge. +// +// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. +// +// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used. +// +// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), +// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral +// keys to combine blocks. 
+// +// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks +// using classic merge and "combine_blocks" will use bigger blocks when merging. +template<class RandIt, class Compare, class XBuf> +void adaptive_sort_impl + ( RandIt first + , typename iterator_traits<RandIt>::size_type const len + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + + //Small sorts go directly to insertion sort + if(len <= size_type(AdaptiveSortInsertionSortThreshold)){ + insertion_sort(first, first + len, comp); + } + else if((len-len/2) <= xbuf.capacity()){ + merge_sort(first, first+len, comp, xbuf.data()); + } + else{ + //Make sure it is at least four + BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4); + + size_type l_base = 0; + size_type l_intbuf = 0; + size_type n_keys = 0; + size_type l_build_buf = 0; + + //Calculate and extract needed unique elements. If a minimum is not achieved + //fallback to a slow stable sort + if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ + stable_sort(first, first+len, comp, xbuf); + } + else{ + BOOST_ASSERT(l_build_buf); + //Otherwise, continue the adaptive_sort + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len); + size_type const n_key_plus_buf = l_intbuf+n_keys; + //l_build_buf is always power of two if l_intbuf is zero + BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); + + //Classic merge sort until internal buffer and xbuf are exhausted + size_type const l_merged = adaptive_sort_build_blocks + (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len); + + //Non-trivial merge + bool const buffer_right = adaptive_sort_combine_all_blocks + (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); + + //Sort keys and buffer and merge the 
whole sequence + adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); + } + } +} + +} //namespace detail_adaptive { + +///@endcond + //! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according //! to comparison functor "comp". The sort is stable (order of equal elements //! is guaranteed to be preserved). Performance is improved if additional raw storage is diff --git a/boost/move/algo/detail/adaptive_sort_merge.hpp b/boost/move/algo/detail/adaptive_sort_merge.hpp index 5085100ad0..1606fde66a 100644 --- a/boost/move/algo/detail/adaptive_sort_merge.hpp +++ b/boost/move/algo/detail/adaptive_sort_merge.hpp @@ -49,7 +49,9 @@ #include <boost/move/adl_move_swap.hpp> #include <boost/move/algo/detail/insertion_sort.hpp> #include <boost/move/algo/detail/merge_sort.hpp> +#include <boost/move/algo/detail/heap_sort.hpp> #include <boost/move/algo/detail/merge.hpp> +#include <boost/move/algo/detail/is_sorted.hpp> #include <boost/assert.hpp> #include <boost/cstdint.hpp> @@ -84,11 +86,26 @@ #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L) #endif - - namespace boost { namespace movelib { +#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS) + +bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less) +{ + if (first != last) { + const order_perf_type *next = first, *cur(first); + while (++next != last) { + if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val))) + return false; + cur = next; + } + } + return true; +} + +#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS + namespace detail_adaptive { static const std::size_t AdaptiveSortInsertionSortThreshold = 16; @@ -113,51 +130,6 @@ const T &max_value(const T &a, const T &b) return a > b ? 
a : b; } -template<class ForwardIt, class Pred> -bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred) -{ - if (first != last) { - ForwardIt next = first, cur(first); - while (++next != last) { - if (pred(*next, *cur)) - return false; - cur = next; - } - } - return true; -} - -#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS) - -bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less) -{ - if (first != last) { - const order_perf_type *next = first, *cur(first); - while (++next != last) { - if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val))) - return false; - cur = next; - } - } - return true; -} - -#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS - -template<class ForwardIt, class Pred> -bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred) -{ - if (first != last) { - ForwardIt next = first; - while (++next != last) { - if (!pred(*first, *next)) - return false; - first = next; - } - } - return true; -} - template<class ForwardIt, class Pred, class V> typename iterator_traits<ForwardIt>::size_type count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v) @@ -414,44 +386,6 @@ RandIt skip_until_merge } -template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> -RandItB op_buffered_partial_merge_to_range1_and_buffer - ( RandIt1 first1, RandIt1 const last1 - , RandIt2 &rfirst2, RandIt2 const last2 - , RandItB &rfirstb, Compare comp, Op op ) -{ - RandItB firstb = rfirstb; - RandItB lastb = firstb; - RandIt2 first2 = rfirst2; - - //Move to buffer while merging - //Three way moves need less moves when op is swap_op so use it - //when merging elements from range2 to the destination occupied by range1 - if(first1 != last1 && first2 != last2){ - op(three_way_t(), first2++, first1++, lastb++); - - while(true){ - if(first1 == last1){ - break; - } - if(first2 == last2){ - lastb = op(forward_t(), first1, last1, firstb); - break; - } - if (comp(*first2, *firstb)) { - 
op(three_way_t(), first2++, first1++, lastb++); - } - else { - op(three_way_t(), firstb++, first1++, lastb++); - } - } - rfirst2 = first2; - rfirstb = firstb; - } - - return lastb; -} - template<class RandItKeys, class RandIt> void swap_and_update_key ( RandItKeys const key_next @@ -567,7 +501,7 @@ void merge_blocks_bufferless typedef typename iterator_traits<RandIt>::size_type size_type; size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; //BOOST_ASSERT(n_block_a || n_block_b); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp)); BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); size_type n_bef_irreg2 = 0; @@ -890,11 +824,40 @@ void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf) } template<class RandIt, class Comp, class XBuf> +void unstable_sort( RandIt first, RandIt last + , Comp comp + , XBuf & xbuf) +{ + heap_sort(first, last, comp);(void)xbuf; +} + +template<class RandIt, class Compare, class XBuf> +void stable_merge + ( RandIt first, RandIt const middle, RandIt last + , Compare comp + , XBuf &xbuf) +{ + BOOST_ASSERT(xbuf.empty()); + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const len1 = size_type(middle-first); + size_type const len2 = size_type(last-middle); + size_type const l_min = min_value(len1, len2); + if(xbuf.capacity() >= l_min){ + buffered_merge(first, middle, last, comp, xbuf); + xbuf.clear(); + } + else{ + merge_bufferless(first, middle, last, comp); + } +} + +template<class RandIt, class Comp, class XBuf> void initialize_keys( RandIt first, RandIt last , Comp comp , XBuf & xbuf) { - stable_sort(first, last, comp, xbuf); + unstable_sort(first, last, comp, xbuf); + 
BOOST_ASSERT(boost::movelib::is_sorted_and_unique(first, last, comp)); } template<class RandIt, class U> @@ -910,40 +873,6 @@ void initialize_keys( RandIt first, RandIt last } } -template<class RandIt> -void move_data_backward( RandIt cur_pos - , typename iterator_traits<RandIt>::size_type const l_data - , RandIt new_pos - , bool const xbuf_used) -{ - //Move buffer to the total combination right - if(xbuf_used){ - boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data); - } - else{ - boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data); - //Rotate does less moves but it seems slower due to cache issues - //rotate_gcd(first-l_block, first+len-l_block, first+len); - } -} - -template<class RandIt> -void move_data_forward( RandIt cur_pos - , typename iterator_traits<RandIt>::size_type const l_data - , RandIt new_pos - , bool const xbuf_used) -{ - //Move buffer to the total combination right - if(xbuf_used){ - boost::move(cur_pos, cur_pos+l_data, new_pos); - } - else{ - boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); - //Rotate does less moves but it seems slower due to cache issues - //rotate_gcd(first-l_block, first+len-l_block, first+len); - } -} - template <class Unsigned> Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0) { @@ -994,43 +923,7 @@ void combine_params } } -template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> -RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer - ( RandIt1 first1, RandIt1 const last1 - , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min - , RandItB &rfirstb, Compare comp, Op op ) -{ - RandItB firstb = rfirstb; - RandItB lastb = firstb; - RandIt2 first2 = rfirst2; - //Move to buffer while merging - //Three way moves need less moves when op is swap_op so use it - //when merging elements from range2 to the destination occupied by range1 - if(first1 != last1 && first2 != last2){ - 
RandIt2 first_min = rfirst_min; - op(four_way_t(), first2++, first_min++, first1++, lastb++); - - while(first1 != last1){ - if(first2 == last2){ - lastb = op(forward_t(), first1, last1, firstb); - break; - } - - if(comp(*first_min, *firstb)){ - op( four_way_t(), first2++, first_min++, first1++, lastb++); - } - else{ - op(three_way_t(), firstb++, first1++, lastb++); - } - } - rfirst2 = first2; - rfirstb = firstb; - rfirst_min = first_min; - } - - return lastb; -} ////////////////////////////////// // @@ -1072,10 +965,14 @@ OutputIt op_partial_merge } ////////////////////////////////// +////////////////////////////////// +////////////////////////////////// // -// partial_merge_and_swap +// op_partial_merge_and_save // ////////////////////////////////// +////////////////////////////////// +////////////////////////////////// template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> OutputIt op_partial_merge_and_swap_impl (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op) @@ -1111,6 +1008,82 @@ OutputIt op_partial_merge_and_swap : op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable<Compare>(comp), op); } +template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> +RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer + ( RandIt1 first1, RandIt1 const last1 + , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min + , RandItB &rfirstb, Compare comp, Op op ) +{ + RandItB firstb = rfirstb; + RandItB lastb = firstb; + RandIt2 first2 = rfirst2; + + //Move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + if(first1 != last1 && first2 != last2){ + RandIt2 first_min = rfirst_min; + op(four_way_t(), first2++, first_min++, first1++, lastb++); + + while(first1 != 
last1){ + if(first2 == last2){ + lastb = op(forward_t(), first1, last1, firstb); + break; + } + + if(comp(*first_min, *firstb)){ + op( four_way_t(), first2++, first_min++, first1++, lastb++); + } + else{ + op(three_way_t(), firstb++, first1++, lastb++); + } + } + rfirst2 = first2; + rfirstb = firstb; + rfirst_min = first_min; + } + + return lastb; +} + +template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> +RandItB op_buffered_partial_merge_to_range1_and_buffer + ( RandIt1 first1, RandIt1 const last1 + , RandIt2 &rfirst2, RandIt2 const last2 + , RandItB &rfirstb, Compare comp, Op op ) +{ + RandItB firstb = rfirstb; + RandItB lastb = firstb; + RandIt2 first2 = rfirst2; + + //Move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + if(first1 != last1 && first2 != last2){ + op(three_way_t(), first2++, first1++, lastb++); + + while(true){ + if(first1 == last1){ + break; + } + if(first2 == last2){ + lastb = op(forward_t(), first1, last1, firstb); + break; + } + if (comp(*first2, *firstb)) { + op(three_way_t(), first2++, first1++, lastb++); + } + else { + op(three_way_t(), firstb++, first1++, lastb++); + } + } + rfirst2 = first2; + rfirstb = firstb; + } + + return lastb; +} + template<class RandIt, class RandItBuf, class Compare, class Op> RandIt op_partial_merge_and_save_impl ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min @@ -1162,7 +1135,15 @@ RandIt op_partial_merge_and_save ; } - +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// +// +// op_merge_blocks_with_irreg +// +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// template<class RandItKeys, class KeyCompare, class RandIt, class RandIt2, class OutputIt, class Compare, class Op> OutputIt op_merge_blocks_with_irreg @@ -1215,6 
+1196,16 @@ OutputIt op_merge_blocks_with_irreg return dest; } +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// +// +// op_merge_blocks_left/right +// +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// + template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op> void op_merge_blocks_left ( RandItKeys const key_first @@ -1230,7 +1221,7 @@ void op_merge_blocks_left typedef typename iterator_traits<RandIt>::size_type size_type; size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; // BOOST_ASSERT(n_block_a || n_block_b); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp)); BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); size_type n_block_b_left = n_block_b; @@ -1394,7 +1385,6 @@ void merge_blocks_left } } - // first - first element to merge. // [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer // l_block - length of regular blocks. 
First nblocks are stable sorted by 1st elements and key-coded @@ -1426,6 +1416,15 @@ void merge_blocks_right , inverse<Compare>(comp), xbuf_used); } +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// +// +// op_merge_blocks_with_buf +// +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class RandItBuf> void op_merge_blocks_with_buf ( RandItKeys key_first @@ -1443,7 +1442,7 @@ void op_merge_blocks_with_buf typedef typename iterator_traits<RandIt>::size_type size_type; size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; //BOOST_ASSERT(n_block_a || n_block_b); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp)); BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); size_type n_block_b_left = n_block_b; @@ -1552,29 +1551,15 @@ void op_merge_blocks_with_buf BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); } -template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class RandItBuf> -void merge_blocks_with_buf - ( RandItKeys key_first - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits<RandIt>::size_type const l_block - , typename iterator_traits<RandIt>::size_type const l_irreg1 - , typename iterator_traits<RandIt>::size_type const n_block_a - , typename iterator_traits<RandIt>::size_type const n_block_b - , typename iterator_traits<RandIt>::size_type const l_irreg2 - , Compare comp - , RandItBuf const buf_first - , bool const xbuf_used) -{ - if(xbuf_used){ - op_merge_blocks_with_buf - (key_first, key_comp, first, l_block, 
l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first); - } - else{ - op_merge_blocks_with_buf - (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first); - } -} +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// +// +// op_insertion_sort_step_left/right +// +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// template<class RandIt, class Compare, class Op> typename iterator_traits<RandIt>::size_type @@ -1596,6 +1581,41 @@ typename iterator_traits<RandIt>::size_type return s; } +template<class RandIt, class Compare, class Op> +void op_merge_right_step_once + ( RandIt first_block + , typename iterator_traits<RandIt>::size_type const elements_in_blocks + , typename iterator_traits<RandIt>::size_type const l_build_buf + , Compare comp + , Op op) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type restk = elements_in_blocks%(2*l_build_buf); + size_type p = elements_in_blocks - restk; + BOOST_ASSERT(0 == (p%(2*l_build_buf))); + + if(restk <= l_build_buf){ + op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf); + } + else{ + op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op); + } + while(p>0){ + p -= 2*l_build_buf; + op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); + } +} + + +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// +// +// insertion_sort_step +// +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// template<class RandIt, class Compare> typename iterator_traits<RandIt>::size_type insertion_sort_step @@ -1616,6 +1636,15 @@ typename iterator_traits<RandIt>::size_type return 
s; } +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// +// +// op_merge_left_step_multiple +// +////////////////////////////////// +////////////////////////////////// +////////////////////////////////// template<class RandIt, class Compare, class Op> typename iterator_traits<RandIt>::size_type op_merge_left_step_multiple @@ -1651,815 +1680,6 @@ typename iterator_traits<RandIt>::size_type return l_merged; } -template<class RandIt, class Compare, class Op> -void op_merge_right_step_once - ( RandIt first_block - , typename iterator_traits<RandIt>::size_type const elements_in_blocks - , typename iterator_traits<RandIt>::size_type const l_build_buf - , Compare comp - , Op op) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - size_type restk = elements_in_blocks%(2*l_build_buf); - size_type p = elements_in_blocks - restk; - BOOST_ASSERT(0 == (p%(2*l_build_buf))); - - if(restk <= l_build_buf){ - op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf); - } - else{ - op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op); - } - while(p>0){ - p -= 2*l_build_buf; - op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); - } -} - - -// build blocks of length 2*l_build_buf. l_build_buf is power of two -// input: [0, l_build_buf) elements are buffer, rest unsorted elements -// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted -// -// First elements are merged from right to left until elements start -// at first. All old elements [first, first + l_build_buf) are placed at the end -// [first+len-l_build_buf, first+len). To achieve this: -// - If we have external memory to merge, we save elements from the buffer -// so that a non-swapping merge is used. 
Buffer elements are restored -// at the end of the buffer from the external memory. -// -// - When the external memory is not available or it is insufficient -// for a merge operation, left swap merging is used. -// -// Once elements are merged left to right in blocks of l_build_buf, then a single left -// to right merge step is performed to achieve merged blocks of size 2K. -// If external memory is available, usual merge is used, swap merging otherwise. -// -// As a last step, if auxiliary memory is available in-place merge is performed. -// until all is merged or auxiliary memory is not large enough. -template<class RandIt, class Compare, class XBuf> -typename iterator_traits<RandIt>::size_type - adaptive_sort_build_blocks - ( RandIt const first - , typename iterator_traits<RandIt>::size_type const len - , typename iterator_traits<RandIt>::size_type const l_base - , typename iterator_traits<RandIt>::size_type const l_build_buf - , XBuf & xbuf - , Compare comp) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - BOOST_ASSERT(l_build_buf <= len); - BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); - - //Place the start pointer after the buffer - RandIt first_block = first + l_build_buf; - size_type const elements_in_blocks = len - l_build_buf; - - ////////////////////////////////// - // Start of merge to left step - ////////////////////////////////// - size_type l_merged = 0u; - - BOOST_ASSERT(l_build_buf); - //If there is no enough buffer for the insertion sort step, just avoid the external buffer - size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); - kbuf = kbuf < l_base ? 0 : kbuf; - - if(kbuf){ - //Backup internal buffer values in external buffer so they can be overwritten - xbuf.move_assign(first+l_build_buf-kbuf, kbuf); - l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); - - //Now combine them using the buffer. 
Elements from buffer can be - //overwritten since they've been saved to xbuf - l_merged = op_merge_left_step_multiple - ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); - - //Restore internal buffer from external buffer unless kbuf was l_build_buf, - //in that case restoration will happen later - if(kbuf != l_build_buf){ - boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); - } - } - else{ - l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); - rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); - } - - //Now combine elements using the buffer. Elements from buffer can't be - //overwritten since xbuf was not big enough, so merge swapping elements. - l_merged = op_merge_left_step_multiple - (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); - - BOOST_ASSERT(l_merged == l_build_buf); - - ////////////////////////////////// - // Start of merge to right step - ////////////////////////////////// - - //If kbuf is l_build_buf then we can merge right without swapping - //Saved data is still in xbuf - if(kbuf && kbuf == l_build_buf){ - op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op()); - //Restore internal buffer from external buffer if kbuf was l_build_buf. - //as this operation was previously delayed. 
- boost::move(xbuf.data(), xbuf.data() + kbuf, first); - } - else{ - op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op()); - } - xbuf.clear(); - //2*l_build_buf or total already merged - return min_value(elements_in_blocks, 2*l_build_buf); -} - -template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf> -void adaptive_sort_combine_blocks - ( RandItKeys const keys - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits<RandIt>::size_type const len - , typename iterator_traits<RandIt>::size_type const l_prev_merged - , typename iterator_traits<RandIt>::size_type const l_block - , bool const use_buf - , bool const xbuf_used - , XBuf & xbuf - , Compare comp - , bool merge_left) -{ - (void)xbuf; - typedef typename iterator_traits<RandIt>::size_type size_type; - - size_type const l_reg_combined = 2*l_prev_merged; - size_type l_irreg_combined = 0; - size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); - size_type const n_reg_combined = len/l_reg_combined; - RandIt combined_first = first; - - (void)l_total_combined; - BOOST_ASSERT(l_total_combined <= len); - - size_type const max_i = n_reg_combined + (l_irreg_combined != 0); - - if(merge_left || !use_buf) { - for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { - //Now merge blocks - bool const is_last = combined_i==n_reg_combined; - size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; - - range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? 
(combined_first-l_block) : combined_first, combined_first); - size_type n_block_a, n_block_b, l_irreg1, l_irreg2; - combine_params( keys, key_comp, l_cur_combined - , l_prev_merged, l_block, rbuf - , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); - if(!use_buf){ - merge_blocks_bufferless - (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp); - } - else{ - merge_blocks_left - (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); - } - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block); - } - } - else{ - combined_first += l_reg_combined*(max_i-1); - for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { - bool const is_last = combined_i==n_reg_combined; - size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; - - RandIt const combined_last(combined_first+l_cur_combined); - range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? 
(combined_last+l_block) : combined_last); - size_type n_block_a, n_block_b, l_irreg1, l_irreg2; - combine_params( keys, key_comp, l_cur_combined - , l_prev_merged, l_block, rbuf - , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); - BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); - merge_blocks_right - (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block); - } - } -} - -//Returns true if buffer is placed in -//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is -//[buffer,buffer+l_intbuf) -template<class RandIt, class Compare, class XBuf> -bool adaptive_sort_combine_all_blocks - ( RandIt keys - , typename iterator_traits<RandIt>::size_type &n_keys - , RandIt const buffer - , typename iterator_traits<RandIt>::size_type const l_buf_plus_data - , typename iterator_traits<RandIt>::size_type l_merged - , typename iterator_traits<RandIt>::size_type &l_intbuf - , XBuf & xbuf - , Compare comp) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - RandIt const first = buffer + l_intbuf; - size_type const l_data = l_buf_plus_data - l_intbuf; - size_type const l_unique = l_intbuf+n_keys; - //Backup data to external buffer once if possible - bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); - if(common_xbuf){ - xbuf.move_assign(buffer, l_intbuf); - } - - bool prev_merge_left = true; - size_type l_prev_total_combined = l_merged, l_prev_block = 0; - bool prev_use_internal_buf = true; - - for( size_type n = 0; l_data > l_merged - ; l_merged*=2 - , ++n){ - //If l_intbuf is non-zero, use that internal 
buffer. - // Implies l_block == l_intbuf && use_internal_buf == true - //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer, - // Implies l_block == n_keys/2 && use_internal_buf == true - //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false) - bool use_internal_buf = false; - size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf); - BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); - BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); - BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); - - bool const is_merge_left = (n&1) == 0; - size_type const l_total_combined = calculate_total_combined(l_data, l_merged); - if(n && prev_use_internal_buf && prev_merge_left){ - if(is_merge_left || !use_internal_buf){ - move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); - } - else{ - //Put the buffer just after l_total_combined - RandIt const buf_end = first+l_prev_total_combined; - RandIt const buf_beg = buf_end-l_block; - if(l_prev_total_combined > l_total_combined){ - size_type const l_diff = l_prev_total_combined - l_total_combined; - move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); - } - else if(l_prev_total_combined < l_total_combined){ - size_type const l_diff = l_total_combined - l_prev_total_combined; - move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); - } - } - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf); - } - - //Combine to form l_merged*2 segments - if(n_keys){ - adaptive_sort_combine_blocks - ( keys, comp, !use_internal_buf || is_merge_left ? 
first : first-l_block - , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); - } - else{ - size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); - adaptive_sort_combine_blocks - ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block - , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); - } - - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf); - prev_merge_left = is_merge_left; - l_prev_total_combined = l_total_combined; - l_prev_block = l_block; - prev_use_internal_buf = use_internal_buf; - } - BOOST_ASSERT(l_prev_total_combined == l_data); - bool const buffer_right = prev_use_internal_buf && prev_merge_left; - - l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; - n_keys = l_unique - l_intbuf; - //Restore data from to external common buffer if used - if(common_xbuf){ - if(buffer_right){ - boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); - } - else{ - boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); - } - } - return buffer_right; -} - -template<class RandIt, class Compare, class XBuf> -void stable_merge - ( RandIt first, RandIt const middle, RandIt last - , Compare comp - , XBuf &xbuf) -{ - BOOST_ASSERT(xbuf.empty()); - typedef typename iterator_traits<RandIt>::size_type size_type; - size_type const len1 = size_type(middle-first); - size_type const len2 = size_type(last-middle); - size_type const l_min = min_value(len1, len2); - if(xbuf.capacity() >= l_min){ - buffered_merge(first, middle, last, comp, xbuf); - xbuf.clear(); - } - else{ - merge_bufferless(first, middle, last, comp); - } -} - - -template<class RandIt, class Compare, class XBuf> -void adaptive_sort_final_merge( bool buffer_right - , RandIt const first - , typename iterator_traits<RandIt>::size_type const l_intbuf - , typename iterator_traits<RandIt>::size_type const n_keys - , typename 
iterator_traits<RandIt>::size_type const len - , XBuf & xbuf - , Compare comp) -{ - //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); - xbuf.clear(); - - typedef typename iterator_traits<RandIt>::size_type size_type; - size_type const n_key_plus_buf = l_intbuf+n_keys; - if(buffer_right){ - stable_sort(first+len-l_intbuf, first+len, comp, xbuf); - stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf); - stable_sort(first, first+n_keys, comp, xbuf); - stable_merge(first, first+n_keys, first+len, comp, xbuf); - } - else{ - stable_sort(first, first+n_key_plus_buf, comp, xbuf); - if(xbuf.capacity() >= n_key_plus_buf){ - buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); - } - else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){ - stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf); - stable_merge(first, first+n_keys, first+len, comp, xbuf); - } - else{ - merge_bufferless(first, first+n_key_plus_buf, first+len, comp); - } - } - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len); -} - -template<class RandIt, class Compare, class Unsigned, class XBuf> -bool adaptive_sort_build_params - (RandIt first, Unsigned const len, Compare comp - , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf - , XBuf & xbuf - ) -{ - typedef Unsigned size_type; - - //Calculate ideal parameters and try to collect needed unique keys - l_base = 0u; - - //Try to find a value near sqrt(len) that is 2^N*l_base where - //l_base <= AdaptiveSortInsertionSortThreshold. This property is important - //as build_blocks merges to the left iteratively duplicating the - //merged size and all the buffer must be used just before the final - //merge to right step. This guarantees "build_blocks" produces - //segments of size l_build_buf*2, maximizing the classic merge phase. 
- l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); - - //The internal buffer can be expanded if there is enough external memory - while(xbuf.capacity() >= l_intbuf*2){ - l_intbuf *= 2; - } - - //This is the minimum number of keys to implement the ideal algorithm - // - //l_intbuf is used as buffer plus the key count - size_type n_min_ideal_keys = l_intbuf-1; - while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ - --n_min_ideal_keys; - } - n_min_ideal_keys += 1; - BOOST_ASSERT(n_min_ideal_keys <= l_intbuf); - - if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){ - n_keys = 0u; - l_build_buf = l_intbuf; - } - else{ - //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that - //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half - //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin. - // - //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, - //(to be used for keys in combine_all_blocks) as the whole l_build_buf - //will be backuped in the buffer during build_blocks. - bool const non_unique_buf = xbuf.capacity() >= l_intbuf; - size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2; - size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); - - //If available memory is 2*sqrt(l), then for "build_params" - //the situation is the same as if 2*l_intbuf were collected. - if(non_unique_buf && collected == n_min_ideal_keys){ - l_build_buf = l_intbuf; - n_keys = n_min_ideal_keys; - } - else if(collected == 2*l_intbuf){ - //l_intbuf*2 elements found. Use all of them in the build phase - l_build_buf = l_intbuf*2; - n_keys = l_intbuf; - } - else if(collected == (n_min_ideal_keys+l_intbuf)){ - l_build_buf = l_intbuf; - n_keys = n_min_ideal_keys; - } - //If collected keys are not enough, try to fix n_keys and l_intbuf. 
If no fix - //is possible (due to very low unique keys), then go to a slow sort based on rotations. - else{ - BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf)); - if(collected < 4){ //No combination possible with less that 4 keys - return false; - } - n_keys = l_intbuf; - while(n_keys&(n_keys-1)){ - n_keys &= n_keys-1; // make it power or 2 - } - while(n_keys > collected){ - n_keys/=2; - } - //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two - l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold); - l_intbuf = 0; - l_build_buf = n_keys; - } - BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf); - } - - return true; -} - -template<class RandIt, class Compare, class XBuf> -inline void adaptive_merge_combine_blocks( RandIt first - , typename iterator_traits<RandIt>::size_type len1 - , typename iterator_traits<RandIt>::size_type len2 - , typename iterator_traits<RandIt>::size_type collected - , typename iterator_traits<RandIt>::size_type n_keys - , typename iterator_traits<RandIt>::size_type l_block - , bool use_internal_buf - , bool xbuf_used - , Compare comp - , XBuf & xbuf - ) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - size_type const len = len1+len2; - size_type const l_combine = len-collected; - size_type const l_combine1 = len1-collected; - - if(n_keys){ - RandIt const first_data = first+collected; - RandIt const keys = first; - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); - if(xbuf_used){ - if(xbuf.size() < l_block){ - xbuf.initialize_until(l_block, *first); - } - BOOST_ASSERT(xbuf.size() >= l_block); - size_type n_block_a, n_block_b, l_irreg1, l_irreg2; - combine_params( keys, comp, l_combine - , l_combine1, l_block, xbuf - , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs - merge_blocks_with_buf - (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len); - 
} - else{ - size_type n_block_a, n_block_b, l_irreg1, l_irreg2; - combine_params( keys, comp, l_combine - , l_combine1, l_block, xbuf - , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs - if(use_internal_buf){ - merge_blocks_with_buf - (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len); - } - else{ - merge_blocks_bufferless - (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len); - } - } - } - else{ - xbuf.shrink_to_fit(l_block); - if(xbuf.size() < l_block){ - xbuf.initialize_until(l_block, *first); - } - size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); - size_type n_block_a, n_block_b, l_irreg1, l_irreg2; - combine_params( uint_keys, less(), l_combine - , l_combine1, l_block, xbuf - , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); - BOOST_ASSERT(xbuf.size() >= l_block); - merge_blocks_with_buf - (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true); - xbuf.clear(); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len); - } -} - -template<class RandIt, class Compare, class XBuf> -inline void adaptive_merge_final_merge( RandIt first - , typename iterator_traits<RandIt>::size_type len1 - , typename iterator_traits<RandIt>::size_type len2 - , typename iterator_traits<RandIt>::size_type collected - , typename iterator_traits<RandIt>::size_type l_intbuf - , typename iterator_traits<RandIt>::size_type l_block - , bool use_internal_buf - , bool xbuf_used - , Compare comp - , XBuf & xbuf - ) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - (void)l_block; - size_type n_keys = collected-l_intbuf; - size_type len = len1+len2; - if(use_internal_buf){ - if(xbuf_used){ - xbuf.clear(); - //Nothing to 
do - if(n_keys){ - stable_sort(first, first+n_keys, comp, xbuf); - stable_merge(first, first+n_keys, first+len, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len); - } - } - else{ - xbuf.clear(); - stable_sort(first, first+collected, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); - stable_merge(first, first+collected, first+len, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); - } - } - else{ - xbuf.clear(); - stable_sort(first, first+collected, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); - stable_merge(first, first+collected, first+len1+len2, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); - } - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len); -} - -template<class SizeType, class Xbuf> -inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) -{ - typedef SizeType size_type; - size_type l_block = rl_block; - size_type l_intbuf = xbuf.capacity() >= l_block ? 
0u : l_block; - - while(xbuf.capacity() >= l_block*2){ - l_block *= 2; - } - - //This is the minimum number of keys to implement the ideal algorithm - size_type n_keys = len1/l_block+len2/l_block; - while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ - --n_keys; - } - ++n_keys; - BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); - - if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){ - n_keys = 0u; - } - l_intbuf_inout = l_intbuf; - rl_block = l_block; - return n_keys; -} - -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// - -// Main explanation of the sort algorithm. -// -// csqrtlen = ceil(sqrt(len)); -// -// * First, 2*csqrtlen unique elements elements are extracted from elements to be -// sorted and placed in the beginning of the range. -// -// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements -// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are -// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step -// 2*csqrtlen unique elements are again the leading elements of the whole range. -// -// * Step "combine_blocks": pairs of previously formed blocks are merged with a different -// ("smart") algorithm to form blocks of 8*csqrtlen elements. 
This step is slower than the -// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen -// elements, etc) of until all trailing (len-2*csqrtlen) elements are merged. -// -// In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to -// know if elements belong to the first or second block to be merged and another -// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step: -// -// Iteratively until all trailing (len-2*csqrtlen) elements are merged: -// Iteratively for each pair of previously merged block: -// * Blocks are divided groups of csqrtlen elements and -// 2*merged_block/csqrtlen keys are sorted to be used as markers -// * Groups are selection-sorted by first or last element (depending whether they are going -// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. -// * Elements of each block pair are merged using the csqrtlen buffer taking into account -// if they belong to the first half or second half (marked by the key). -// -// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with -// rotations with the rest of sorted elements in the "combine_blocks" step. -// -// Corner cases: -// -// * If no 2*csqrtlen elements can be extracted: -// -// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used -// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This -// means that an additional "combine_blocks" step will be needed to merge all elements. -// -// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum, -// then reduces the number of elements used as buffer and keys in the "build_blocks" -// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction -// then uses a rotation based smart merge. -// -// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. 
-// -// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used. -// -// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), -// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral -// keys to combine blocks. -// -// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks -// using classic merge and "combine_blocks" will use bigger blocks when merging. -template<class RandIt, class Compare, class XBuf> -void adaptive_sort_impl - ( RandIt first - , typename iterator_traits<RandIt>::size_type const len - , Compare comp - , XBuf & xbuf - ) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - - //Small sorts go directly to insertion sort - if(len <= size_type(AdaptiveSortInsertionSortThreshold)){ - insertion_sort(first, first + len, comp); - } - else if((len-len/2) <= xbuf.capacity()){ - merge_sort(first, first+len, comp, xbuf.data()); - } - else{ - //Make sure it is at least four - BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4); - - size_type l_base = 0; - size_type l_intbuf = 0; - size_type n_keys = 0; - size_type l_build_buf = 0; - - //Calculate and extract needed unique elements. 
If a minimum is not achieved - //fallback to a slow stable sort - if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ - stable_sort(first, first+len, comp, xbuf); - } - else{ - BOOST_ASSERT(l_build_buf); - //Otherwise, continue the adaptive_sort - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len); - size_type const n_key_plus_buf = l_intbuf+n_keys; - //l_build_buf is always power of two if l_intbuf is zero - BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); - - //Classic merge sort until internal buffer and xbuf are exhausted - size_type const l_merged = adaptive_sort_build_blocks - (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len); - - //Non-trivial merge - bool const buffer_right = adaptive_sort_combine_all_blocks - (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); - - //Sort keys and buffer and merge the whole sequence - adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); - } - } -} - -// Main explanation of the merge algorithm. -// -// csqrtlen = ceil(sqrt(len)); -// -// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect -// unique elements are extracted from elements to be sorted and placed in the beginning of the range. -// -// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements -// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. -// -// Explanation of the "combine_blocks" step: -// -// * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. -// Remaining elements that can't form a group are grouped in front of those elements. -// * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. 
-// Remaining elements that can't form a group are grouped in the back of those elements. -// * In parallel the following two steps are performed: -// * Groups are selection-sorted by first or last element (depending whether they are going -// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. -// * Elements of each block pair are merged using the csqrtlen buffer taking into account -// if they belong to the first half or second half (marked by the key). -// -// * In the final merge step leading "to_collect" elements are merged with rotations -// with the rest of merged elements in the "combine_blocks" step. -// -// Corner cases: -// -// * If no "to_collect" elements can be extracted: -// -// * If more than a minimum number of elements is extracted -// then reduces the number of elements used as buffer and keys in the -// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction -// then uses a rotation based smart merge. -// -// * If the minimum number of keys can't be extracted, a rotation-based merge is performed. -// -// * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. -// -// * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. -// -// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), -// then no csqrtlen need to be extracted and "combine_blocks" will use integral -// keys to combine blocks. 
-template<class RandIt, class Compare, class XBuf> -void adaptive_merge_impl - ( RandIt first - , typename iterator_traits<RandIt>::size_type const len1 - , typename iterator_traits<RandIt>::size_type const len2 - , Compare comp - , XBuf & xbuf - ) -{ - typedef typename iterator_traits<RandIt>::size_type size_type; - - if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ - buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); - } - else{ - const size_type len = len1+len2; - //Calculate ideal parameters and try to collect needed unique keys - size_type l_block = size_type(ceil_sqrt(len)); - - //One range is not big enough to extract keys and the internal buffer so a - //rotation-based based merge will do just fine - if(len1 <= l_block*2 || len2 <= l_block*2){ - merge_bufferless(first, first+len1, first+len1+len2, comp); - return; - } - - //Detail the number of keys and internal buffer. If xbuf has enough memory, no - //internal buffer is needed so l_intbuf will remain 0. - size_type l_intbuf = 0; - size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); - size_type const to_collect = l_intbuf+n_keys; - //Try to extract needed unique values from the first range - size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len); - - //Not the minimum number of keys is not available on the first range, so fallback to rotations - if(collected != to_collect && collected < 4){ - merge_bufferless(first, first+collected, first+len1, comp); - merge_bufferless(first, first + len1, first + len1 + len2, comp); - return; - } - - //If not enough keys but more than minimum, adjust the internal buffer and key count - bool use_internal_buf = collected == to_collect; - if (!use_internal_buf){ - l_intbuf = 0u; - n_keys = collected; - l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); - //If use_internal_buf is false, then then internal 
buffer will be zero and rotation-based combination will be used - l_intbuf = use_internal_buf ? l_block : 0u; - } - - bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; - //Merge trailing elements using smart merges - adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); - //Merge buffer and keys with the rest of the values - adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); - } -} - } //namespace detail_adaptive { } //namespace movelib { diff --git a/boost/move/algo/detail/heap_sort.hpp b/boost/move/algo/detail/heap_sort.hpp new file mode 100644 index 0000000000..5474d9f5c4 --- /dev/null +++ b/boost/move/algo/detail/heap_sort.hpp @@ -0,0 +1,111 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2017-2018. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// + +//! 
\file + +#ifndef BOOST_MOVE_DETAIL_HEAP_SORT_HPP +#define BOOST_MOVE_DETAIL_HEAP_SORT_HPP + +#ifndef BOOST_CONFIG_HPP +# include <boost/config.hpp> +#endif +# +#if defined(BOOST_HAS_PRAGMA_ONCE) +# pragma once +#endif + +#include <boost/move/detail/config_begin.hpp> +#include <boost/move/detail/workaround.hpp> +#include <boost/move/detail/iterator_traits.hpp> +#include <boost/move/algo/detail/is_sorted.hpp> +#include <boost/move/utility_core.hpp> + +namespace boost { namespace movelib{ + +template <class RandomAccessIterator, class Compare> +class heap_sort_helper +{ + typedef typename boost::movelib::iterator_traits<RandomAccessIterator>::size_type size_type; + typedef typename boost::movelib::iterator_traits<RandomAccessIterator>::value_type value_type; + + static void adjust_heap(RandomAccessIterator first, size_type hole_index, size_type const len, value_type &value, Compare comp) + { + size_type const top_index = hole_index; + size_type second_child = 2 * (hole_index + 1); + + while (second_child < len) { + if (comp(*(first + second_child), *(first + (second_child - 1)))) + second_child--; + *(first + hole_index) = boost::move(*(first + second_child)); + hole_index = second_child; + second_child = 2 * (second_child + 1); + } + if (second_child == len) { + *(first + hole_index) = boost::move(*(first + (second_child - 1))); + hole_index = second_child - 1; + } + + { //push_heap-like ending + size_type parent = (hole_index - 1) / 2; + while (hole_index > top_index && comp(*(first + parent), value)) { + *(first + hole_index) = boost::move(*(first + parent)); + hole_index = parent; + parent = (hole_index - 1) / 2; + } + *(first + hole_index) = boost::move(value); + } + } + + static void make_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp) + { + size_type const len = size_type(last - first); + if (len > 1) { + size_type parent = len/2u - 1u; + + do { + value_type v(boost::move(*(first + parent))); + adjust_heap(first, parent, len, v, comp); 
+ }while (parent--); + } + } + + static void sort_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp) + { + size_type len = size_type(last - first); + while (len > 1) { + //move biggest to the safe zone + --last; + value_type v(boost::move(*last)); + *last = boost::move(*first); + adjust_heap(first, size_type(0), --len, v, comp); + } + } + + public: + static void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp) + { + make_heap(first, last, comp); + sort_heap(first, last, comp); + BOOST_ASSERT(boost::movelib::is_sorted(first, last, comp)); + } +}; + +template <class RandomAccessIterator, class Compare> +BOOST_MOVE_FORCEINLINE void heap_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp) +{ + heap_sort_helper<RandomAccessIterator, Compare>::sort(first, last, comp); +} + +}} //namespace boost { namespace movelib{ + +#include <boost/move/detail/config_end.hpp> + +#endif //#ifndef BOOST_MOVE_DETAIL_HEAP_SORT_HPP diff --git a/boost/move/algo/detail/insertion_sort.hpp b/boost/move/algo/detail/insertion_sort.hpp index 3328f75748..5c378c3e36 100644 --- a/boost/move/algo/detail/insertion_sort.hpp +++ b/boost/move/algo/detail/insertion_sort.hpp @@ -101,21 +101,21 @@ void insertion_sort_uninitialized_copy typedef typename iterator_traits<BirdirectionalIterator>::value_type value_type; if (first1 != last1){ BirdirectionalRawIterator last2 = first2; - ::new((iterator_to_raw_pointer)(last2), boost_move_new_t()) value_type(move(*first1)); + ::new((iterator_to_raw_pointer)(last2), boost_move_new_t()) value_type(::boost::move(*first1)); destruct_n<value_type, BirdirectionalRawIterator> d(first2); d.incr(); for (++last2; ++first1 != last1; ++last2){ BirdirectionalRawIterator j2 = last2; BirdirectionalRawIterator k2 = j2; if (comp(*first1, *--k2)){ - ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(move(*k2)); + ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) 
value_type(::boost::move(*k2)); d.incr(); for (--j2; k2 != first2 && comp(*first1, *--k2); --j2) - *j2 = move(*k2); - *j2 = move(*first1); + *j2 = ::boost::move(*k2); + *j2 = ::boost::move(*first1); } else{ - ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(move(*first1)); + ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(::boost::move(*first1)); d.incr(); } } diff --git a/boost/move/algo/detail/is_sorted.hpp b/boost/move/algo/detail/is_sorted.hpp new file mode 100644 index 0000000000..d3dccfc2db --- /dev/null +++ b/boost/move/algo/detail/is_sorted.hpp @@ -0,0 +1,55 @@ +#ifndef BOOST_MOVE_DETAIL_IS_SORTED_HPP +#define BOOST_MOVE_DETAIL_IS_SORTED_HPP +/////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2017-2018. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/container for documentation. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef BOOST_CONFIG_HPP +# include <boost/config.hpp> +#endif + +#if defined(BOOST_HAS_PRAGMA_ONCE) +# pragma once +#endif + +namespace boost { +namespace movelib { + +template<class ForwardIt, class Pred> +bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred) +{ + if (first != last) { + ForwardIt next = first, cur(first); + while (++next != last) { + if (pred(*next, *cur)) + return false; + cur = next; + } + } + return true; +} + +template<class ForwardIt, class Pred> +bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred) +{ + if (first != last) { + ForwardIt next = first; + while (++next != last) { + if (!pred(*first, *next)) + return false; + first = next; + } + } + return true; +} + +} //namespace movelib { +} //namespace boost { + +#endif //BOOST_MOVE_DETAIL_IS_SORTED_HPP diff --git a/boost/move/algo/detail/merge.hpp b/boost/move/algo/detail/merge.hpp index 621dfa28af..860773579c 100644 --- a/boost/move/algo/detail/merge.hpp +++ b/boost/move/algo/detail/merge.hpp @@ -256,56 +256,67 @@ void swap_merge_right op_merge_right(first1, last1, last2, buf_last, comp, swap_op()); } -template <class BidirIt, class Distance, class Compare> +//Complexity: min(len1,len2)^2 + max(len1,len2) +template<class RandIt, class Compare> +void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp) +{ + if((middle - first) < (last - middle)){ + while(first != middle){ + RandIt const old_last1 = middle; + middle = boost::movelib::lower_bound(middle, last, *first, comp); + first = rotate_gcd(first, old_last1, middle); + if(middle == last){ + break; + } + do{ + ++first; + } while(first != middle && !comp(*middle, *first)); + } + } + else{ + while(middle != last){ + RandIt p = boost::movelib::upper_bound(first, middle, last[-1], comp); + last = rotate_gcd(p, middle, last); + middle = p; + if(middle == first){ + break; + } + --p; + do{ + --last; + } 
while(middle != last && !comp(last[-1], *p)); + } + } +} + +static const std::size_t MergeBufferlessONLogNRotationThreshold = 32; + +template <class RandIt, class Distance, class Compare> void merge_bufferless_ONlogN_recursive - (BidirIt first, BidirIt middle, BidirIt last, Distance len1, Distance len2, Compare comp) + (RandIt first, RandIt middle, RandIt last, Distance len1, Distance len2, Compare comp) { - typedef typename iterator_traits<BidirIt>::size_type size_type; + typedef typename iterator_traits<RandIt>::size_type size_type; + while(1) { - //#define MERGE_BUFFERLESS_RECURSIVE_OPT - #ifndef MERGE_BUFFERLESS_RECURSIVE_OPT - if (len2 == 0) { + //trivial cases + if (!len2) { return; } - - if (!len1) { + else if (!len1) { return; } - - if ((len1 | len2) == 1) { + else if (size_type(len1 | len2) == 1u) { if (comp(*middle, *first)) adl_move_swap(*first, *middle); return; } - #else - if (len2 == 0) { + else if(size_type(len1+len2) < MergeBufferlessONLogNRotationThreshold){ + merge_bufferless_ON2(first, middle, last, comp); return; } - if (!len1) { - return; - } - BidirIt middle_prev = middle; --middle_prev; - if(!comp(*middle, *middle_prev)) - return; - - while(true) { - if (comp(*middle, *first)) - break; - ++first; - if(--len1 == 1) - break; - } - - if (len1 == 1 && len2 == 1) { - //comp(*middle, *first) == true already tested in the loop - adl_move_swap(*first, *middle); - return; - } - #endif - - BidirIt first_cut = first; - BidirIt second_cut = middle; + RandIt first_cut = first; + RandIt second_cut = middle; Distance len11 = 0; Distance len22 = 0; if (len1 > len2) { @@ -320,20 +331,18 @@ void merge_bufferless_ONlogN_recursive first_cut = boost::movelib::upper_bound(first, middle, *second_cut, comp); len11 = size_type(first_cut - first); } - BidirIt new_middle = rotate_gcd(first_cut, middle, second_cut); + RandIt new_middle = rotate_gcd(first_cut, middle, second_cut); //Avoid one recursive call doing a manual tail call elimination on the biggest range const 
Distance len_internal = len11+len22; if( len_internal < (len1 + len2 - len_internal) ) { merge_bufferless_ONlogN_recursive(first, first_cut, new_middle, len11, len22, comp); - //merge_bufferless_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp); first = new_middle; middle = second_cut; len1 -= len11; len2 -= len22; } else { - //merge_bufferless_recursive(first, first_cut, new_middle, len11, len22, comp); merge_bufferless_ONlogN_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp); middle = first_cut; last = new_middle; @@ -344,50 +353,17 @@ void merge_bufferless_ONlogN_recursive } //Complexity: NlogN -template<class BidirIt, class Compare> -void merge_bufferless_ONlogN(BidirIt first, BidirIt middle, BidirIt last, Compare comp) +template<class RandIt, class Compare> +void merge_bufferless_ONlogN(RandIt first, RandIt middle, RandIt last, Compare comp) { merge_bufferless_ONlogN_recursive (first, middle, last, middle - first, last - middle, comp); } -//Complexity: min(len1,len2)^2 + max(len1,len2) -template<class RandIt, class Compare> -void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp) -{ - if((middle - first) < (last - middle)){ - while(first != middle){ - RandIt const old_last1 = middle; - middle = boost::movelib::lower_bound(middle, last, *first, comp); - first = rotate_gcd(first, old_last1, middle); - if(middle == last){ - break; - } - do{ - ++first; - } while(first != middle && !comp(*middle, *first)); - } - } - else{ - while(middle != last){ - RandIt p = boost::movelib::upper_bound(first, middle, last[-1], comp); - last = rotate_gcd(p, middle, last); - middle = p; - if(middle == first){ - break; - } - --p; - do{ - --last; - } while(middle != last && !comp(last[-1], *p)); - } - } -} - template<class RandIt, class Compare> void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp) { - //#define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE + #define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE 
#ifdef BOOST_ADAPTIVE_MERGE_NLOGN_MERGE merge_bufferless_ONlogN(first, middle, last, comp); #else diff --git a/boost/move/algo/detail/pdqsort.hpp b/boost/move/algo/detail/pdqsort.hpp new file mode 100644 index 0000000000..b6a127896c --- /dev/null +++ b/boost/move/algo/detail/pdqsort.hpp @@ -0,0 +1,334 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Orson Peters 2017. +// (C) Copyright Ion Gaztanaga 2017-2018. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// +// +// This implementation of Pattern-defeating quicksort (pdqsort) was written +// by Orson Peters, and discussed in the Boost mailing list: +// http://boost.2283326.n4.nabble.com/sort-pdqsort-td4691031.html +// +// This implementation is the adaptation by Ion Gaztanaga of code originally in GitHub +// with permission from the author to relicense it under the Boost Software License +// (see the Boost mailing list for details). +// +// The original copyright statement is pasted here for completeness: +// +// pdqsort.h - Pattern-defeating quicksort. +// Copyright (c) 2015 Orson Peters +// This software is provided 'as-is', without any express or implied warranty. In no event will the +// authors be held liable for any damages arising from the use of this software. +// Permission is granted to anyone to use this software for any purpose, including commercial +// applications, and to alter it and redistribute it freely, subject to the following restrictions: +// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the +// original software. 
If you use this software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as +// being the original software. +// 3. This notice may not be removed or altered from any source distribution. +// +////////////////////////////////////////////////////////////////////////////// + +#ifndef BOOST_MOVE_ALGO_PDQSORT_HPP +#define BOOST_MOVE_ALGO_PDQSORT_HPP + +#ifndef BOOST_CONFIG_HPP +# include <boost/config.hpp> +#endif +# +#if defined(BOOST_HAS_PRAGMA_ONCE) +# pragma once +#endif + +#include <boost/move/detail/config_begin.hpp> +#include <boost/move/detail/workaround.hpp> +#include <boost/move/utility_core.hpp> +#include <boost/move/algo/detail/insertion_sort.hpp> +#include <boost/move/algo/detail/heap_sort.hpp> +#include <boost/move/detail/iterator_traits.hpp> + +#include <boost/move/adl_move_swap.hpp> +#include <cstddef> + +namespace boost { +namespace movelib { + +namespace pdqsort_detail { + + //A simple pair implementation to avoid including <utility> + template<class T1, class T2> + struct pair + { + pair() + {} + + pair(const T1 &t1, const T2 &t2) + : first(t1), second(t2) + {} + + T1 first; + T2 second; + }; + + enum { + // Partitions below this size are sorted using insertion sort. + insertion_sort_threshold = 24, + + // Partitions above this size use Tukey's ninther to select the pivot. + ninther_threshold = 128, + + // When we detect an already sorted partition, attempt an insertion sort that allows this + // amount of element moves before giving up. + partial_insertion_sort_limit = 8, + + // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char. + block_size = 64, + + // Cacheline size, assumes power of two. + cacheline_size = 64 + + }; + + // Returns floor(log2(n)), assumes n > 0. 
+ template<class Unsigned> + Unsigned log2(Unsigned n) { + Unsigned log = 0; + while (n >>= 1) ++log; + return log; + } + + // Attempts to use insertion sort on [begin, end). Will return false if more than + // partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will + // successfully sort and return true. + template<class Iter, class Compare> + inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) { + typedef typename boost::movelib::iterator_traits<Iter>::value_type T; + typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type; + if (begin == end) return true; + + size_type limit = 0; + for (Iter cur = begin + 1; cur != end; ++cur) { + if (limit > partial_insertion_sort_limit) return false; + + Iter sift = cur; + Iter sift_1 = cur - 1; + + // Compare first so we can avoid 2 moves for an element already positioned correctly. + if (comp(*sift, *sift_1)) { + T tmp = boost::move(*sift); + + do { *sift-- = boost::move(*sift_1); } + while (sift != begin && comp(tmp, *--sift_1)); + + *sift = boost::move(tmp); + limit += size_type(cur - sift); + } + } + + return true; + } + + template<class Iter, class Compare> + inline void sort2(Iter a, Iter b, Compare comp) { + if (comp(*b, *a)) boost::adl_move_iter_swap(a, b); + } + + // Sorts the elements *a, *b and *c using comparison function comp. + template<class Iter, class Compare> + inline void sort3(Iter a, Iter b, Iter c, Compare comp) { + sort2(a, b, comp); + sort2(b, c, comp); + sort2(a, b, comp); + } + + // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal + // to the pivot are put in the right-hand partition. Returns the position of the pivot after + // partitioning and whether the passed sequence already was correctly partitioned. Assumes the + // pivot is a median of at least 3 elements and that [begin, end) is at least + // insertion_sort_threshold long. 
+ template<class Iter, class Compare> + pdqsort_detail::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) { + typedef typename boost::movelib::iterator_traits<Iter>::value_type T; + + // Move pivot into local for speed. + T pivot(boost::move(*begin)); + + Iter first = begin; + Iter last = end; + + // Find the first element greater than or equal than the pivot (the median of 3 guarantees + // this exists). + while (comp(*++first, pivot)); + + // Find the first element strictly smaller than the pivot. We have to guard this search if + // there was no element before *first. + if (first - 1 == begin) while (first < last && !comp(*--last, pivot)); + else while ( !comp(*--last, pivot)); + + // If the first pair of elements that should be swapped to partition are the same element, + // the passed in sequence already was correctly partitioned. + bool already_partitioned = first >= last; + + // Keep swapping pairs of elements that are on the wrong side of the pivot. Previously + // swapped pairs guard the searches, which is why the first iteration is special-cased + // above. + while (first < last) { + boost::adl_move_iter_swap(first, last); + while (comp(*++first, pivot)); + while (!comp(*--last, pivot)); + } + + // Put the pivot in the right place. + Iter pivot_pos = first - 1; + *begin = boost::move(*pivot_pos); + *pivot_pos = boost::move(pivot); + + return pdqsort_detail::pair<Iter, bool>(pivot_pos, already_partitioned); + } + + // Similar function to the one above, except elements equal to the pivot are put to the left of + // the pivot and it doesn't check or return if the passed sequence already was partitioned. + // Since this is rarely used (the many equal case), and in that case pdqsort already has O(n) + // performance, no block quicksort is applied here for simplicity. 
+ template<class Iter, class Compare> + inline Iter partition_left(Iter begin, Iter end, Compare comp) { + typedef typename boost::movelib::iterator_traits<Iter>::value_type T; + + T pivot(boost::move(*begin)); + Iter first = begin; + Iter last = end; + + while (comp(pivot, *--last)); + + if (last + 1 == end) while (first < last && !comp(pivot, *++first)); + else while ( !comp(pivot, *++first)); + + while (first < last) { + boost::adl_move_iter_swap(first, last); + while (comp(pivot, *--last)); + while (!comp(pivot, *++first)); + } + + Iter pivot_pos = last; + *begin = boost::move(*pivot_pos); + *pivot_pos = boost::move(pivot); + + return pivot_pos; + } + + + template<class Iter, class Compare> + void pdqsort_loop( Iter begin, Iter end, Compare comp + , typename boost::movelib::iterator_traits<Iter>::size_type bad_allowed + , bool leftmost = true) + { + typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type; + + // Use a while loop for tail recursion elimination. + while (true) { + size_type size = size_type(end - begin); + + // Insertion sort is faster for small arrays. + if (size < insertion_sort_threshold) { + insertion_sort(begin, end, comp); + return; + } + + // Choose pivot as median of 3 or pseudomedian of 9. + size_type s2 = size / 2; + if (size > ninther_threshold) { + sort3(begin, begin + s2, end - 1, comp); + sort3(begin + 1, begin + (s2 - 1), end - 2, comp); + sort3(begin + 2, begin + (s2 + 1), end - 3, comp); + sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); + boost::adl_move_iter_swap(begin, begin + s2); + } else sort3(begin + s2, begin, end - 1, comp); + + // If *(begin - 1) is the end of the right partition of a previous partition operation + // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our + // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in + // the left partition, greater elements in the right partition. 
We do not have to + // recurse on the left partition, since it's sorted (all equal). + if (!leftmost && !comp(*(begin - 1), *begin)) { + begin = partition_left(begin, end, comp) + 1; + continue; + } + + // Partition and get results. + pdqsort_detail::pair<Iter, bool> part_result = partition_right(begin, end, comp); + Iter pivot_pos = part_result.first; + bool already_partitioned = part_result.second; + + // Check for a highly unbalanced partition. + size_type l_size = size_type(pivot_pos - begin); + size_type r_size = size_type(end - (pivot_pos + 1)); + bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; + + // If we got a highly unbalanced partition we shuffle elements to break many patterns. + if (highly_unbalanced) { + // If we had too many bad partitions, switch to heapsort to guarantee O(n log n). + if (--bad_allowed == 0) { + boost::movelib::heap_sort(begin, end, comp); + return; + } + + if (l_size >= insertion_sort_threshold) { + boost::adl_move_iter_swap(begin, begin + l_size / 4); + boost::adl_move_iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); + + if (l_size > ninther_threshold) { + boost::adl_move_iter_swap(begin + 1, begin + (l_size / 4 + 1)); + boost::adl_move_iter_swap(begin + 2, begin + (l_size / 4 + 2)); + boost::adl_move_iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); + boost::adl_move_iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); + } + } + + if (r_size >= insertion_sort_threshold) { + boost::adl_move_iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); + boost::adl_move_iter_swap(end - 1, end - r_size / 4); + + if (r_size > ninther_threshold) { + boost::adl_move_iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); + boost::adl_move_iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); + boost::adl_move_iter_swap(end - 2, end - (1 + r_size / 4)); + boost::adl_move_iter_swap(end - 3, end - (2 + r_size / 4)); + } + } + } else { + // If we were decently balanced and we tried to sort an already partitioned + // 
sequence try to use insertion sort. + if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp) + && partial_insertion_sort(pivot_pos + 1, end, comp)) return; + } + + // Sort the left partition first using recursion and do tail recursion elimination for + // the right-hand partition. + pdqsort_loop<Iter, Compare>(begin, pivot_pos, comp, bad_allowed, leftmost); + begin = pivot_pos + 1; + leftmost = false; + } + } +} + + +template<class Iter, class Compare> +void pdqsort(Iter begin, Iter end, Compare comp) +{ + if (begin == end) return; + typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type; + pdqsort_detail::pdqsort_loop<Iter, Compare>(begin, end, comp, pdqsort_detail::log2(size_type(end - begin))); +} + +} //namespace movelib { +} //namespace boost { + +#include <boost/move/detail/config_end.hpp> + +#endif //BOOST_MOVE_ALGO_PDQSORT_HPP diff --git a/boost/move/algo/detail/set_difference.hpp b/boost/move/algo/detail/set_difference.hpp new file mode 100644 index 0000000000..51d047592a --- /dev/null +++ b/boost/move/algo/detail/set_difference.hpp @@ -0,0 +1,207 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2017-2017. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. 
+//
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_MOVE_SET_DIFFERENCE_HPP
+#define BOOST_MOVE_SET_DIFFERENCE_HPP
+
+#include <boost/move/algo/move.hpp>
+#include <boost/move/iterator.hpp>
+#include <boost/move/utility_core.hpp>
+
+namespace boost {
+
+namespace move_detail{
+
+template<class InputIt, class OutputIt>
+OutputIt copy(InputIt first, InputIt last, OutputIt result)
+{
+   while (first != last) {
+      *result = *first;
+      ++result;
+      ++first;
+   }
+   return result;
+}
+
+} //namespace move_detail{
+
+namespace movelib {
+
+//Moves the elements from the sorted range [first1, last1) which are not found in the sorted
+//range [first2, last2) to the range beginning at result.
+//The resulting range is also sorted. Equivalent elements are treated individually,
+//that is, if some element is found m times in [first1, last1) and n times in [first2, last2),
+//it will be moved to result exactly max(m-n, 0) times.
+//The resulting range cannot overlap with either of the input ranges.
+template<class InputIt1, class InputIt2,
+         class OutputIt, class Compare>
+OutputIt set_difference
+   (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, OutputIt result, Compare comp)
+{
+   while (first1 != last1) {
+      if (first2 == last2)
+         return boost::move_detail::copy(first1, last1, result);
+
+      if (comp(*first1, *first2)) {
+         *result = *first1;
+         ++result;
+         ++first1;
+      }
+      else {
+         if (!comp(*first2, *first1)) {
+            ++first1;
+         }
+         ++first2;
+      }
+   }
+   return result;
+}
+
+//Moves the elements from the sorted range [first1, last1) which are not found in the sorted
+//range [first2, last2) to the range beginning at first1 (in place operation in range1).
+//The resulting range is also sorted. Equivalent elements are treated individually,
+//that is, if some element is found m times in [first1, last1) and n times in [first2, last2),
+//it will be moved to result exactly max(m-n, 0) times.
+template<class InputOutputIt1, class InputIt2, class Compare> +InputOutputIt1 inplace_set_difference + (InputOutputIt1 first1, InputOutputIt1 last1, InputIt2 first2, InputIt2 last2, Compare comp ) +{ + while (first1 != last1) { + //Skip copying from range 1 if no element has to be skipped + if (first2 == last2){ + return last1; + } + else if (comp(*first1, *first2)){ + ++first1; + } + else{ + if (!comp(*first2, *first1)) { + InputOutputIt1 result = first1; + //An element from range 1 must be skipped, no longer an inplace operation + return boost::movelib::set_difference + ( boost::make_move_iterator(++first1) + , boost::make_move_iterator(last1) + , ++first2, last2, result, comp); + } + ++first2; + } + } + return first1; +} + +//Moves the elements from the sorted range [first1, last1) which are not found in the sorted +//range [first2, last2) to the range beginning at first1. +//The resulting range is also sorted. Equivalent elements from range 1 are moved past to end +//of the result, +//that is, if some element is found m times in [first1, last1) and n times in [first2, last2), +//it will be moved to result exactly max(m-n, 0) times. +//The resulting range cannot overlap with either of the input ranges. +template<class ForwardIt1, class InputIt2, + class OutputIt, class Compare> +OutputIt set_unique_difference + (ForwardIt1 first1, ForwardIt1 last1, InputIt2 first2, InputIt2 last2, OutputIt result, Compare comp) +{ + while (first1 != last1) { + if (first2 == last2){ + //unique_copy-like sequence with forward iterators but don't write i + //to result before comparing as moving *i could alter the value in i. + ForwardIt1 i = first1; + while (++first1 != last1) { + if (comp(*i, *first1)) { + *result = *i; + ++result; + i = first1; + } + } + *result = *i; + ++result; + break; + } + + if (comp(*first1, *first2)) { + //Skip equivalent elements in range1 but don't write i + //to result before comparing as moving *i could alter the value in i. 
+ ForwardIt1 i = first1; + while (++first1 != last1) { + if (comp(*i, *first1)) { + break; + } + } + *result = *i; + ++result; + } + else { + if (comp(*first2, *first1)) { + ++first2; + } + else{ + ++first1; + } + } + } + return result; +} + +//Moves the elements from the sorted range [first1, last1) which are not found in the sorted +//range [first2, last2) to the range beginning at first1 (in place operation in range1). +//The resulting range is also sorted. Equivalent elements are treated individually, +//that is, if some element is found m times in [first1, last1) and n times in [first2, last2), +//it will be moved to result exactly max(m-n, 0) times. +template<class ForwardOutputIt1, class ForwardIt2, class Compare> +ForwardOutputIt1 inplace_set_unique_difference + (ForwardOutputIt1 first1, ForwardOutputIt1 last1, ForwardIt2 first2, ForwardIt2 last2, Compare comp ) +{ + while (first1 != last1) { + //Skip copying from range 1 if no element has to be skipped + if (first2 == last2){ + //unique-like algorithm for the remaining range 1 + ForwardOutputIt1 result = first1; + while (++first1 != last1) { + if (comp(*result, *first1) && ++result != first1) { + *result = boost::move(*first1); + } + } + return ++result; + } + else if (comp(*first2, *first1)) { + ++first2; + } + else if (comp(*first1, *first2)){ + //skip any adjacent equivalent elementin range 1 + ForwardOutputIt1 result = first1; + if (++first1 != last1 && !comp(*result, *first1)) { + //Some elements from range 1 must be skipped, no longer an inplace operation + while (++first1 != last1 && !comp(*result, *first1)){} + return boost::movelib::set_unique_difference + ( boost::make_move_iterator(first1) + , boost::make_move_iterator(last1) + , first2, last2, ++result, comp); + } + } + else{ + ForwardOutputIt1 result = first1; + //Some elements from range 1 must be skipped, no longer an inplace operation + while (++first1 != last1 && !comp(*result, *first1)){} + //An element from range 1 must be skipped, no 
longer an inplace operation + return boost::movelib::set_unique_difference + ( boost::make_move_iterator(first1) + , boost::make_move_iterator(last1) + , first2, last2, result, comp); + } + } + return first1; +} + + + +} //namespace movelib { +} //namespace boost { + +#endif //#define BOOST_MOVE_SET_DIFFERENCE_HPP diff --git a/boost/move/detail/type_traits.hpp b/boost/move/detail/type_traits.hpp index 272cb11af8..a3326d00e1 100644 --- a/boost/move/detail/type_traits.hpp +++ b/boost/move/detail/type_traits.hpp @@ -973,7 +973,7 @@ struct aligned_struct; template<std::size_t Len>\ struct BOOST_ALIGNMENT(A) aligned_struct<Len, A>\ {\ - char dummy[Len];\ + char data[Len];\ };\ // @@ -997,9 +997,10 @@ BOOST_MOVE_ALIGNED_STORAGE_WITH_BOOST_ALIGNMENT(0x1000) // Workaround for bogus [-Wignored-attributes] warning on GCC 6.x/7.x: don't use a type that "directly" carries the alignment attribute. // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82270 template<std::size_t Len, std::size_t Align> -struct aligned_struct_wrapper +union aligned_struct_wrapper { - aligned_struct<Len, Align> dummy; + aligned_struct<Len, Align> aligner; + char data[sizeof(aligned_struct<Len, Align>)]; }; template<std::size_t Len, std::size_t Align> @@ -1014,7 +1015,7 @@ template<class T, std::size_t Len> union aligned_union { T aligner; - char dummy[Len]; + char data[Len]; }; template<std::size_t Len, std::size_t Align, class T, bool Ok> |