summaryrefslogtreecommitdiff
path: root/boost/move
diff options
context:
space:
mode:
Diffstat (limited to 'boost/move')
-rw-r--r--boost/move/adl_move_swap.hpp6
-rw-r--r--boost/move/algo/adaptive_merge.hpp253
-rw-r--r--boost/move/algo/adaptive_sort.hpp552
-rw-r--r--boost/move/algo/detail/adaptive_sort_merge.hpp1208
-rw-r--r--boost/move/algo/detail/heap_sort.hpp111
-rw-r--r--boost/move/algo/detail/insertion_sort.hpp10
-rw-r--r--boost/move/algo/detail/is_sorted.hpp55
-rw-r--r--boost/move/algo/detail/merge.hpp126
-rw-r--r--boost/move/algo/detail/pdqsort.hpp334
-rw-r--r--boost/move/algo/detail/set_difference.hpp207
-rw-r--r--boost/move/detail/type_traits.hpp9
11 files changed, 1793 insertions, 1078 deletions
diff --git a/boost/move/adl_move_swap.hpp b/boost/move/adl_move_swap.hpp
index d6906a483f..d9096e36c3 100644
--- a/boost/move/adl_move_swap.hpp
+++ b/boost/move/adl_move_swap.hpp
@@ -261,6 +261,12 @@ BidirIt2 adl_move_swap_ranges_backward(BidirIt1 first1, BidirIt1 last1, BidirIt2
return last2;
}
+template<class ForwardIt1, class ForwardIt2> //Swaps the two elements that iterators "a" and "b" refer to
+void adl_move_iter_swap(ForwardIt1 a, ForwardIt2 b)
+{
+ boost::adl_move_swap(*a, *b); //Both iterators are dereferenced and the referenced values are handed to adl_move_swap
+}
+
} //namespace boost{
#endif //#ifndef BOOST_MOVE_ADL_MOVE_SWAP_HPP
diff --git a/boost/move/algo/adaptive_merge.hpp b/boost/move/algo/adaptive_merge.hpp
index 0233b232e3..0040fda065 100644
--- a/boost/move/algo/adaptive_merge.hpp
+++ b/boost/move/algo/adaptive_merge.hpp
@@ -18,6 +18,259 @@
namespace boost {
namespace movelib {
+///@cond
+namespace detail_adaptive {
+
+template<class RandIt, class Compare, class XBuf> //Combine step called by adaptive_merge_impl: block-merges the data that follows the "collected" leading elements
+inline void adaptive_merge_combine_blocks( RandIt first
+ , typename iterator_traits<RandIt>::size_type len1
+ , typename iterator_traits<RandIt>::size_type len2
+ , typename iterator_traits<RandIt>::size_type collected //number of leading elements set apart by the caller
+ , typename iterator_traits<RandIt>::size_type n_keys //zero selects the integral-key path below
+ , typename iterator_traits<RandIt>::size_type l_block
+ , bool use_internal_buf //only consulted when xbuf_used is false
+ , bool xbuf_used
+ , Compare comp
+ , XBuf & xbuf
+ )
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ size_type const len = len1+len2;
+ size_type const l_combine = len-collected; //everything after the collected prefix
+ size_type const l_combine1 = len1-collected; //what is left of the first range after the collected prefix
+
+ if(n_keys){
+ RandIt const first_data = first+collected; //data to be combined starts right after the collected elements
+ RandIt const keys = first; //keys are taken from the beginning of the range
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
+ if(xbuf_used){
+ if(xbuf.size() < l_block){
+ xbuf.initialize_until(l_block, *first); //make sure xbuf holds at least l_block elements (asserted just below)
+ }
+ BOOST_ASSERT(xbuf.size() >= l_block);
+ size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
+ combine_params( keys, comp, l_combine
+ , l_combine1, l_block, xbuf
+ , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
+ op_merge_blocks_with_buf //move_op with xbuf.data() passed as the buffer
+ (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len);
+ }
+ else{
+ size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
+ combine_params( keys, comp, l_combine
+ , l_combine1, l_block, xbuf
+ , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
+ if(use_internal_buf){
+ op_merge_blocks_with_buf //swap_op with the l_block elements placed just before first_data passed as the buffer
+ (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len);
+ }
+ else{
+ merge_blocks_bufferless //neither xbuf nor an internal buffer is used on this path
+ (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len);
+ }
+ }
+ }
+ else{ //n_keys == 0: keys are size_type values obtained from xbuf instead of elements of the range
+ xbuf.shrink_to_fit(l_block);
+ if(xbuf.size() < l_block){
+ xbuf.initialize_until(l_block, *first);
+ }
+ size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
+ size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
+ combine_params( uint_keys, less(), l_combine
+ , l_combine1, l_block, xbuf
+ , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
+ BOOST_ASSERT(xbuf.size() >= l_block);
+ op_merge_blocks_with_buf //note that the data pointer passed here is "first", not first+collected
+ (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
+ xbuf.clear();
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len);
+ }
+}
+
+template<class RandIt, class Compare, class XBuf> //Final step called by adaptive_merge_impl: sorts the leading collected elements and merges them with the rest of [first, first+len1+len2)
+inline void adaptive_merge_final_merge( RandIt first
+ , typename iterator_traits<RandIt>::size_type len1
+ , typename iterator_traits<RandIt>::size_type len2
+ , typename iterator_traits<RandIt>::size_type collected
+ , typename iterator_traits<RandIt>::size_type l_intbuf
+ , typename iterator_traits<RandIt>::size_type l_block
+ , bool use_internal_buf
+ , bool xbuf_used
+ , Compare comp
+ , XBuf & xbuf
+ )
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ (void)l_block; //l_block is not used in this function; the cast silences unused-parameter warnings
+ size_type n_keys = collected-l_intbuf; //collected elements that are not part of the internal buffer
+ size_type len = len1+len2;
+ if(use_internal_buf){
+ if(xbuf_used){
+ xbuf.clear();
+ //Only the leading n_keys elements (if any) are sorted and merged on this path
+ if(n_keys){
+ unstable_sort(first, first+n_keys, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len);
+ }
+ }
+ else{
+ xbuf.clear();
+ unstable_sort(first, first+collected, comp, xbuf); //all collected elements are sorted, then merged with the remainder
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
+ stable_merge(first, first+collected, first+len, comp, xbuf);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
+ }
+ }
+ else{ //same sort + merge sequence as the branch above (first+len1+len2 equals first+len)
+ xbuf.clear();
+ unstable_sort(first, first+collected, comp, xbuf);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
+ stable_merge(first, first+collected, first+len1+len2, comp, xbuf);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
+ }
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len);
+}
+
+template<class SizeType, class Xbuf> //Computes the key count (returned) and writes back the block length (rl_block) and internal buffer length (l_intbuf_inout)
+inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
+{
+ typedef SizeType size_type;
+ size_type l_block = rl_block;
+ size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; //no internal buffer when xbuf can hold a whole block
+
+ while(xbuf.capacity() >= l_block*2){ //double the block length while xbuf can still hold the doubled block
+ l_block *= 2;
+ }
+
+ //This is the minimum number of keys to implement the ideal algorithm
+ size_type n_keys = len1/l_block+len2/l_block;
+ while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ //decrease n_keys until the condition stops holding...
+ --n_keys;
+ }
+ ++n_keys; //...then step back to the last value for which it held (checked by the assertion below)
+ BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
+
+ if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
+ n_keys = 0u; //zero keys is reported when xbuf supports trailing size_type storage for l_block/n_keys
+ }
+ l_intbuf_inout = l_intbuf;
+ rl_block = l_block;
+ return n_keys;
+}
+
+// Main explanation of the merge algorithm.
+//
+// csqrtlen = ceil(sqrt(len));
+//
+// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
+// unique elements are extracted from the first range and placed in the beginning of the range.
+//
+// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
+// are merged with a non-trivial ("smart") algorithm to form an ordered range of the trailing "len-to_collect" elements.
+//
+// Explanation of the "combine_blocks" step:
+//
+// * Trailing [first+to_collect, first+len1) elements are divided in groups of csqrtlen elements.
+// Remaining elements that can't form a group are grouped in front of those elements.
+// * Trailing [first+len1, first+len1+len2) elements are divided in groups of csqrtlen elements.
+// Remaining elements that can't form a group are grouped in the back of those elements.
+// * In parallel the following two steps are performed:
+// * Groups are selection-sorted by first or last element (depending whether they are going
+// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
+// * Elements of each block pair are merged using the csqrtlen buffer taking into account
+// if they belong to the first half or second half (marked by the key).
+//
+// * In the final merge step leading "to_collect" elements are merged with rotations
+// with the rest of merged elements in the "combine_blocks" step.
+//
+// Corner cases:
+//
+// * If not all "to_collect" elements can be extracted:
+//
+// * If more than a minimum number of elements is extracted
+// then the number of elements used as buffer and keys in the
+// "combine_blocks" step is reduced. If "combine_blocks" does not have enough keys due to this reduction
+// then a rotation-based smart merge is used.
+//
+// * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
+//
+// * If auxiliary memory is greater than or equal to min(len1, len2), a buffered merge is performed.
+//
+// * If len1 or len2 is less than or equal to 2*csqrtlen then a rotation-based merge is performed.
+//
+// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
+// then no csqrtlen elements need to be extracted and "combine_blocks" will use integral
+// keys to combine blocks.
+template<class RandIt, class Compare, class XBuf>
+void adaptive_merge_impl
+ ( RandIt first
+ , typename iterator_traits<RandIt>::size_type len1
+ , typename iterator_traits<RandIt>::size_type len2
+ , Compare comp
+ , XBuf & xbuf
+ )
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+
+ if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ //xbuf can hold the shorter range: plain buffered merge
+ buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
+ }
+ else{
+ const size_type len = len1+len2;
+ //Calculate ideal parameters and try to collect needed unique keys
+ size_type l_block = size_type(ceil_sqrt(len));
+
+ //One range is not big enough to extract keys and the internal buffer so a
+ //rotation-based merge will do just fine
+ if(len1 <= l_block*2 || len2 <= l_block*2){
+ merge_bufferless(first, first+len1, first+len1+len2, comp);
+ return;
+ }
+
+ //Detail the number of keys and internal buffer. If xbuf has enough memory, no
+ //internal buffer is needed so l_intbuf will remain 0.
+ size_type l_intbuf = 0;
+ size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); //may also enlarge l_block (passed by reference)
+ size_type const to_collect = l_intbuf+n_keys;
+ //Try to extract needed unique values from the first range
+ size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len);
+
+ //The minimum number of keys is not available on the first range, so fall back to rotations
+ if(collected != to_collect && collected < 4){
+ merge_bufferless(first, first+collected, first+len1, comp); //first merge the collected prefix back into the first range
+ merge_bufferless(first, first + len1, first + len1 + len2, comp); //then merge both ranges
+ return;
+ }
+
+ //If not enough keys but more than minimum, adjust the internal buffer and key count
+ bool use_internal_buf = collected == to_collect;
+ if (!use_internal_buf){
+ l_intbuf = 0u;
+ n_keys = collected;
+ l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); //use_internal_buf is passed by name and re-read below
+ //If use_internal_buf is false, then the internal buffer will be zero and rotation-based combination will be used
+ l_intbuf = use_internal_buf ? l_block : 0u;
+ }
+
+ bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; //xbuf is used only if everything was collected and it can hold a block
+ //Merge trailing elements using smart merges
+ adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
+ //Merge buffer and keys with the rest of the values
+ adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
+ }
+}
+
+} //namespace detail_adaptive {
+
+///@endcond
+
//! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
//! into one sorted range [first, last) according to the given comparison function comp.
//! The algorithm is stable (if there are equivalent elements in the original two ranges,
diff --git a/boost/move/algo/adaptive_sort.hpp b/boost/move/algo/adaptive_sort.hpp
index c96ab2d78b..2026f9c1b5 100644
--- a/boost/move/algo/adaptive_sort.hpp
+++ b/boost/move/algo/adaptive_sort.hpp
@@ -18,6 +18,558 @@
namespace boost {
namespace movelib {
+///@cond
+namespace detail_adaptive {
+
+template<class RandIt>
+void move_data_backward( RandIt cur_pos
+ , typename iterator_traits<RandIt>::size_type const l_data
+ , RandIt new_pos
+ , bool const xbuf_used)
+{
+ //Transfer [cur_pos, cur_pos+l_data) so that it ends at new_pos+l_data, using the
+ //"backward" variants: plain moves if xbuf_used, element swaps otherwise
+ if(xbuf_used){
+ boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
+ }
+ else{
+ boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
+ //Rotate does less moves but it seems slower due to cache issues
+ //rotate_gcd(first-l_block, first+len-l_block, first+len);
+ }
+}
+
+template<class RandIt>
+void move_data_forward( RandIt cur_pos
+ , typename iterator_traits<RandIt>::size_type const l_data
+ , RandIt new_pos
+ , bool const xbuf_used)
+{
+ //Transfer [cur_pos, cur_pos+l_data) so that it starts at new_pos, using the
+ //"forward" variants: plain moves if xbuf_used, element swaps otherwise
+ if(xbuf_used){
+ boost::move(cur_pos, cur_pos+l_data, new_pos);
+ }
+ else{
+ boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
+ //Rotate does less moves but it seems slower due to cache issues
+ //rotate_gcd(first-l_block, first+len-l_block, first+len);
+ }
+}
+
+// Build blocks of length 2*l_build_buf. l_build_buf/l_base is asserted to be a power of two.
+// input: [0, l_build_buf) elements are buffer, rest unsorted elements
+// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
+//
+// First elements are merged from right to left until elements start
+// at first. All old elements [first, first + l_build_buf) are placed at the end
+// [first+len-l_build_buf, first+len). To achieve this:
+// - If we have external memory to merge, we save elements from the buffer
+// so that a non-swapping merge is used. Buffer elements are restored
+// at the end of the buffer from the external memory.
+//
+// - When the external memory is not available or it is insufficient
+// for a merge operation, left swap merging is used.
+//
+// Once elements are merged left to right in blocks of l_build_buf, then a single left
+// to right merge step is performed to achieve merged blocks of size 2K.
+// If external memory is available, usual merge is used, swap merging otherwise.
+//
+// As a last step, if auxiliary memory is available, in-place merge is performed
+// until all is merged or auxiliary memory is not large enough.
+template<class RandIt, class Compare, class XBuf>
+typename iterator_traits<RandIt>::size_type
+ adaptive_sort_build_blocks
+ ( RandIt const first
+ , typename iterator_traits<RandIt>::size_type const len
+ , typename iterator_traits<RandIt>::size_type const l_base
+ , typename iterator_traits<RandIt>::size_type const l_build_buf
+ , XBuf & xbuf
+ , Compare comp)
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ BOOST_ASSERT(l_build_buf <= len);
+ BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); //x&(x-1)==0: l_build_buf/l_base has at most one bit set
+
+ //Place the start pointer after the buffer
+ RandIt first_block = first + l_build_buf;
+ size_type const elements_in_blocks = len - l_build_buf;
+
+ //////////////////////////////////
+ // Start of merge to left step
+ //////////////////////////////////
+ size_type l_merged = 0u;
+
+ BOOST_ASSERT(l_build_buf);
+ //If there is not enough buffer for the insertion sort step, just avoid the external buffer
+ size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); //number of buffer elements that fit in xbuf
+ kbuf = kbuf < l_base ? 0 : kbuf;
+
+ if(kbuf){
+ //Backup internal buffer values in external buffer so they can be overwritten
+ xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
+ l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
+
+ //Now combine them using the buffer. Elements from buffer can be
+ //overwritten since they've been saved to xbuf
+ l_merged = op_merge_left_step_multiple
+ ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
+
+ //Restore internal buffer from external buffer unless kbuf was l_build_buf,
+ //in that case restoration will happen later
+ if(kbuf != l_build_buf){
+ boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
+ }
+ }
+ else{
+ l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
+ rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); //rotation takes the place of the merge-to-left data movement on this path
+ }
+
+ //Now combine elements using the buffer. Elements from buffer can't be
+ //overwritten since xbuf was not big enough, so merge swapping elements.
+ l_merged = op_merge_left_step_multiple
+ (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
+
+ BOOST_ASSERT(l_merged == l_build_buf);
+
+ //////////////////////////////////
+ // Start of merge to right step
+ //////////////////////////////////
+
+ //If kbuf is l_build_buf then we can merge right without swapping
+ //Saved data is still in xbuf
+ if(kbuf && kbuf == l_build_buf){
+ op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
+ //Restore internal buffer from external buffer if kbuf was l_build_buf.
+ //as this operation was previously delayed.
+ boost::move(xbuf.data(), xbuf.data() + kbuf, first);
+ }
+ else{
+ op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
+ }
+ xbuf.clear();
+ //2*l_build_buf or total already merged
+ return min_value(elements_in_blocks, 2*l_build_buf);
+}
+
+template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf> //One combine pass: merges previously merged runs of l_prev_merged elements into segments of 2*l_prev_merged
+void adaptive_sort_combine_blocks
+ ( RandItKeys const keys
+ , KeyCompare key_comp
+ , RandIt const first
+ , typename iterator_traits<RandIt>::size_type const len
+ , typename iterator_traits<RandIt>::size_type const l_prev_merged
+ , typename iterator_traits<RandIt>::size_type const l_block
+ , bool const use_buf
+ , bool const xbuf_used
+ , XBuf & xbuf
+ , Compare comp
+ , bool merge_left)
+{
+ (void)xbuf; //xbuf is not used in this function
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+
+ size_type const l_reg_combined = 2*l_prev_merged; //length of a regular combined segment
+ size_type l_irreg_combined = 0;
+ size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
+ size_type const n_reg_combined = len/l_reg_combined;
+ RandIt combined_first = first;
+
+ (void)l_total_combined; //only read by the assertion below
+ BOOST_ASSERT(l_total_combined <= len);
+
+ size_type const max_i = n_reg_combined + (l_irreg_combined != 0); //regular segments plus one trailing irregular segment, if any
+
+ if(merge_left || !use_buf) { //segments are visited front to back
+ for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) {
+ //Now merge blocks
+ bool const is_last = combined_i==n_reg_combined; //true only for the trailing irregular segment
+ size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
+
+ range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
+ size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
+ combine_params( keys, key_comp, l_cur_combined
+ , l_prev_merged, l_block, rbuf
+ , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
+ if(!use_buf){
+ merge_blocks_bufferless
+ (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
+ }
+ else{
+ merge_blocks_left
+ (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
+ }
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
+ }
+ }
+ else{ //merge to the right with a buffer: segments are visited back to front
+ combined_first += l_reg_combined*(max_i-1); //start at the last segment
+ for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) {
+ bool const is_last = combined_i==n_reg_combined;
+ size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
+
+ RandIt const combined_last(combined_first+l_cur_combined);
+ range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
+ size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
+ combine_params( keys, key_comp, l_cur_combined
+ , l_prev_merged, l_block, rbuf
+ , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
+ merge_blocks_right
+ (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
+ }
+ }
+}
+
+//Repeats the combine step, doubling l_merged each time, until l_merged reaches l_data.
+//Returns true if buffer is placed in [buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
+//[buffer,buffer+l_intbuf). n_keys and l_intbuf are passed by reference and updated before returning.
+template<class RandIt, class Compare, class XBuf>
+bool adaptive_sort_combine_all_blocks
+ ( RandIt keys
+ , typename iterator_traits<RandIt>::size_type &n_keys
+ , RandIt const buffer
+ , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
+ , typename iterator_traits<RandIt>::size_type l_merged
+ , typename iterator_traits<RandIt>::size_type &l_intbuf
+ , XBuf & xbuf
+ , Compare comp)
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ RandIt const first = buffer + l_intbuf; //data starts right after the internal buffer
+ size_type const l_data = l_buf_plus_data - l_intbuf;
+ size_type const l_unique = l_intbuf+n_keys; //kept to recompute n_keys after l_intbuf is updated at the end
+ //Backup data to external buffer once if possible
+ bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
+ if(common_xbuf){
+ xbuf.move_assign(buffer, l_intbuf);
+ }
+
+ bool prev_merge_left = true;
+ size_type l_prev_total_combined = l_merged, l_prev_block = 0;
+ bool prev_use_internal_buf = true;
+
+ for( size_type n = 0; l_data > l_merged
+ ; l_merged*=2
+ , ++n){
+ //If l_intbuf is non-zero, use that internal buffer.
+ // Implies l_block == l_intbuf && use_internal_buf == true
+ //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
+ // Implies l_block == n_keys/2 && use_internal_buf == true
+ //Otherwise, just give up and use all keys to merge using rotations (use_internal_buf = false)
+ bool use_internal_buf = false;
+ size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf);
+ BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
+ BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
+ BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
+
+ bool const is_merge_left = (n&1) == 0; //even iterations merge to the left, odd ones to the right
+ size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
+ if(n && prev_use_internal_buf && prev_merge_left){ //data is repositioned only after a previous buffered merge to the left
+ if(is_merge_left || !use_internal_buf){
+ move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
+ }
+ else{
+ //Put the buffer just after l_total_combined
+ RandIt const buf_end = first+l_prev_total_combined;
+ RandIt const buf_beg = buf_end-l_block;
+ if(l_prev_total_combined > l_total_combined){
+ size_type const l_diff = l_prev_total_combined - l_total_combined;
+ move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
+ }
+ else if(l_prev_total_combined < l_total_combined){
+ size_type const l_diff = l_total_combined - l_prev_total_combined;
+ move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
+ }
+ }
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf);
+ }
+
+ //Combine to form l_merged*2 segments
+ if(n_keys){
+ adaptive_sort_combine_blocks
+ ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
+ , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
+ }
+ else{ //no keys in the range: size_type keys obtained from xbuf are used with less()
+ size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
+ adaptive_sort_combine_blocks
+ ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
+ , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
+ }
+
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf);
+ prev_merge_left = is_merge_left;
+ l_prev_total_combined = l_total_combined;
+ l_prev_block = l_block;
+ prev_use_internal_buf = use_internal_buf;
+ }
+ BOOST_ASSERT(l_prev_total_combined == l_data);
+ bool const buffer_right = prev_use_internal_buf && prev_merge_left;
+
+ l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
+ n_keys = l_unique - l_intbuf;
+ //Restore data from the external common buffer if used
+ if(common_xbuf){
+ if(buffer_right){
+ boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
+ }
+ else{
+ boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
+ }
+ }
+ return buffer_right;
+}
+
+
+template<class RandIt, class Compare, class XBuf> //Final step called by adaptive_sort_impl: sorts the key and buffer elements and merges them with the rest of [first, first+len)
+void adaptive_sort_final_merge( bool buffer_right
+ , RandIt const first
+ , typename iterator_traits<RandIt>::size_type const l_intbuf
+ , typename iterator_traits<RandIt>::size_type const n_keys
+ , typename iterator_traits<RandIt>::size_type const len
+ , XBuf & xbuf
+ , Compare comp)
+{
+ //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
+ xbuf.clear();
+
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ size_type const n_key_plus_buf = l_intbuf+n_keys;
+ if(buffer_right){ //buffer occupies the last l_intbuf positions, keys the first n_keys positions
+ //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
+ stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
+ stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf); //merge the trailing buffer with the data, using the antistable<Compare> wrapper
+ unstable_sort(first, first+n_keys, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf); //finally merge the leading keys with everything else
+ }
+ else{ //keys and buffer are both at the front: [first, first+n_key_plus_buf)
+ //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
+ stable_sort(first, first+n_key_plus_buf, comp, xbuf);
+ if(xbuf.capacity() >= n_key_plus_buf){
+ buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
+ }
+ else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){ //xbuf can hold the smaller of the two parts: merge in two steps
+ stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf);
+ }
+ else{
+ stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
+ }
+ }
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
+}
+
+template<class RandIt, class Compare, class Unsigned, class XBuf> //Computes n_keys, l_intbuf, l_base and l_build_buf (all outputs); returns false when too few unique elements could be collected
+bool adaptive_sort_build_params
+ (RandIt first, Unsigned const len, Compare comp
+ , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
+ , XBuf & xbuf
+ )
+{
+ typedef Unsigned size_type;
+
+ //Calculate ideal parameters and try to collect needed unique keys
+ l_base = 0u;
+
+ //Try to find a value near sqrt(len) that is 2^N*l_base where
+ //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
+ //as build_blocks merges to the left iteratively duplicating the
+ //merged size and all the buffer must be used just before the final
+ //merge to right step. This guarantees "build_blocks" produces
+ //segments of size l_build_buf*2, maximizing the classic merge phase.
+ l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
+
+ //The internal buffer can be expanded if there is enough external memory
+ while(xbuf.capacity() >= l_intbuf*2){
+ l_intbuf *= 2;
+ }
+
+ //This is the minimum number of keys to implement the ideal algorithm
+ //
+ //l_intbuf is used as buffer plus the key count
+ size_type n_min_ideal_keys = l_intbuf-1;
+ while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ //decrease until the condition stops holding...
+ --n_min_ideal_keys;
+ }
+ n_min_ideal_keys += 1; //...then step back to the last value for which it held
+ BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
+
+ if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
+ n_keys = 0u; //no keys are taken from the range on this path
+ l_build_buf = l_intbuf;
+ }
+ else{
+ //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
+ //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
+ //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
+ //
+ //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
+ //(to be used for keys in combine_all_blocks) as the whole l_build_buf
+ //will be backed up in the buffer during build_blocks.
+ bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
+ size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
+ size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
+
+ //If available memory is 2*sqrt(l), then for "build_params"
+ //the situation is the same as if 2*l_intbuf were collected.
+ if(non_unique_buf && collected == n_min_ideal_keys){
+ l_build_buf = l_intbuf;
+ n_keys = n_min_ideal_keys;
+ }
+ else if(collected == 2*l_intbuf){
+ //l_intbuf*2 elements found. Use all of them in the build phase
+ l_build_buf = l_intbuf*2;
+ n_keys = l_intbuf;
+ }
+ else if(collected == (n_min_ideal_keys+l_intbuf)){
+ l_build_buf = l_intbuf;
+ n_keys = n_min_ideal_keys;
+ }
+ //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
+ //is possible (due to very low unique keys), then go to a slow sort based on rotations.
+ else{
+ BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
+ if(collected < 4){ //No combination possible with fewer than 4 keys
+ return false;
+ }
+ n_keys = l_intbuf;
+ while(n_keys&(n_keys-1)){
+ n_keys &= n_keys-1; // clear the lowest set bit until a power of 2 remains
+ }
+ while(n_keys > collected){ //halve until no more keys are requested than were collected
+ n_keys/=2;
+ }
+ //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
+ l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
+ l_intbuf = 0;
+ l_build_buf = n_keys;
+ }
+ BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
+ }
+
+ return true;
+}
+
+// Main explanation of the sort algorithm.
+//
+// csqrtlen = ceil(sqrt(len));
+//
+// * First, 2*csqrtlen unique elements are extracted from elements to be
+// sorted and placed in the beginning of the range.
+//
+// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
+// will be used as auxiliary memory, so trailing len-2*csqrtlen elements
+// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
+// 2*csqrtlen unique elements are again the leading elements of the whole range.
+//
+// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
+// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
+// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
+// elements, etc) until all trailing (len-2*csqrtlen) elements are merged.
+//
+// In "combine_blocks" len/csqrtlen elements are used as "keys" (markers) to
+// know if elements belong to the first or second block to be merged and another
+// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
+//
+// Iteratively until all trailing (len-2*csqrtlen) elements are merged:
+// Iteratively for each pair of previously merged block:
+// * Blocks are divided into groups of csqrtlen elements and
+// 2*merged_block/csqrtlen keys are sorted to be used as markers
+// * Groups are selection-sorted by first or last element (depending whether they are going
+// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
+// * Elements of each block pair are merged using the csqrtlen buffer taking into account
+// if they belong to the first half or second half (marked by the key).
+//
+// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
+// rotations with the rest of sorted elements in the "combine_blocks" step.
+//
+// Corner cases:
+//
+// * If 2*csqrtlen elements can't be extracted:
+//
+// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
+// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
+// means that an additional "combine_blocks" step will be needed to merge all elements.
+//
+// * If csqrtlen+len/csqrtlen elements can't be extracted, but still more than a minimum,
+// then the number of elements used as buffer and keys in the "build_blocks"
+// and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys due to this reduction
+// then a rotation-based smart merge is used.
+//
+// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
+//
+// * If auxiliary memory is greater than or equal to ceil(len/2), half-copying mergesort is used.
+//
+// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
+// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
+// keys to combine blocks.
+//
+// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
+// using classic merge and "combine_blocks" will use bigger blocks when merging.
+template<class RandIt, class Compare, class XBuf>
+void adaptive_sort_impl
+ ( RandIt first
+ , typename iterator_traits<RandIt>::size_type const len
+ , Compare comp
+ , XBuf & xbuf
+ )
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+
+ //Small sorts go directly to insertion sort
+ if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
+ insertion_sort(first, first + len, comp);
+ }
+ else if((len-len/2) <= xbuf.capacity()){ //len-len/2 is ceil(len/2): xbuf can hold half of the input
+ merge_sort(first, first+len, comp, xbuf.data());
+ }
+ else{
+ //Make sure it is at least four
+ BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
+
+ size_type l_base = 0;
+ size_type l_intbuf = 0;
+ size_type n_keys = 0;
+ size_type l_build_buf = 0;
+
+ //Calculate and extract needed unique elements. If a minimum is not achieved
+ //fallback to a slow stable sort
+ if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
+ stable_sort(first, first+len, comp, xbuf);
+ }
+ else{
+ BOOST_ASSERT(l_build_buf);
+ //Otherwise, continue the adaptive_sort
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
+ size_type const n_key_plus_buf = l_intbuf+n_keys;
+ //l_build_buf is always power of two if l_intbuf is zero
+ BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
+
+ //Classic merge sort until internal buffer and xbuf are exhausted
+ size_type const l_merged = adaptive_sort_build_blocks
+ (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); //the range passed starts l_build_buf elements before the end of the keys+buffer prefix
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);
+
+ //Non-trivial merge
+ bool const buffer_right = adaptive_sort_combine_all_blocks
+ (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); //n_keys and l_intbuf may be modified by this call
+
+ //Sort keys and buffer and merge the whole sequence
+ adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
+ }
+ }
+}
+
+} //namespace detail_adaptive {
+
+///@endcond
+
//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
//! to comparison functor "comp". The sort is stable (order of equal elements
//! is guaranteed to be preserved). Performance is improved if additional raw storage is
diff --git a/boost/move/algo/detail/adaptive_sort_merge.hpp b/boost/move/algo/detail/adaptive_sort_merge.hpp
index 5085100ad0..1606fde66a 100644
--- a/boost/move/algo/detail/adaptive_sort_merge.hpp
+++ b/boost/move/algo/detail/adaptive_sort_merge.hpp
@@ -49,7 +49,9 @@
#include <boost/move/adl_move_swap.hpp>
#include <boost/move/algo/detail/insertion_sort.hpp>
#include <boost/move/algo/detail/merge_sort.hpp>
+#include <boost/move/algo/detail/heap_sort.hpp>
#include <boost/move/algo/detail/merge.hpp>
+#include <boost/move/algo/detail/is_sorted.hpp>
#include <boost/assert.hpp>
#include <boost/cstdint.hpp>
@@ -84,11 +86,26 @@
#define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L)
#endif
-
-
namespace boost {
namespace movelib {
+#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS)
+
+bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less)
+{
+ if (first != last) {
+ const order_perf_type *next = first, *cur(first);
+ while (++next != last) {
+ if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val)))
+ return false;
+ cur = next;
+ }
+ }
+ return true;
+}
+
+#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
+
namespace detail_adaptive {
static const std::size_t AdaptiveSortInsertionSortThreshold = 16;
@@ -113,51 +130,6 @@ const T &max_value(const T &a, const T &b)
return a > b ? a : b;
}
-template<class ForwardIt, class Pred>
-bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred)
-{
- if (first != last) {
- ForwardIt next = first, cur(first);
- while (++next != last) {
- if (pred(*next, *cur))
- return false;
- cur = next;
- }
- }
- return true;
-}
-
-#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS)
-
-bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less)
-{
- if (first != last) {
- const order_perf_type *next = first, *cur(first);
- while (++next != last) {
- if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val)))
- return false;
- cur = next;
- }
- }
- return true;
-}
-
-#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
-
-template<class ForwardIt, class Pred>
-bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred)
-{
- if (first != last) {
- ForwardIt next = first;
- while (++next != last) {
- if (!pred(*first, *next))
- return false;
- first = next;
- }
- }
- return true;
-}
-
template<class ForwardIt, class Pred, class V>
typename iterator_traits<ForwardIt>::size_type
count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v)
@@ -414,44 +386,6 @@ RandIt skip_until_merge
}
-template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
-RandItB op_buffered_partial_merge_to_range1_and_buffer
- ( RandIt1 first1, RandIt1 const last1
- , RandIt2 &rfirst2, RandIt2 const last2
- , RandItB &rfirstb, Compare comp, Op op )
-{
- RandItB firstb = rfirstb;
- RandItB lastb = firstb;
- RandIt2 first2 = rfirst2;
-
- //Move to buffer while merging
- //Three way moves need less moves when op is swap_op so use it
- //when merging elements from range2 to the destination occupied by range1
- if(first1 != last1 && first2 != last2){
- op(three_way_t(), first2++, first1++, lastb++);
-
- while(true){
- if(first1 == last1){
- break;
- }
- if(first2 == last2){
- lastb = op(forward_t(), first1, last1, firstb);
- break;
- }
- if (comp(*first2, *firstb)) {
- op(three_way_t(), first2++, first1++, lastb++);
- }
- else {
- op(three_way_t(), firstb++, first1++, lastb++);
- }
- }
- rfirst2 = first2;
- rfirstb = firstb;
- }
-
- return lastb;
-}
-
template<class RandItKeys, class RandIt>
void swap_and_update_key
( RandItKeys const key_next
@@ -567,7 +501,7 @@ void merge_blocks_bufferless
typedef typename iterator_traits<RandIt>::size_type size_type;
size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
//BOOST_ASSERT(n_block_a || n_block_b);
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp));
BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
size_type n_bef_irreg2 = 0;
@@ -890,11 +824,40 @@ void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf)
}
template<class RandIt, class Comp, class XBuf>
+void unstable_sort( RandIt first, RandIt last
+ , Comp comp
+ , XBuf & xbuf)
+{
+ heap_sort(first, last, comp);(void)xbuf;
+}
+
+template<class RandIt, class Compare, class XBuf>
+void stable_merge
+ ( RandIt first, RandIt const middle, RandIt last
+ , Compare comp
+ , XBuf &xbuf)
+{
+ BOOST_ASSERT(xbuf.empty());
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ size_type const len1 = size_type(middle-first);
+ size_type const len2 = size_type(last-middle);
+ size_type const l_min = min_value(len1, len2);
+ if(xbuf.capacity() >= l_min){
+ buffered_merge(first, middle, last, comp, xbuf);
+ xbuf.clear();
+ }
+ else{
+ merge_bufferless(first, middle, last, comp);
+ }
+}
+
+template<class RandIt, class Comp, class XBuf>
void initialize_keys( RandIt first, RandIt last
, Comp comp
, XBuf & xbuf)
{
- stable_sort(first, last, comp, xbuf);
+ unstable_sort(first, last, comp, xbuf);
+ BOOST_ASSERT(boost::movelib::is_sorted_and_unique(first, last, comp));
}
template<class RandIt, class U>
@@ -910,40 +873,6 @@ void initialize_keys( RandIt first, RandIt last
}
}
-template<class RandIt>
-void move_data_backward( RandIt cur_pos
- , typename iterator_traits<RandIt>::size_type const l_data
- , RandIt new_pos
- , bool const xbuf_used)
-{
- //Move buffer to the total combination right
- if(xbuf_used){
- boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
- }
- else{
- boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
- //Rotate does less moves but it seems slower due to cache issues
- //rotate_gcd(first-l_block, first+len-l_block, first+len);
- }
-}
-
-template<class RandIt>
-void move_data_forward( RandIt cur_pos
- , typename iterator_traits<RandIt>::size_type const l_data
- , RandIt new_pos
- , bool const xbuf_used)
-{
- //Move buffer to the total combination right
- if(xbuf_used){
- boost::move(cur_pos, cur_pos+l_data, new_pos);
- }
- else{
- boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
- //Rotate does less moves but it seems slower due to cache issues
- //rotate_gcd(first-l_block, first+len-l_block, first+len);
- }
-}
-
template <class Unsigned>
Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0)
{
@@ -994,43 +923,7 @@ void combine_params
}
}
-template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
-RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer
- ( RandIt1 first1, RandIt1 const last1
- , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min
- , RandItB &rfirstb, Compare comp, Op op )
-{
- RandItB firstb = rfirstb;
- RandItB lastb = firstb;
- RandIt2 first2 = rfirst2;
- //Move to buffer while merging
- //Three way moves need less moves when op is swap_op so use it
- //when merging elements from range2 to the destination occupied by range1
- if(first1 != last1 && first2 != last2){
- RandIt2 first_min = rfirst_min;
- op(four_way_t(), first2++, first_min++, first1++, lastb++);
-
- while(first1 != last1){
- if(first2 == last2){
- lastb = op(forward_t(), first1, last1, firstb);
- break;
- }
-
- if(comp(*first_min, *firstb)){
- op( four_way_t(), first2++, first_min++, first1++, lastb++);
- }
- else{
- op(three_way_t(), firstb++, first1++, lastb++);
- }
- }
- rfirst2 = first2;
- rfirstb = firstb;
- rfirst_min = first_min;
- }
-
- return lastb;
-}
//////////////////////////////////
//
@@ -1072,10 +965,14 @@ OutputIt op_partial_merge
}
//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
//
-// partial_merge_and_swap
+// op_partial_merge_and_save
//
//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
OutputIt op_partial_merge_and_swap_impl
(InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op)
@@ -1111,6 +1008,82 @@ OutputIt op_partial_merge_and_swap
: op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable<Compare>(comp), op);
}
+template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
+RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer
+ ( RandIt1 first1, RandIt1 const last1
+ , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min
+ , RandItB &rfirstb, Compare comp, Op op )
+{
+ RandItB firstb = rfirstb;
+ RandItB lastb = firstb;
+ RandIt2 first2 = rfirst2;
+
+ //Move to buffer while merging
+ //Three way moves need less moves when op is swap_op so use it
+ //when merging elements from range2 to the destination occupied by range1
+ if(first1 != last1 && first2 != last2){
+ RandIt2 first_min = rfirst_min;
+ op(four_way_t(), first2++, first_min++, first1++, lastb++);
+
+ while(first1 != last1){
+ if(first2 == last2){
+ lastb = op(forward_t(), first1, last1, firstb);
+ break;
+ }
+
+ if(comp(*first_min, *firstb)){
+ op( four_way_t(), first2++, first_min++, first1++, lastb++);
+ }
+ else{
+ op(three_way_t(), firstb++, first1++, lastb++);
+ }
+ }
+ rfirst2 = first2;
+ rfirstb = firstb;
+ rfirst_min = first_min;
+ }
+
+ return lastb;
+}
+
+template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
+RandItB op_buffered_partial_merge_to_range1_and_buffer
+ ( RandIt1 first1, RandIt1 const last1
+ , RandIt2 &rfirst2, RandIt2 const last2
+ , RandItB &rfirstb, Compare comp, Op op )
+{
+ RandItB firstb = rfirstb;
+ RandItB lastb = firstb;
+ RandIt2 first2 = rfirst2;
+
+ //Move to buffer while merging
+ //Three way moves need less moves when op is swap_op so use it
+ //when merging elements from range2 to the destination occupied by range1
+ if(first1 != last1 && first2 != last2){
+ op(three_way_t(), first2++, first1++, lastb++);
+
+ while(true){
+ if(first1 == last1){
+ break;
+ }
+ if(first2 == last2){
+ lastb = op(forward_t(), first1, last1, firstb);
+ break;
+ }
+ if (comp(*first2, *firstb)) {
+ op(three_way_t(), first2++, first1++, lastb++);
+ }
+ else {
+ op(three_way_t(), firstb++, first1++, lastb++);
+ }
+ }
+ rfirst2 = first2;
+ rfirstb = firstb;
+ }
+
+ return lastb;
+}
+
template<class RandIt, class RandItBuf, class Compare, class Op>
RandIt op_partial_merge_and_save_impl
( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min
@@ -1162,7 +1135,15 @@ RandIt op_partial_merge_and_save
;
}
-
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+//
+// op_merge_blocks_with_irreg
+//
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
template<class RandItKeys, class KeyCompare, class RandIt, class RandIt2, class OutputIt, class Compare, class Op>
OutputIt op_merge_blocks_with_irreg
@@ -1215,6 +1196,16 @@ OutputIt op_merge_blocks_with_irreg
return dest;
}
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+//
+// op_merge_blocks_left/right
+//
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+
template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op>
void op_merge_blocks_left
( RandItKeys const key_first
@@ -1230,7 +1221,7 @@ void op_merge_blocks_left
typedef typename iterator_traits<RandIt>::size_type size_type;
size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
// BOOST_ASSERT(n_block_a || n_block_b);
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp));
BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
size_type n_block_b_left = n_block_b;
@@ -1394,7 +1385,6 @@ void merge_blocks_left
}
}
-
// first - first element to merge.
// [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer
// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
@@ -1426,6 +1416,15 @@ void merge_blocks_right
, inverse<Compare>(comp), xbuf_used);
}
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+//
+// op_merge_blocks_with_buf
+//
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class RandItBuf>
void op_merge_blocks_with_buf
( RandItKeys key_first
@@ -1443,7 +1442,7 @@ void op_merge_blocks_with_buf
typedef typename iterator_traits<RandIt>::size_type size_type;
size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
//BOOST_ASSERT(n_block_a || n_block_b);
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
+ BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp));
BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
size_type n_block_b_left = n_block_b;
@@ -1552,29 +1551,15 @@ void op_merge_blocks_with_buf
BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp));
}
-template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class RandItBuf>
-void merge_blocks_with_buf
- ( RandItKeys key_first
- , KeyCompare key_comp
- , RandIt const first
- , typename iterator_traits<RandIt>::size_type const l_block
- , typename iterator_traits<RandIt>::size_type const l_irreg1
- , typename iterator_traits<RandIt>::size_type const n_block_a
- , typename iterator_traits<RandIt>::size_type const n_block_b
- , typename iterator_traits<RandIt>::size_type const l_irreg2
- , Compare comp
- , RandItBuf const buf_first
- , bool const xbuf_used)
-{
- if(xbuf_used){
- op_merge_blocks_with_buf
- (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first);
- }
- else{
- op_merge_blocks_with_buf
- (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first);
- }
-}
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+//
+// op_insertion_sort_step_left/right
+//
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
template<class RandIt, class Compare, class Op>
typename iterator_traits<RandIt>::size_type
@@ -1596,6 +1581,41 @@ typename iterator_traits<RandIt>::size_type
return s;
}
+template<class RandIt, class Compare, class Op>
+void op_merge_right_step_once
+ ( RandIt first_block
+ , typename iterator_traits<RandIt>::size_type const elements_in_blocks
+ , typename iterator_traits<RandIt>::size_type const l_build_buf
+ , Compare comp
+ , Op op)
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+ size_type restk = elements_in_blocks%(2*l_build_buf);
+ size_type p = elements_in_blocks - restk;
+ BOOST_ASSERT(0 == (p%(2*l_build_buf)));
+
+ if(restk <= l_build_buf){
+ op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf);
+ }
+ else{
+ op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op);
+ }
+ while(p>0){
+ p -= 2*l_build_buf;
+ op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op);
+ }
+}
+
+
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+//
+// insertion_sort_step
+//
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
template<class RandIt, class Compare>
typename iterator_traits<RandIt>::size_type
insertion_sort_step
@@ -1616,6 +1636,15 @@ typename iterator_traits<RandIt>::size_type
return s;
}
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
+//
+// op_merge_left_step_multiple
+//
+//////////////////////////////////
+//////////////////////////////////
+//////////////////////////////////
template<class RandIt, class Compare, class Op>
typename iterator_traits<RandIt>::size_type
op_merge_left_step_multiple
@@ -1651,815 +1680,6 @@ typename iterator_traits<RandIt>::size_type
return l_merged;
}
-template<class RandIt, class Compare, class Op>
-void op_merge_right_step_once
- ( RandIt first_block
- , typename iterator_traits<RandIt>::size_type const elements_in_blocks
- , typename iterator_traits<RandIt>::size_type const l_build_buf
- , Compare comp
- , Op op)
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
- size_type restk = elements_in_blocks%(2*l_build_buf);
- size_type p = elements_in_blocks - restk;
- BOOST_ASSERT(0 == (p%(2*l_build_buf)));
-
- if(restk <= l_build_buf){
- op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf);
- }
- else{
- op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op);
- }
- while(p>0){
- p -= 2*l_build_buf;
- op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op);
- }
-}
-
-
-// build blocks of length 2*l_build_buf. l_build_buf is power of two
-// input: [0, l_build_buf) elements are buffer, rest unsorted elements
-// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
-//
-// First elements are merged from right to left until elements start
-// at first. All old elements [first, first + l_build_buf) are placed at the end
-// [first+len-l_build_buf, first+len). To achieve this:
-// - If we have external memory to merge, we save elements from the buffer
-// so that a non-swapping merge is used. Buffer elements are restored
-// at the end of the buffer from the external memory.
-//
-// - When the external memory is not available or it is insufficient
-// for a merge operation, left swap merging is used.
-//
-// Once elements are merged left to right in blocks of l_build_buf, then a single left
-// to right merge step is performed to achieve merged blocks of size 2K.
-// If external memory is available, usual merge is used, swap merging otherwise.
-//
-// As a last step, if auxiliary memory is available in-place merge is performed.
-// until all is merged or auxiliary memory is not large enough.
-template<class RandIt, class Compare, class XBuf>
-typename iterator_traits<RandIt>::size_type
- adaptive_sort_build_blocks
- ( RandIt const first
- , typename iterator_traits<RandIt>::size_type const len
- , typename iterator_traits<RandIt>::size_type const l_base
- , typename iterator_traits<RandIt>::size_type const l_build_buf
- , XBuf & xbuf
- , Compare comp)
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
- BOOST_ASSERT(l_build_buf <= len);
- BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));
-
- //Place the start pointer after the buffer
- RandIt first_block = first + l_build_buf;
- size_type const elements_in_blocks = len - l_build_buf;
-
- //////////////////////////////////
- // Start of merge to left step
- //////////////////////////////////
- size_type l_merged = 0u;
-
- BOOST_ASSERT(l_build_buf);
- //If there is no enough buffer for the insertion sort step, just avoid the external buffer
- size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
- kbuf = kbuf < l_base ? 0 : kbuf;
-
- if(kbuf){
- //Backup internal buffer values in external buffer so they can be overwritten
- xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
- l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
-
- //Now combine them using the buffer. Elements from buffer can be
- //overwritten since they've been saved to xbuf
- l_merged = op_merge_left_step_multiple
- ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
-
- //Restore internal buffer from external buffer unless kbuf was l_build_buf,
- //in that case restoration will happen later
- if(kbuf != l_build_buf){
- boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
- }
- }
- else{
- l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
- rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
- }
-
- //Now combine elements using the buffer. Elements from buffer can't be
- //overwritten since xbuf was not big enough, so merge swapping elements.
- l_merged = op_merge_left_step_multiple
- (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
-
- BOOST_ASSERT(l_merged == l_build_buf);
-
- //////////////////////////////////
- // Start of merge to right step
- //////////////////////////////////
-
- //If kbuf is l_build_buf then we can merge right without swapping
- //Saved data is still in xbuf
- if(kbuf && kbuf == l_build_buf){
- op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
- //Restore internal buffer from external buffer if kbuf was l_build_buf.
- //as this operation was previously delayed.
- boost::move(xbuf.data(), xbuf.data() + kbuf, first);
- }
- else{
- op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
- }
- xbuf.clear();
- //2*l_build_buf or total already merged
- return min_value(elements_in_blocks, 2*l_build_buf);
-}
-
-template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
-void adaptive_sort_combine_blocks
- ( RandItKeys const keys
- , KeyCompare key_comp
- , RandIt const first
- , typename iterator_traits<RandIt>::size_type const len
- , typename iterator_traits<RandIt>::size_type const l_prev_merged
- , typename iterator_traits<RandIt>::size_type const l_block
- , bool const use_buf
- , bool const xbuf_used
- , XBuf & xbuf
- , Compare comp
- , bool merge_left)
-{
- (void)xbuf;
- typedef typename iterator_traits<RandIt>::size_type size_type;
-
- size_type const l_reg_combined = 2*l_prev_merged;
- size_type l_irreg_combined = 0;
- size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
- size_type const n_reg_combined = len/l_reg_combined;
- RandIt combined_first = first;
-
- (void)l_total_combined;
- BOOST_ASSERT(l_total_combined <= len);
-
- size_type const max_i = n_reg_combined + (l_irreg_combined != 0);
-
- if(merge_left || !use_buf) {
- for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) {
- //Now merge blocks
- bool const is_last = combined_i==n_reg_combined;
- size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
-
- range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
- size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
- combine_params( keys, key_comp, l_cur_combined
- , l_prev_merged, l_block, rbuf
- , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
- if(!use_buf){
- merge_blocks_bufferless
- (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
- }
- else{
- merge_blocks_left
- (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
- }
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
- }
- }
- else{
- combined_first += l_reg_combined*(max_i-1);
- for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) {
- bool const is_last = combined_i==n_reg_combined;
- size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
-
- RandIt const combined_last(combined_first+l_cur_combined);
- range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
- size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
- combine_params( keys, key_comp, l_cur_combined
- , l_prev_merged, l_block, rbuf
- , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
- BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
- merge_blocks_right
- (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
- }
- }
-}
-
-//Returns true if buffer is placed in
-//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
-//[buffer,buffer+l_intbuf)
-template<class RandIt, class Compare, class XBuf>
-bool adaptive_sort_combine_all_blocks
- ( RandIt keys
- , typename iterator_traits<RandIt>::size_type &n_keys
- , RandIt const buffer
- , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
- , typename iterator_traits<RandIt>::size_type l_merged
- , typename iterator_traits<RandIt>::size_type &l_intbuf
- , XBuf & xbuf
- , Compare comp)
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
- RandIt const first = buffer + l_intbuf;
- size_type const l_data = l_buf_plus_data - l_intbuf;
- size_type const l_unique = l_intbuf+n_keys;
- //Backup data to external buffer once if possible
- bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
- if(common_xbuf){
- xbuf.move_assign(buffer, l_intbuf);
- }
-
- bool prev_merge_left = true;
- size_type l_prev_total_combined = l_merged, l_prev_block = 0;
- bool prev_use_internal_buf = true;
-
- for( size_type n = 0; l_data > l_merged
- ; l_merged*=2
- , ++n){
- //If l_intbuf is non-zero, use that internal buffer.
- // Implies l_block == l_intbuf && use_internal_buf == true
- //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
- // Implies l_block == n_keys/2 && use_internal_buf == true
- //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false)
- bool use_internal_buf = false;
- size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf);
- BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
- BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
- BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
-
- bool const is_merge_left = (n&1) == 0;
- size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
- if(n && prev_use_internal_buf && prev_merge_left){
- if(is_merge_left || !use_internal_buf){
- move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
- }
- else{
- //Put the buffer just after l_total_combined
- RandIt const buf_end = first+l_prev_total_combined;
- RandIt const buf_beg = buf_end-l_block;
- if(l_prev_total_combined > l_total_combined){
- size_type const l_diff = l_prev_total_combined - l_total_combined;
- move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
- }
- else if(l_prev_total_combined < l_total_combined){
- size_type const l_diff = l_total_combined - l_prev_total_combined;
- move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
- }
- }
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf);
- }
-
- //Combine to form l_merged*2 segments
- if(n_keys){
- adaptive_sort_combine_blocks
- ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
- , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
- }
- else{
- size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
- adaptive_sort_combine_blocks
- ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
- , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
- }
-
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf);
- prev_merge_left = is_merge_left;
- l_prev_total_combined = l_total_combined;
- l_prev_block = l_block;
- prev_use_internal_buf = use_internal_buf;
- }
- BOOST_ASSERT(l_prev_total_combined == l_data);
- bool const buffer_right = prev_use_internal_buf && prev_merge_left;
-
- l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
- n_keys = l_unique - l_intbuf;
- //Restore data from to external common buffer if used
- if(common_xbuf){
- if(buffer_right){
- boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
- }
- else{
- boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
- }
- }
- return buffer_right;
-}
-
-template<class RandIt, class Compare, class XBuf>
-void stable_merge
- ( RandIt first, RandIt const middle, RandIt last
- , Compare comp
- , XBuf &xbuf)
-{
- BOOST_ASSERT(xbuf.empty());
- typedef typename iterator_traits<RandIt>::size_type size_type;
- size_type const len1 = size_type(middle-first);
- size_type const len2 = size_type(last-middle);
- size_type const l_min = min_value(len1, len2);
- if(xbuf.capacity() >= l_min){
- buffered_merge(first, middle, last, comp, xbuf);
- xbuf.clear();
- }
- else{
- merge_bufferless(first, middle, last, comp);
- }
-}
-
-
-template<class RandIt, class Compare, class XBuf>
-void adaptive_sort_final_merge( bool buffer_right
- , RandIt const first
- , typename iterator_traits<RandIt>::size_type const l_intbuf
- , typename iterator_traits<RandIt>::size_type const n_keys
- , typename iterator_traits<RandIt>::size_type const len
- , XBuf & xbuf
- , Compare comp)
-{
- //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
- xbuf.clear();
-
- typedef typename iterator_traits<RandIt>::size_type size_type;
- size_type const n_key_plus_buf = l_intbuf+n_keys;
- if(buffer_right){
- stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
- stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
- stable_sort(first, first+n_keys, comp, xbuf);
- stable_merge(first, first+n_keys, first+len, comp, xbuf);
- }
- else{
- stable_sort(first, first+n_key_plus_buf, comp, xbuf);
- if(xbuf.capacity() >= n_key_plus_buf){
- buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
- }
- else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
- stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
- stable_merge(first, first+n_keys, first+len, comp, xbuf);
- }
- else{
- merge_bufferless(first, first+n_key_plus_buf, first+len, comp);
- }
- }
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
-}
-
-template<class RandIt, class Compare, class Unsigned, class XBuf>
-bool adaptive_sort_build_params
- (RandIt first, Unsigned const len, Compare comp
- , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
- , XBuf & xbuf
- )
-{
- typedef Unsigned size_type;
-
- //Calculate ideal parameters and try to collect needed unique keys
- l_base = 0u;
-
- //Try to find a value near sqrt(len) that is 2^N*l_base where
- //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
- //as build_blocks merges to the left iteratively duplicating the
- //merged size and all the buffer must be used just before the final
- //merge to right step. This guarantees "build_blocks" produces
- //segments of size l_build_buf*2, maximizing the classic merge phase.
- l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
-
- //The internal buffer can be expanded if there is enough external memory
- while(xbuf.capacity() >= l_intbuf*2){
- l_intbuf *= 2;
- }
-
- //This is the minimum number of keys to implement the ideal algorithm
- //
- //l_intbuf is used as buffer plus the key count
- size_type n_min_ideal_keys = l_intbuf-1;
- while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
- --n_min_ideal_keys;
- }
- n_min_ideal_keys += 1;
- BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
-
- if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
- n_keys = 0u;
- l_build_buf = l_intbuf;
- }
- else{
- //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
- //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
- //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
- //
- //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
- //(to be used for keys in combine_all_blocks) as the whole l_build_buf
- //will be backuped in the buffer during build_blocks.
- bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
- size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
- size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
-
- //If available memory is 2*sqrt(l), then for "build_params"
- //the situation is the same as if 2*l_intbuf were collected.
- if(non_unique_buf && collected == n_min_ideal_keys){
- l_build_buf = l_intbuf;
- n_keys = n_min_ideal_keys;
- }
- else if(collected == 2*l_intbuf){
- //l_intbuf*2 elements found. Use all of them in the build phase
- l_build_buf = l_intbuf*2;
- n_keys = l_intbuf;
- }
- else if(collected == (n_min_ideal_keys+l_intbuf)){
- l_build_buf = l_intbuf;
- n_keys = n_min_ideal_keys;
- }
- //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
- //is possible (due to very low unique keys), then go to a slow sort based on rotations.
- else{
- BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
- if(collected < 4){ //No combination possible with less that 4 keys
- return false;
- }
- n_keys = l_intbuf;
- while(n_keys&(n_keys-1)){
- n_keys &= n_keys-1; // make it power or 2
- }
- while(n_keys > collected){
- n_keys/=2;
- }
- //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
- l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
- l_intbuf = 0;
- l_build_buf = n_keys;
- }
- BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
- }
-
- return true;
-}
-
-template<class RandIt, class Compare, class XBuf>
-inline void adaptive_merge_combine_blocks( RandIt first
- , typename iterator_traits<RandIt>::size_type len1
- , typename iterator_traits<RandIt>::size_type len2
- , typename iterator_traits<RandIt>::size_type collected
- , typename iterator_traits<RandIt>::size_type n_keys
- , typename iterator_traits<RandIt>::size_type l_block
- , bool use_internal_buf
- , bool xbuf_used
- , Compare comp
- , XBuf & xbuf
- )
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
- size_type const len = len1+len2;
- size_type const l_combine = len-collected;
- size_type const l_combine1 = len1-collected;
-
- if(n_keys){
- RandIt const first_data = first+collected;
- RandIt const keys = first;
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
- if(xbuf_used){
- if(xbuf.size() < l_block){
- xbuf.initialize_until(l_block, *first);
- }
- BOOST_ASSERT(xbuf.size() >= l_block);
- size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
- combine_params( keys, comp, l_combine
- , l_combine1, l_block, xbuf
- , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
- merge_blocks_with_buf
- (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len);
- }
- else{
- size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
- combine_params( keys, comp, l_combine
- , l_combine1, l_block, xbuf
- , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
- if(use_internal_buf){
- merge_blocks_with_buf
- (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len);
- }
- else{
- merge_blocks_bufferless
- (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len);
- }
- }
- }
- else{
- xbuf.shrink_to_fit(l_block);
- if(xbuf.size() < l_block){
- xbuf.initialize_until(l_block, *first);
- }
- size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
- size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
- combine_params( uint_keys, less(), l_combine
- , l_combine1, l_block, xbuf
- , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
- BOOST_ASSERT(xbuf.size() >= l_block);
- merge_blocks_with_buf
- (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true);
- xbuf.clear();
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len);
- }
-}
-
-template<class RandIt, class Compare, class XBuf>
-inline void adaptive_merge_final_merge( RandIt first
- , typename iterator_traits<RandIt>::size_type len1
- , typename iterator_traits<RandIt>::size_type len2
- , typename iterator_traits<RandIt>::size_type collected
- , typename iterator_traits<RandIt>::size_type l_intbuf
- , typename iterator_traits<RandIt>::size_type l_block
- , bool use_internal_buf
- , bool xbuf_used
- , Compare comp
- , XBuf & xbuf
- )
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
- (void)l_block;
- size_type n_keys = collected-l_intbuf;
- size_type len = len1+len2;
- if(use_internal_buf){
- if(xbuf_used){
- xbuf.clear();
- //Nothing to do
- if(n_keys){
- stable_sort(first, first+n_keys, comp, xbuf);
- stable_merge(first, first+n_keys, first+len, comp, xbuf);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len);
- }
- }
- else{
- xbuf.clear();
- stable_sort(first, first+collected, comp, xbuf);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
- stable_merge(first, first+collected, first+len, comp, xbuf);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
- }
- }
- else{
- xbuf.clear();
- stable_sort(first, first+collected, comp, xbuf);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
- stable_merge(first, first+collected, first+len1+len2, comp, xbuf);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
- }
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len);
-}
-
-template<class SizeType, class Xbuf>
-inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
-{
- typedef SizeType size_type;
- size_type l_block = rl_block;
- size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
-
- while(xbuf.capacity() >= l_block*2){
- l_block *= 2;
- }
-
- //This is the minimum number of keys to implement the ideal algorithm
- size_type n_keys = len1/l_block+len2/l_block;
- while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){
- --n_keys;
- }
- ++n_keys;
- BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
-
- if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
- n_keys = 0u;
- }
- l_intbuf_inout = l_intbuf;
- rl_block = l_block;
- return n_keys;
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////
-
-// Main explanation of the sort algorithm.
-//
-// csqrtlen = ceil(sqrt(len));
-//
-// * First, 2*csqrtlen unique elements elements are extracted from elements to be
-// sorted and placed in the beginning of the range.
-//
-// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
-// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
-// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
-// 2*csqrtlen unique elements are again the leading elements of the whole range.
-//
-// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
-// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
-// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
-// elements, etc) of until all trailing (len-2*csqrtlen) elements are merged.
-//
-// In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to
-// know if elements belong to the first or second block to be merged and another
-// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
-//
-// Iteratively until all trailing (len-2*csqrtlen) elements are merged:
-// Iteratively for each pair of previously merged block:
-// * Blocks are divided groups of csqrtlen elements and
-// 2*merged_block/csqrtlen keys are sorted to be used as markers
-// * Groups are selection-sorted by first or last element (depending whether they are going
-// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
-// * Elements of each block pair are merged using the csqrtlen buffer taking into account
-// if they belong to the first half or second half (marked by the key).
-//
-// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
-// rotations with the rest of sorted elements in the "combine_blocks" step.
-//
-// Corner cases:
-//
-// * If no 2*csqrtlen elements can be extracted:
-//
-// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
-// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
-// means that an additional "combine_blocks" step will be needed to merge all elements.
-//
-// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
-// then reduces the number of elements used as buffer and keys in the "build_blocks"
-// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
-// then uses a rotation based smart merge.
-//
-// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
-//
-// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
-//
-// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
-// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
-// keys to combine blocks.
-//
-// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
-// using classic merge and "combine_blocks" will use bigger blocks when merging.
-template<class RandIt, class Compare, class XBuf>
-void adaptive_sort_impl
- ( RandIt first
- , typename iterator_traits<RandIt>::size_type const len
- , Compare comp
- , XBuf & xbuf
- )
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
-
- //Small sorts go directly to insertion sort
- if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
- insertion_sort(first, first + len, comp);
- }
- else if((len-len/2) <= xbuf.capacity()){
- merge_sort(first, first+len, comp, xbuf.data());
- }
- else{
- //Make sure it is at least four
- BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
-
- size_type l_base = 0;
- size_type l_intbuf = 0;
- size_type n_keys = 0;
- size_type l_build_buf = 0;
-
- //Calculate and extract needed unique elements. If a minimum is not achieved
- //fallback to a slow stable sort
- if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
- stable_sort(first, first+len, comp, xbuf);
- }
- else{
- BOOST_ASSERT(l_build_buf);
- //Otherwise, continue the adaptive_sort
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
- size_type const n_key_plus_buf = l_intbuf+n_keys;
- //l_build_buf is always power of two if l_intbuf is zero
- BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
-
- //Classic merge sort until internal buffer and xbuf are exhausted
- size_type const l_merged = adaptive_sort_build_blocks
- (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);
-
- //Non-trivial merge
- bool const buffer_right = adaptive_sort_combine_all_blocks
- (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
-
- //Sort keys and buffer and merge the whole sequence
- adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
- }
- }
-}
-
-// Main explanation of the merge algorithm.
-//
-// csqrtlen = ceil(sqrt(len));
-//
-// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
-// unique elements are extracted from elements to be sorted and placed in the beginning of the range.
-//
-// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
-// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
-//
-// Explanation of the "combine_blocks" step:
-//
-// * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
-// Remaining elements that can't form a group are grouped in front of those elements.
-// * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
-// Remaining elements that can't form a group are grouped in the back of those elements.
-// * In parallel the following two steps are performed:
-// * Groups are selection-sorted by first or last element (depending whether they are going
-// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
-// * Elements of each block pair are merged using the csqrtlen buffer taking into account
-// if they belong to the first half or second half (marked by the key).
-//
-// * In the final merge step leading "to_collect" elements are merged with rotations
-// with the rest of merged elements in the "combine_blocks" step.
-//
-// Corner cases:
-//
-// * If no "to_collect" elements can be extracted:
-//
-// * If more than a minimum number of elements is extracted
-// then reduces the number of elements used as buffer and keys in the
-// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
-// then uses a rotation based smart merge.
-//
-// * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
-//
-// * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
-//
-// * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
-//
-// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
-// then no csqrtlen need to be extracted and "combine_blocks" will use integral
-// keys to combine blocks.
-template<class RandIt, class Compare, class XBuf>
-void adaptive_merge_impl
- ( RandIt first
- , typename iterator_traits<RandIt>::size_type const len1
- , typename iterator_traits<RandIt>::size_type const len2
- , Compare comp
- , XBuf & xbuf
- )
-{
- typedef typename iterator_traits<RandIt>::size_type size_type;
-
- if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
- buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
- }
- else{
- const size_type len = len1+len2;
- //Calculate ideal parameters and try to collect needed unique keys
- size_type l_block = size_type(ceil_sqrt(len));
-
- //One range is not big enough to extract keys and the internal buffer so a
- //rotation-based based merge will do just fine
- if(len1 <= l_block*2 || len2 <= l_block*2){
- merge_bufferless(first, first+len1, first+len1+len2, comp);
- return;
- }
-
- //Detail the number of keys and internal buffer. If xbuf has enough memory, no
- //internal buffer is needed so l_intbuf will remain 0.
- size_type l_intbuf = 0;
- size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
- size_type const to_collect = l_intbuf+n_keys;
- //Try to extract needed unique values from the first range
- size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
- BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len);
-
- //Not the minimum number of keys is not available on the first range, so fallback to rotations
- if(collected != to_collect && collected < 4){
- merge_bufferless(first, first+collected, first+len1, comp);
- merge_bufferless(first, first + len1, first + len1 + len2, comp);
- return;
- }
-
- //If not enough keys but more than minimum, adjust the internal buffer and key count
- bool use_internal_buf = collected == to_collect;
- if (!use_internal_buf){
- l_intbuf = 0u;
- n_keys = collected;
- l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
- //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
- l_intbuf = use_internal_buf ? l_block : 0u;
- }
-
- bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
- //Merge trailing elements using smart merges
- adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
- //Merge buffer and keys with the rest of the values
- adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
- }
-}
-
} //namespace detail_adaptive {
} //namespace movelib {
diff --git a/boost/move/algo/detail/heap_sort.hpp b/boost/move/algo/detail/heap_sort.hpp
new file mode 100644
index 0000000000..5474d9f5c4
--- /dev/null
+++ b/boost/move/algo/detail/heap_sort.hpp
@@ -0,0 +1,111 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+// (C) Copyright Ion Gaztanaga 2017-2018.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/move for documentation.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+//! \file
+
+#ifndef BOOST_MOVE_DETAIL_HEAP_SORT_HPP
+#define BOOST_MOVE_DETAIL_HEAP_SORT_HPP
+
+#ifndef BOOST_CONFIG_HPP
+# include <boost/config.hpp>
+#endif
+#
+#if defined(BOOST_HAS_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include <boost/move/detail/config_begin.hpp>
+#include <boost/move/detail/workaround.hpp>
+#include <boost/move/detail/iterator_traits.hpp>
+#include <boost/move/algo/detail/is_sorted.hpp>
+#include <boost/move/utility_core.hpp>
+
+namespace boost { namespace movelib{
+
+template <class RandomAccessIterator, class Compare>
+class heap_sort_helper
+{
+ typedef typename boost::movelib::iterator_traits<RandomAccessIterator>::size_type size_type;
+ typedef typename boost::movelib::iterator_traits<RandomAccessIterator>::value_type value_type;
+
+ static void adjust_heap(RandomAccessIterator first, size_type hole_index, size_type const len, value_type &value, Compare comp)
+ {
+ size_type const top_index = hole_index;
+ size_type second_child = 2 * (hole_index + 1);
+
+ while (second_child < len) {
+ if (comp(*(first + second_child), *(first + (second_child - 1))))
+ second_child--;
+ *(first + hole_index) = boost::move(*(first + second_child));
+ hole_index = second_child;
+ second_child = 2 * (second_child + 1);
+ }
+ if (second_child == len) {
+ *(first + hole_index) = boost::move(*(first + (second_child - 1)));
+ hole_index = second_child - 1;
+ }
+
+ { //push_heap-like ending
+ size_type parent = (hole_index - 1) / 2;
+ while (hole_index > top_index && comp(*(first + parent), value)) {
+ *(first + hole_index) = boost::move(*(first + parent));
+ hole_index = parent;
+ parent = (hole_index - 1) / 2;
+ }
+ *(first + hole_index) = boost::move(value);
+ }
+ }
+
+ static void make_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
+ {
+ size_type const len = size_type(last - first);
+ if (len > 1) {
+ size_type parent = len/2u - 1u;
+
+ do {
+ value_type v(boost::move(*(first + parent)));
+ adjust_heap(first, parent, len, v, comp);
+ }while (parent--);
+ }
+ }
+
+ static void sort_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
+ {
+ size_type len = size_type(last - first);
+ while (len > 1) {
+ //move biggest to the safe zone
+ --last;
+ value_type v(boost::move(*last));
+ *last = boost::move(*first);
+ adjust_heap(first, size_type(0), --len, v, comp);
+ }
+ }
+
+ public:
+ static void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
+ {
+ make_heap(first, last, comp);
+ sort_heap(first, last, comp);
+ BOOST_ASSERT(boost::movelib::is_sorted(first, last, comp));
+ }
+};
+
+template <class RandomAccessIterator, class Compare>
+BOOST_MOVE_FORCEINLINE void heap_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
+{
+ heap_sort_helper<RandomAccessIterator, Compare>::sort(first, last, comp);
+}
+
+}} //namespace boost { namespace movelib{
+
+#include <boost/move/detail/config_end.hpp>
+
+#endif //#ifndef BOOST_MOVE_DETAIL_HEAP_SORT_HPP
diff --git a/boost/move/algo/detail/insertion_sort.hpp b/boost/move/algo/detail/insertion_sort.hpp
index 3328f75748..5c378c3e36 100644
--- a/boost/move/algo/detail/insertion_sort.hpp
+++ b/boost/move/algo/detail/insertion_sort.hpp
@@ -101,21 +101,21 @@ void insertion_sort_uninitialized_copy
typedef typename iterator_traits<BirdirectionalIterator>::value_type value_type;
if (first1 != last1){
BirdirectionalRawIterator last2 = first2;
- ::new((iterator_to_raw_pointer)(last2), boost_move_new_t()) value_type(move(*first1));
+ ::new((iterator_to_raw_pointer)(last2), boost_move_new_t()) value_type(::boost::move(*first1));
destruct_n<value_type, BirdirectionalRawIterator> d(first2);
d.incr();
for (++last2; ++first1 != last1; ++last2){
BirdirectionalRawIterator j2 = last2;
BirdirectionalRawIterator k2 = j2;
if (comp(*first1, *--k2)){
- ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(move(*k2));
+ ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(::boost::move(*k2));
d.incr();
for (--j2; k2 != first2 && comp(*first1, *--k2); --j2)
- *j2 = move(*k2);
- *j2 = move(*first1);
+ *j2 = ::boost::move(*k2);
+ *j2 = ::boost::move(*first1);
}
else{
- ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(move(*first1));
+ ::new((iterator_to_raw_pointer)(j2), boost_move_new_t()) value_type(::boost::move(*first1));
d.incr();
}
}
diff --git a/boost/move/algo/detail/is_sorted.hpp b/boost/move/algo/detail/is_sorted.hpp
new file mode 100644
index 0000000000..d3dccfc2db
--- /dev/null
+++ b/boost/move/algo/detail/is_sorted.hpp
@@ -0,0 +1,55 @@
+#ifndef BOOST_MOVE_DETAIL_IS_SORTED_HPP
+#define BOOST_MOVE_DETAIL_IS_SORTED_HPP
+///////////////////////////////////////////////////////////////////////////////
+//
+// (C) Copyright Ion Gaztanaga 2017-2018. Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/move for documentation.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef BOOST_CONFIG_HPP
+# include <boost/config.hpp>
+#endif
+
+#if defined(BOOST_HAS_PRAGMA_ONCE)
+# pragma once
+#endif
+
+namespace boost {
+namespace movelib {
+
+template<class ForwardIt, class Pred>
+bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred)
+{
+ if (first != last) {
+ ForwardIt next = first, cur(first);
+ while (++next != last) {
+ if (pred(*next, *cur))
+ return false;
+ cur = next;
+ }
+ }
+ return true;
+}
+
+template<class ForwardIt, class Pred>
+bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred)
+{
+ if (first != last) {
+ ForwardIt next = first;
+ while (++next != last) {
+ if (!pred(*first, *next))
+ return false;
+ first = next;
+ }
+ }
+ return true;
+}
+
+} //namespace movelib {
+} //namespace boost {
+
+#endif //BOOST_MOVE_DETAIL_IS_SORTED_HPP
diff --git a/boost/move/algo/detail/merge.hpp b/boost/move/algo/detail/merge.hpp
index 621dfa28af..860773579c 100644
--- a/boost/move/algo/detail/merge.hpp
+++ b/boost/move/algo/detail/merge.hpp
@@ -256,56 +256,67 @@ void swap_merge_right
op_merge_right(first1, last1, last2, buf_last, comp, swap_op());
}
-template <class BidirIt, class Distance, class Compare>
+//Complexity: min(len1,len2)^2 + max(len1,len2)
+template<class RandIt, class Compare>
+void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp)
+{
+ if((middle - first) < (last - middle)){
+ while(first != middle){
+ RandIt const old_last1 = middle;
+ middle = boost::movelib::lower_bound(middle, last, *first, comp);
+ first = rotate_gcd(first, old_last1, middle);
+ if(middle == last){
+ break;
+ }
+ do{
+ ++first;
+ } while(first != middle && !comp(*middle, *first));
+ }
+ }
+ else{
+ while(middle != last){
+ RandIt p = boost::movelib::upper_bound(first, middle, last[-1], comp);
+ last = rotate_gcd(p, middle, last);
+ middle = p;
+ if(middle == first){
+ break;
+ }
+ --p;
+ do{
+ --last;
+ } while(middle != last && !comp(last[-1], *p));
+ }
+ }
+}
+
+static const std::size_t MergeBufferlessONLogNRotationThreshold = 32;
+
+template <class RandIt, class Distance, class Compare>
void merge_bufferless_ONlogN_recursive
- (BidirIt first, BidirIt middle, BidirIt last, Distance len1, Distance len2, Compare comp)
+ (RandIt first, RandIt middle, RandIt last, Distance len1, Distance len2, Compare comp)
{
- typedef typename iterator_traits<BidirIt>::size_type size_type;
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+
while(1) {
- //#define MERGE_BUFFERLESS_RECURSIVE_OPT
- #ifndef MERGE_BUFFERLESS_RECURSIVE_OPT
- if (len2 == 0) {
+ //trivial cases
+ if (!len2) {
return;
}
-
- if (!len1) {
+ else if (!len1) {
return;
}
-
- if ((len1 | len2) == 1) {
+ else if (size_type(len1 | len2) == 1u) {
if (comp(*middle, *first))
adl_move_swap(*first, *middle);
return;
}
- #else
- if (len2 == 0) {
+ else if(size_type(len1+len2) < MergeBufferlessONLogNRotationThreshold){
+ merge_bufferless_ON2(first, middle, last, comp);
return;
}
- if (!len1) {
- return;
- }
- BidirIt middle_prev = middle; --middle_prev;
- if(!comp(*middle, *middle_prev))
- return;
-
- while(true) {
- if (comp(*middle, *first))
- break;
- ++first;
- if(--len1 == 1)
- break;
- }
-
- if (len1 == 1 && len2 == 1) {
- //comp(*middle, *first) == true already tested in the loop
- adl_move_swap(*first, *middle);
- return;
- }
- #endif
-
- BidirIt first_cut = first;
- BidirIt second_cut = middle;
+ RandIt first_cut = first;
+ RandIt second_cut = middle;
Distance len11 = 0;
Distance len22 = 0;
if (len1 > len2) {
@@ -320,20 +331,18 @@ void merge_bufferless_ONlogN_recursive
first_cut = boost::movelib::upper_bound(first, middle, *second_cut, comp);
len11 = size_type(first_cut - first);
}
- BidirIt new_middle = rotate_gcd(first_cut, middle, second_cut);
+ RandIt new_middle = rotate_gcd(first_cut, middle, second_cut);
//Avoid one recursive call doing a manual tail call elimination on the biggest range
const Distance len_internal = len11+len22;
if( len_internal < (len1 + len2 - len_internal) ) {
merge_bufferless_ONlogN_recursive(first, first_cut, new_middle, len11, len22, comp);
- //merge_bufferless_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp);
first = new_middle;
middle = second_cut;
len1 -= len11;
len2 -= len22;
}
else {
- //merge_bufferless_recursive(first, first_cut, new_middle, len11, len22, comp);
merge_bufferless_ONlogN_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp);
middle = first_cut;
last = new_middle;
@@ -344,50 +353,17 @@ void merge_bufferless_ONlogN_recursive
}
//Complexity: NlogN
-template<class BidirIt, class Compare>
-void merge_bufferless_ONlogN(BidirIt first, BidirIt middle, BidirIt last, Compare comp)
+template<class RandIt, class Compare>
+void merge_bufferless_ONlogN(RandIt first, RandIt middle, RandIt last, Compare comp)
{
merge_bufferless_ONlogN_recursive
(first, middle, last, middle - first, last - middle, comp);
}
-//Complexity: min(len1,len2)^2 + max(len1,len2)
-template<class RandIt, class Compare>
-void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp)
-{
- if((middle - first) < (last - middle)){
- while(first != middle){
- RandIt const old_last1 = middle;
- middle = boost::movelib::lower_bound(middle, last, *first, comp);
- first = rotate_gcd(first, old_last1, middle);
- if(middle == last){
- break;
- }
- do{
- ++first;
- } while(first != middle && !comp(*middle, *first));
- }
- }
- else{
- while(middle != last){
- RandIt p = boost::movelib::upper_bound(first, middle, last[-1], comp);
- last = rotate_gcd(p, middle, last);
- middle = p;
- if(middle == first){
- break;
- }
- --p;
- do{
- --last;
- } while(middle != last && !comp(last[-1], *p));
- }
- }
-}
-
template<class RandIt, class Compare>
void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp)
{
- //#define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE
+ #define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE
#ifdef BOOST_ADAPTIVE_MERGE_NLOGN_MERGE
merge_bufferless_ONlogN(first, middle, last, comp);
#else
diff --git a/boost/move/algo/detail/pdqsort.hpp b/boost/move/algo/detail/pdqsort.hpp
new file mode 100644
index 0000000000..b6a127896c
--- /dev/null
+++ b/boost/move/algo/detail/pdqsort.hpp
@@ -0,0 +1,334 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+// (C) Copyright Orson Peters 2017.
+// (C) Copyright Ion Gaztanaga 2017-2018.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/move for documentation.
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+// This implementation of Pattern-defeating quicksort (pdqsort) was written
+// by Orson Peters, and discussed in the Boost mailing list:
+// http://boost.2283326.n4.nabble.com/sort-pdqsort-td4691031.html
+//
+// This implementation is the adaptation by Ion Gaztanaga of code originally in GitHub
+// with permission from the author to relicense it under the Boost Software License
+// (see the Boost mailing list for details).
+//
+// The original copyright statement is pasted here for completeness:
+//
+// pdqsort.h - Pattern-defeating quicksort.
+// Copyright (c) 2015 Orson Peters
+// This software is provided 'as-is', without any express or implied warranty. In no event will the
+// authors be held liable for any damages arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose, including commercial
+// applications, and to alter it and redistribute it freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the
+// original software. If you use this software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as
+// being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef BOOST_MOVE_ALGO_PDQSORT_HPP
+#define BOOST_MOVE_ALGO_PDQSORT_HPP
+
+#ifndef BOOST_CONFIG_HPP
+# include <boost/config.hpp>
+#endif
+#
+#if defined(BOOST_HAS_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include <boost/move/detail/config_begin.hpp>
+#include <boost/move/detail/workaround.hpp>
+#include <boost/move/utility_core.hpp>
+#include <boost/move/algo/detail/insertion_sort.hpp>
+#include <boost/move/algo/detail/heap_sort.hpp>
+#include <boost/move/detail/iterator_traits.hpp>
+
+#include <boost/move/adl_move_swap.hpp>
+#include <cstddef>
+
+namespace boost {
+namespace movelib {
+
+namespace pdqsort_detail {
+
+   // Minimal two-member value holder used as a return type by the partition
+   // routines, so that this header does not need to include <utility>.
+   template<class T1, class T2>
+   struct pair
+   {
+      T1 first;
+      T2 second;
+
+      // Leaves both members default-initialized.
+      pair() {}
+
+      // Copy-constructs both members from the supplied values.
+      pair(const T1 &first_value, const T2 &second_value)
+         : first(first_value)
+         , second(second_value)
+      {}
+   };
+
+ // Tuning constants for the pdqsort implementation in this header.
+ enum {
+ // Partitions below this size are sorted using insertion sort. pdqsort_loop also uses this
+ // value as the minimum partition size for which it shuffles elements after a bad partition.
+ insertion_sort_threshold = 24,
+
+ // Partitions above this size use Tukey's ninther to select the pivot.
+ ninther_threshold = 128,
+
+ // When we detect an already sorted partition, attempt an insertion sort that allows this
+ // amount of element moves before giving up.
+ partial_insertion_sort_limit = 8,
+
+ // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
+ // NOTE(review): no code in this header reads block_size -- confirm it is still needed.
+ block_size = 64,
+
+ // Cacheline size, assumes power of two.
+ // NOTE(review): no code in this header reads cacheline_size -- confirm it is still needed.
+ cacheline_size = 64
+
+ };
+
+   // Computes floor(log2(n)) by counting how many times n can be shifted right
+   // by one bit before it becomes zero. The caller is expected to pass n > 0.
+   template<class Unsigned>
+   Unsigned log2(Unsigned n) {
+      Unsigned shifts = 0;
+      for (n >>= 1; n; n >>= 1) {
+         ++shifts;
+      }
+      return shifts;
+   }
+
+   // Insertion-sorts [begin, end) while keeping count of how far elements had to travel.
+   // Before each element is processed, the accumulated distance is compared against
+   // partial_insertion_sort_limit; once it is exceeded the function gives up and returns
+   // false, leaving the range only partially sorted. Returns true when the whole range
+   // was processed (an empty range is trivially sorted).
+   template<class Iter, class Compare>
+   inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
+      typedef typename boost::movelib::iterator_traits<Iter>::value_type value_type;
+      typedef typename boost::movelib::iterator_traits<Iter>::size_type  size_type;
+
+      if (begin == end) {
+         return true;
+      }
+
+      size_type moved = 0;
+      for (Iter cur = begin + 1; cur != end; ++cur) {
+         if (moved > partial_insertion_sort_limit) {
+            return false;
+         }
+
+         Iter hole = cur;
+         Iter prev = cur - 1;
+
+         // Testing first avoids two moves when the element is already in place.
+         if (!comp(*hole, *prev)) {
+            continue;
+         }
+
+         value_type tmp = boost::move(*hole);
+
+         // Shift larger elements one slot to the right until the slot for tmp is found.
+         do {
+            *hole = boost::move(*prev);
+            --hole;
+         } while (hole != begin && comp(tmp, *--prev));
+
+         *hole = boost::move(tmp);
+         moved += size_type(cur - hole);
+      }
+
+      return true;
+   }
+
+   // Swaps *a and *b when *b compares less than *a, so that afterwards
+   // *b does not compare less than *a.
+   template<class Iter, class Compare>
+   inline void sort2(Iter a, Iter b, Compare comp) {
+      if (!comp(*b, *a)) {
+         return;
+      }
+      boost::adl_move_iter_swap(a, b);
+   }
+
+   // Puts the three referenced elements in order (*a, *b, *c) with respect to comp
+   // using three compare-and-swap steps: (a,b), (b,c) and again (a,b).
+   template<class Iter, class Compare>
+   inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
+      if (comp(*b, *a)) {
+         boost::adl_move_iter_swap(a, b);
+      }
+      if (comp(*c, *b)) {
+         boost::adl_move_iter_swap(b, c);
+      }
+      if (comp(*b, *a)) {
+         boost::adl_move_iter_swap(a, b);
+      }
+   }
+
+ // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
+ // to the pivot are put in the right-hand partition. Assumes the pivot is a median of at least
+ // 3 elements and that [begin, end) is at least insertion_sort_threshold long.
+ //
+ // Returns a pair: 'first' is the position where the pivot was finally stored, 'second' is
+ // true when the swap loop below never had to run (the sequence was already partitioned).
+ template<class Iter, class Compare>
+ pdqsort_detail::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
+ typedef typename boost::movelib::iterator_traits<Iter>::value_type T;
+
+ // Move pivot into local for speed. *begin is left moved-from until the end of the function.
+ T pivot(boost::move(*begin));
+
+ Iter first = begin;
+ Iter last = end;
+
+ // Find the first element greater than or equal to the pivot (the median of 3 guarantees
+ // this exists).
+ while (comp(*++first, pivot));
+
+ // Scanning backwards from the end, find the first element strictly smaller than the pivot.
+ // We have to guard this search if there was no element before *first.
+ if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
+ else while ( !comp(*--last, pivot));
+
+ // If the first pair of elements that should be swapped to partition are the same element,
+ // the passed in sequence already was correctly partitioned.
+ bool already_partitioned = first >= last;
+
+ // Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
+ // swapped pairs guard the searches, which is why the first iteration is special-cased
+ // above.
+ while (first < last) {
+ boost::adl_move_iter_swap(first, last);
+ while (comp(*++first, pivot));
+ while (!comp(*--last, pivot));
+ }
+
+ // Put the pivot in the right place: the last element of the left partition is moved into
+ // the vacated *begin slot and the pivot takes its position.
+ Iter pivot_pos = first - 1;
+ *begin = boost::move(*pivot_pos);
+ *pivot_pos = boost::move(pivot);
+
+ return pdqsort_detail::pair<Iter, bool>(pivot_pos, already_partitioned);
+ }
+
+ // Similar to partition_right, except elements equal to the pivot are put to the left of
+ // the pivot and it doesn't check or return if the passed sequence already was partitioned.
+ // Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
+ // performance, no block quicksort is applied here for simplicity.
+ //
+ // Returns the position where the pivot (initially *begin) was finally stored.
+ template<class Iter, class Compare>
+ inline Iter partition_left(Iter begin, Iter end, Compare comp) {
+ typedef typename boost::movelib::iterator_traits<Iter>::value_type T;
+
+ // Move pivot into a local. *begin is left moved-from until the end of the function.
+ T pivot(boost::move(*begin));
+ Iter first = begin;
+ Iter last = end;
+
+ // Scanning backwards, stop at the first element that is not greater than the pivot.
+ while (comp(pivot, *--last));
+
+ // Scanning forwards, stop at the first element greater than the pivot. The search must be
+ // bounds-checked when the backward scan stopped immediately at the last element.
+ if (last + 1 == end) while (first < last && !comp(pivot, *++first));
+ else while ( !comp(pivot, *++first));
+
+ // Swap out-of-place pairs; previously swapped pairs guard the unchecked scans.
+ while (first < last) {
+ boost::adl_move_iter_swap(first, last);
+ while (comp(pivot, *--last));
+ while (!comp(pivot, *++first));
+ }
+
+ // Put the pivot in its final place, filling the vacated *begin slot with the element
+ // that was at that position.
+ Iter pivot_pos = last;
+ *begin = boost::move(*pivot_pos);
+ *pivot_pos = boost::move(pivot);
+
+ return pivot_pos;
+ }
+
+
+ // Main pdqsort driver: sorts [begin, end) with comp.
+ //
+ // bad_allowed: number of highly unbalanced partitions still tolerated; it is decremented on
+ // each such partition and, when it reaches zero, the current range is heap sorted.
+ // leftmost: false when *(begin - 1) exists and may be read (see the equal-pivot check below);
+ // the left-hand recursive call inherits the current value, the right-hand
+ // continuation always uses false.
+ template<class Iter, class Compare>
+ void pdqsort_loop( Iter begin, Iter end, Compare comp
+ , typename boost::movelib::iterator_traits<Iter>::size_type bad_allowed
+ , bool leftmost = true)
+ {
+ typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type;
+
+ // Use a while loop for tail recursion elimination.
+ while (true) {
+ size_type size = size_type(end - begin);
+
+ // Insertion sort is faster for small arrays.
+ if (size < insertion_sort_threshold) {
+ insertion_sort(begin, end, comp);
+ return;
+ }
+
+ // Choose pivot as median of 3 or pseudomedian of 9. In both cases the chosen pivot
+ // ends up in *begin, which is where the partition functions expect it.
+ size_type s2 = size / 2;
+ if (size > ninther_threshold) {
+ sort3(begin, begin + s2, end - 1, comp);
+ sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
+ sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
+ sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
+ boost::adl_move_iter_swap(begin, begin + s2);
+ } else sort3(begin + s2, begin, end - 1, comp);
+
+ // If *(begin - 1) is the end of the right partition of a previous partition operation
+ // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
+ // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
+ // the left partition, greater elements in the right partition. We do not have to
+ // recurse on the left partition, since it's sorted (all equal).
+ if (!leftmost && !comp(*(begin - 1), *begin)) {
+ begin = partition_left(begin, end, comp) + 1;
+ continue;
+ }
+
+ // Partition and get results.
+ pdqsort_detail::pair<Iter, bool> part_result = partition_right(begin, end, comp);
+ Iter pivot_pos = part_result.first;
+ bool already_partitioned = part_result.second;
+
+ // Check for a highly unbalanced partition: one side holds less than 1/8 of the elements.
+ size_type l_size = size_type(pivot_pos - begin);
+ size_type r_size = size_type(end - (pivot_pos + 1));
+ bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
+
+ // If we got a highly unbalanced partition we shuffle elements to break many patterns.
+ if (highly_unbalanced) {
+ // If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
+ if (--bad_allowed == 0) {
+ boost::movelib::heap_sort(begin, end, comp);
+ return;
+ }
+
+ // Left partition: swap elements near both ends with elements a quarter of the
+ // partition away (more of them when the partition is large).
+ if (l_size >= insertion_sort_threshold) {
+ boost::adl_move_iter_swap(begin, begin + l_size / 4);
+ boost::adl_move_iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
+
+ if (l_size > ninther_threshold) {
+ boost::adl_move_iter_swap(begin + 1, begin + (l_size / 4 + 1));
+ boost::adl_move_iter_swap(begin + 2, begin + (l_size / 4 + 2));
+ boost::adl_move_iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
+ boost::adl_move_iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
+ }
+ }
+
+ // Right partition: same shuffling scheme as for the left partition.
+ if (r_size >= insertion_sort_threshold) {
+ boost::adl_move_iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
+ boost::adl_move_iter_swap(end - 1, end - r_size / 4);
+
+ if (r_size > ninther_threshold) {
+ boost::adl_move_iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
+ boost::adl_move_iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
+ boost::adl_move_iter_swap(end - 2, end - (1 + r_size / 4));
+ boost::adl_move_iter_swap(end - 3, end - (2 + r_size / 4));
+ }
+ }
+ } else {
+ // If we were decently balanced and we tried to sort an already partitioned
+ // sequence try to use insertion sort. If either attempt gives up, fall through
+ // to the regular recursion below.
+ if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
+ && partial_insertion_sort(pivot_pos + 1, end, comp)) return;
+ }
+
+ // Sort the left partition first using recursion and do tail recursion elimination for
+ // the right-hand partition.
+ pdqsort_loop<Iter, Compare>(begin, pivot_pos, comp, bad_allowed, leftmost);
+ begin = pivot_pos + 1;
+ leftmost = false;
+ }
+ }
+}
+
+
+//Sorts [begin, end) according to comp using pattern-defeating quicksort.
+//An empty range is left untouched. The number of highly unbalanced partitions
+//tolerated by pdqsort_loop is floor(log2(number of elements)).
+template<class Iter, class Compare>
+void pdqsort(Iter begin, Iter end, Compare comp)
+{
+   typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type;
+   if (begin != end) {
+      const size_type count = size_type(end - begin);
+      pdqsort_detail::pdqsort_loop<Iter, Compare>(begin, end, comp, pdqsort_detail::log2(count));
+   }
+}
+
+} //namespace movelib {
+} //namespace boost {
+
+#include <boost/move/detail/config_end.hpp>
+
+#endif //BOOST_MOVE_ALGO_PDQSORT_HPP
diff --git a/boost/move/algo/detail/set_difference.hpp b/boost/move/algo/detail/set_difference.hpp
new file mode 100644
index 0000000000..51d047592a
--- /dev/null
+++ b/boost/move/algo/detail/set_difference.hpp
@@ -0,0 +1,207 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+// (C) Copyright Ion Gaztanaga 2017-2017.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/move for documentation.
+//
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_MOVE_SET_DIFFERENCE_HPP
+#define BOOST_MOVE_SET_DIFFERENCE_HPP
+
+#include <boost/move/algo/move.hpp>
+#include <boost/move/iterator.hpp>
+#include <boost/move/utility_core.hpp>
+
+namespace boost {
+
+namespace move_detail{
+
+//Assigns every element of [first, last) to consecutive positions of the range
+//beginning at result and returns the end of the written range.
+//Elements are transferred with plain assignment from *first, so they are moved
+//only when the caller passes move iterators.
+template<class InputIt, class OutputIt>
+OutputIt copy(InputIt first, InputIt last, OutputIt result)
+{
+   while (first != last) {
+      //Advance the output exactly once per element: the previous form
+      //("*result++ = ...;" followed by "++result;") skipped every other slot
+      //and wrote past the end of the destination.
+      *result = *first;
+      ++result;
+      ++first;
+   }
+   return result;
+}
+
+} //namespace move_detail{
+
+namespace movelib {
+
+//Writes to the range beginning at result the elements of the sorted range [first1, last1)
+//that are not found in the sorted range [first2, last2), and returns the end of the
+//written range. Elements are transferred with plain assignment from *first1, so they are
+//moved when the caller passes move iterators.
+//The output is sorted. Equivalent elements are matched one to one: an element appearing
+//m times in [first1, last1) and n times in [first2, last2) is written max(m-n, 0) times.
+//The resulting range cannot overlap with either of the input ranges.
+template<class InputIt1, class InputIt2,
+         class OutputIt, class Compare>
+OutputIt set_difference
+   (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, OutputIt result, Compare comp)
+{
+   for (; first1 != last1; ) {
+      //Nothing left to subtract: the rest of range 1 goes to the output as is
+      if (first2 == last2) {
+         return boost::move_detail::copy(first1, last1, result);
+      }
+
+      if (!comp(*first1, *first2)) {
+         //*first1 is not smaller than *first2: drop it when both are equivalent,
+         //and in any case move on to the next candidate in range 2
+         if (!comp(*first2, *first1)) {
+            ++first1;
+         }
+         ++first2;
+         continue;
+      }
+
+      //*first1 is smaller than anything left in range 2, so it is kept
+      *result = *first1;
+      ++result;
+      ++first1;
+   }
+   return result;
+}
+
+//In-place version of set_difference: removes from the sorted range [first1, last1) the
+//elements found in the sorted range [first2, last2), compacting the survivors at the
+//front of range 1, and returns the end of the compacted range.
+//The result is sorted. Equivalent elements are matched one to one: an element appearing
+//m times in [first1, last1) and n times in [first2, last2) is kept max(m-n, 0) times.
+template<class InputOutputIt1, class InputIt2, class Compare>
+InputOutputIt1 inplace_set_difference
+   (InputOutputIt1 first1, InputOutputIt1 last1, InputIt2 first2, InputIt2 last2, Compare comp )
+{
+   for (; first1 != last1; ) {
+      //Range 2 is exhausted: everything left in range 1 stays where it is
+      if (first2 == last2) {
+         return last1;
+      }
+
+      //*first1 survives and is already in its final position
+      if (comp(*first1, *first2)) {
+         ++first1;
+         continue;
+      }
+
+      //*first2 is smaller than *first1 and matches nothing: discard it
+      if (comp(*first2, *first1)) {
+         ++first2;
+         continue;
+      }
+
+      //First equivalent pair found: *first1 must be dropped, so from here on the
+      //survivors have to be shifted down. Delegate to set_difference, moving the
+      //remaining elements of range 1 into the hole left by the dropped element.
+      InputOutputIt1 const write_pos = first1;
+      ++first1;
+      ++first2;
+      return boost::movelib::set_difference
+         ( boost::make_move_iterator(first1)
+         , boost::make_move_iterator(last1)
+         , first2, last2, write_pos, comp);
+   }
+   return first1;
+}
+
+//Writes to the range beginning at result one element for every run of equivalent elements
+//of the sorted range [first1, last1) that has no equivalent element in the sorted range
+//[first2, last2), and returns the end of the written range.
+//The element written for a run is the first one of that run; the rest of the run is skipped.
+//Elements are transferred with plain assignment, so they are moved only when the caller
+//passes move iterators.
+//The resulting range is also sorted.
+//The resulting range cannot overlap with either of the input ranges.
+//NOTE(review): inplace_set_unique_difference passes a result that lies inside range 1, before
+//first1 -- confirm the exact overlap requirement.
+template<class ForwardIt1, class InputIt2,
+ class OutputIt, class Compare>
+OutputIt set_unique_difference
+ (ForwardIt1 first1, ForwardIt1 last1, InputIt2 first2, InputIt2 last2, OutputIt result, Compare comp)
+{
+ while (first1 != last1) {
+ if (first2 == last2){
+ //Range 2 is exhausted: unique_copy-like sequence with forward iterators but don't
+ //write i to result before comparing as moving *i could alter the value in i.
+ ForwardIt1 i = first1;
+ while (++first1 != last1) {
+ if (comp(*i, *first1)) {
+ *result = *i;
+ ++result;
+ i = first1;
+ }
+ }
+ //Write the representative of the last run
+ *result = *i;
+ ++result;
+ break;
+ }
+
+ if (comp(*first1, *first2)) {
+ //*first1 is not in range 2. Skip equivalent elements in range1 but don't write i
+ //to result before comparing as moving *i could alter the value in i.
+ ForwardIt1 i = first1;
+ while (++first1 != last1) {
+ if (comp(*i, *first1)) {
+ break;
+ }
+ }
+ *result = *i;
+ ++result;
+ }
+ else {
+ if (comp(*first2, *first1)) {
+ //*first2 is smaller than anything left in range 1: discard it
+ ++first2;
+ }
+ else{
+ //Equivalent elements: drop *first1. first2 is not advanced, so any further
+ //element of range 1 equivalent to *first2 is dropped as well.
+ ++first1;
+ }
+ }
+ }
+ return result;
+}
+
+//In-place variant of set_unique_difference: compacts the sorted range [first1, last1) so that
+//its front holds one element for every run of equivalent elements that has no equivalent
+//element in the sorted range [first2, last2), and returns the end of that compacted range.
+//Elements stay untouched while nothing has to be dropped; as soon as an element must be
+//dropped the rest of the work is delegated to set_unique_difference, which receives move
+//iterators over the remainder of range 1 and writes into the already processed part.
+//The resulting range is also sorted.
+template<class ForwardOutputIt1, class ForwardIt2, class Compare>
+ForwardOutputIt1 inplace_set_unique_difference
+ (ForwardOutputIt1 first1, ForwardOutputIt1 last1, ForwardIt2 first2, ForwardIt2 last2, Compare comp )
+{
+ while (first1 != last1) {
+ //Range 2 is exhausted: only duplicates inside range 1 remain to be removed
+ if (first2 == last2){
+ //unique-like algorithm for the remaining range 1
+ ForwardOutputIt1 result = first1;
+ while (++first1 != last1) {
+ if (comp(*result, *first1) && ++result != first1) {
+ *result = boost::move(*first1);
+ }
+ }
+ return ++result;
+ }
+ else if (comp(*first2, *first1)) {
+ //*first2 is smaller than anything left in range 1: discard it
+ ++first2;
+ }
+ else if (comp(*first1, *first2)){
+ //*first1 is kept in place; skip any adjacent equivalent element in range 1
+ ForwardOutputIt1 result = first1;
+ if (++first1 != last1 && !comp(*result, *first1)) {
+ //Some elements from range 1 must be skipped, no longer an inplace operation
+ while (++first1 != last1 && !comp(*result, *first1)){}
+ //Output continues right after the kept element (++result)
+ return boost::movelib::set_unique_difference
+ ( boost::make_move_iterator(first1)
+ , boost::make_move_iterator(last1)
+ , first2, last2, ++result, comp);
+ }
+ }
+ else{
+ //*first1 is equivalent to *first2: the whole run of equivalent elements is dropped
+ ForwardOutputIt1 result = first1;
+ //Some elements from range 1 must be skipped, no longer an inplace operation
+ while (++first1 != last1 && !comp(*result, *first1)){}
+ //Output starts at the first dropped element (result)
+ return boost::movelib::set_unique_difference
+ ( boost::make_move_iterator(first1)
+ , boost::make_move_iterator(last1)
+ , first2, last2, result, comp);
+ }
+ }
+ return first1;
+}
+
+
+
+} //namespace movelib {
+} //namespace boost {
+
+#endif //#define BOOST_MOVE_SET_DIFFERENCE_HPP
diff --git a/boost/move/detail/type_traits.hpp b/boost/move/detail/type_traits.hpp
index 272cb11af8..a3326d00e1 100644
--- a/boost/move/detail/type_traits.hpp
+++ b/boost/move/detail/type_traits.hpp
@@ -973,7 +973,7 @@ struct aligned_struct;
template<std::size_t Len>\
struct BOOST_ALIGNMENT(A) aligned_struct<Len, A>\
{\
- char dummy[Len];\
+ char data[Len];\
};\
//
@@ -997,9 +997,10 @@ BOOST_MOVE_ALIGNED_STORAGE_WITH_BOOST_ALIGNMENT(0x1000)
// Workaround for bogus [-Wignored-attributes] warning on GCC 6.x/7.x: don't use a type that "directly" carries the alignment attribute.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82270
template<std::size_t Len, std::size_t Align>
-struct aligned_struct_wrapper
+union aligned_struct_wrapper
{
- aligned_struct<Len, Align> dummy;
+ aligned_struct<Len, Align> aligner;
+ char data[sizeof(aligned_struct<Len, Align>)];
};
template<std::size_t Len, std::size_t Align>
@@ -1014,7 +1015,7 @@ template<class T, std::size_t Len>
union aligned_union
{
T aligner;
- char dummy[Len];
+ char data[Len];
};
template<std::size_t Len, std::size_t Align, class T, bool Ok>