diff options
Diffstat (limited to 'boost/move/algo/detail/adaptive_sort_merge.hpp')
-rw-r--r-- | boost/move/algo/detail/adaptive_sort_merge.hpp | 2284 |
1 files changed, 2284 insertions, 0 deletions
diff --git a/boost/move/algo/detail/adaptive_sort_merge.hpp b/boost/move/algo/detail/adaptive_sort_merge.hpp new file mode 100644 index 0000000000..46dba187c5 --- /dev/null +++ b/boost/move/algo/detail/adaptive_sort_merge.hpp @@ -0,0 +1,2284 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2015-2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// +// +// Stable sorting that works in O(N*log(N)) worst time +// and uses O(1) extra memory +// +////////////////////////////////////////////////////////////////////////////// +// +// The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin +// and explained in the article from the russian collaborative blog +// Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on +// ideas from B-C. Huang and M. A. Langston explained in their article +// "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)" +// (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf). +// +// This implementation by Ion Gaztanaga uses previous ideas with additional changes: +// +// - Use of GCD-based rotation. +// - Non power of two buffer-sizes. +// - Tries to find sqrt(len)*2 unique keys, so that the merge sort +// phase can form up to sqrt(len)*4 segments if enough keys are found. +// - The merge-sort phase can take advantage of external memory to +// save some additional combination steps. +// - Optimized comparisons when selection-sorting blocks as A and B blocks +// are already sorted. +// - The combination phase is performed alternating merge to left and merge +// to right phases minimizing swaps due to internal buffer repositioning. 
+// - When merging blocks special optimizations are made to avoid moving some +// elements twice. +// +// The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts +// from the sorting algorithm and implementing a block merge algorithm +// without moving elements left or right, which is used when external memory +// is available. +////////////////////////////////////////////////////////////////////////////// +#ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP +#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP + +#include <boost/move/detail/config_begin.hpp> +#include <boost/move/algo/move.hpp> +#include <boost/move/algo/detail/merge.hpp> +#include <boost/move/adl_move_swap.hpp> +#include <boost/move/algo/detail/insertion_sort.hpp> +#include <boost/move/algo/detail/merge_sort.hpp> +#include <boost/move/algo/detail/merge.hpp> +#include <boost/assert.hpp> +#include <boost/cstdint.hpp> + +#ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS + #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L) \ + print_stats(STR, L)\ + // +#else + #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L) +#endif + +namespace boost { +namespace movelib { + +namespace detail_adaptive { + +static const std::size_t AdaptiveSortInsertionSortThreshold = 16; +//static const std::size_t AdaptiveSortInsertionSortThreshold = 4; +BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0); + +#if defined BOOST_HAS_INTPTR_T + typedef ::boost::uintptr_t uintptr_t; +#else + typedef std::size_t uintptr_t; +#endif + +template<class T> +const T &min_value(const T &a, const T &b) +{ + return a < b ? a : b; +} + +template<class T> +const T &max_value(const T &a, const T &b) +{ + return a > b ? 
a : b; +} + +template<class T> +class adaptive_xbuf +{ + adaptive_xbuf(const adaptive_xbuf &); + adaptive_xbuf & operator=(const adaptive_xbuf &); + + public: + typedef T* iterator; + + adaptive_xbuf() + : m_ptr(0), m_size(0), m_capacity(0) + {} + + adaptive_xbuf(T *raw_memory, std::size_t capacity) + : m_ptr(raw_memory), m_size(0), m_capacity(capacity) + {} + + template<class RandIt> + void move_assign(RandIt first, std::size_t n) + { + if(n <= m_size){ + boost::move(first, first+n, m_ptr); + std::size_t size = m_size; + while(size-- != n){ + m_ptr[size].~T(); + } + m_size = n; + } + else{ + T *result = boost::move(first, first+m_size, m_ptr); + boost::uninitialized_move(first+m_size, first+n, result); + m_size = n; + } + } + + template<class RandIt> + void push_back(RandIt first, std::size_t n) + { + BOOST_ASSERT(m_capacity - m_size >= n); + boost::uninitialized_move(first, first+n, m_ptr+m_size); + m_size += n; + } + + template<class RandIt> + iterator add(RandIt it) + { + BOOST_ASSERT(m_size < m_capacity); + T * p_ret = m_ptr + m_size; + ::new(p_ret) T(::boost::move(*it)); + ++m_size; + return p_ret; + } + + template<class RandIt> + void insert(iterator pos, RandIt it) + { + if(pos == (m_ptr + m_size)){ + this->add(it); + } + else{ + this->add(m_ptr+m_size-1); + //m_size updated + boost::move_backward(pos, m_ptr+m_size-2, m_ptr+m_size-1); + *pos = boost::move(*it); + } + } + + void set_size(std::size_t size) + { + m_size = size; + } + + template<class U> + bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const + { + if(this->data()){ + uintptr_t u_addr_sz = uintptr_t(this->data()+size); + uintptr_t u_addr_cp = uintptr_t(this->data()+this->capacity()); + u_addr_sz = ((u_addr_sz + sizeof(U)-1)/sizeof(U))*sizeof(U); + + return (u_addr_cp >= u_addr_sz) && ((u_addr_cp - u_addr_sz)/sizeof(U) >= trail_count); + } + return false; + } + + template<class U> + U *aligned_trailing() const + { + return this->aligned_trailing<U>(this->size()); + } + 
+ template<class U> + U *aligned_trailing(std::size_t pos) const + { + uintptr_t u_addr = uintptr_t(this->data()+pos); + u_addr = ((u_addr + sizeof(U)-1)/sizeof(U))*sizeof(U); + return (U*)u_addr; + } + + ~adaptive_xbuf() + { + this->clear(); + } + + std::size_t capacity() const + { return m_capacity; } + + iterator data() const + { return m_ptr; } + + iterator end() const + { return m_ptr+m_size; } + + std::size_t size() const + { return m_size; } + + bool empty() const + { return !m_size; } + + void clear() + { + std::size_t size = m_size; + while(size--){ + m_ptr[size].~T(); + } + m_size = 0u; + } + + private: + T *m_ptr; + std::size_t m_size; + std::size_t m_capacity; +}; + +template<class Iterator, class Op> +class range_xbuf +{ + range_xbuf(const range_xbuf &); + range_xbuf & operator=(const range_xbuf &); + + public: + typedef typename iterator_traits<Iterator>::size_type size_type; + typedef Iterator iterator; + + range_xbuf(Iterator first, Iterator last) + : m_first(first), m_last(first), m_cap(last) + {} + + template<class RandIt> + void move_assign(RandIt first, std::size_t n) + { + BOOST_ASSERT(size_type(n) <= size_type(m_cap-m_first)); + m_last = Op()(forward_t(), first, first+n, m_first); + } + + ~range_xbuf() + {} + + std::size_t capacity() const + { return m_cap-m_first; } + + Iterator data() const + { return m_first; } + + Iterator end() const + { return m_last; } + + std::size_t size() const + { return m_last-m_first; } + + bool empty() const + { return m_first == m_last; } + + void clear() + { + m_last = m_first; + } + + template<class RandIt> + iterator add(RandIt it) + { + Iterator pos(m_last); + *pos = boost::move(*it); + ++m_last; + return pos; + } + + void set_size(std::size_t size) + { + m_last = m_first; + m_last += size; + } + + private: + Iterator const m_first; + Iterator m_last; + Iterator const m_cap; +}; + +template<class RandIt, class Buf> +bool three_way_init( RandIt first1, RandIt last1, Buf &buf + , typename Buf::iterator 
&buf_first, typename Buf::iterator &buf_last, move_op) +{ + buf.move_assign(first1, last1-first1); + buf_first = buf.data(); + buf_last = buf.end(); + return true; +} + +template<class RandIt, class Buf> +bool three_way_init( RandIt first, RandIt last, Buf &buf + , typename Buf::iterator &buf_first, typename Buf::iterator &buf_last, swap_op) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + buf.clear(); + buf_first = buf.data(); + buf_last = buf_first + size_type(last-first); + return false; +} + +template<class T, class Buf> +void three_way_move(T &a, T &b, Buf &buf, move_op) +{ + buf.add(&b); + b = boost::move(a); +} + +template<class T, class Buf> +void three_way_move(T &a, T &b, Buf &buf, swap_op) +{ + T tmp(boost::move(*buf.end())); + buf.add(&b); + b = boost::move(a); + a = boost::move(tmp); +} + +/////////////////////////////////////////////////////////////////////////////// +// +// PARTIAL MERGE BUF +// +/////////////////////////////////////////////////////////////////////////////// + + +template<class Buf, class RandIt, class Compare, class Op> +RandIt op_partial_merge_with_buf_impl + ( RandIt first1, RandIt const last1, RandIt first2, RandIt last2 + , Buf &buf, typename Buf::iterator &buf_first1_in_out, typename Buf::iterator &buf_last1_in_out + , Compare comp, Op op + ) +{ + typedef typename Buf::iterator buf_iterator; + typedef typename iterator_traits<RandIt>::value_type value_type; + + BOOST_ASSERT(first1 != last1); + BOOST_ASSERT(first2 != last2); + buf_iterator buf_first1 = buf_first1_in_out; + buf_iterator buf_last1 = buf_last1_in_out; + + if(buf_first1 == buf_last1){ + //Skip any element that does not need to be moved + while(!comp(*last1, *first1)){ + ++first1; + if(first1 == last1){ + return first1; + } + } + + //If initialization is successful, move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + 
if(!three_way_init(first1, last1, buf, buf_first1, buf_last1, op)){ + three_way_move(*first2, *first1, buf, op); + for(++first1, ++first2; first1 != last1; ++first1){ + value_type &v = comp(*first2, *buf_first1) ? *first2++ : *buf_first1++; + three_way_move(v, *first1, buf, op); + } + } + } + + //Now merge from buffer + if(first2 != last2) + while(1){ + if(comp(*first2, *buf_first1)) { + op(first2++, first1++); + if(first2 == last2) + break; + } + else{ + op(buf_first1++, first1++); + if(buf_first1 == buf_last1) + break; + } + } + + buf_first1_in_out = buf_first1; + buf_last1_in_out = buf_last1; + return first1; +} + +template<class RandIt, class Buf, class Compare, class Op> +RandIt op_partial_merge_with_buf + ( RandIt first1, RandIt const last1, RandIt first2, RandIt last2 + , Buf &buf + , typename Buf::iterator &buf_first1_in_out + , typename Buf::iterator &buf_last1_in_out + , Compare comp + , Op op + , bool is_stable) +{ + return is_stable + ? op_partial_merge_with_buf_impl + (first1, last1, first2, last2, buf, buf_first1_in_out, buf_last1_in_out, comp, op) + : op_partial_merge_with_buf_impl + (first1, last1, first2, last2, buf, buf_first1_in_out, buf_last1_in_out, antistable<Compare>(comp), op) + ; +} + +// key_first - sequence of keys, in same order as blocks. key_comp(key, midkey) means stream A +// first - first element to merge. +// first[-l_block, 0) - buffer +// l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded +// l_irreg1 is the irregular block to be merged before n_bef_irreg2 blocks (can be 0) +// n_bef_irreg2/n_aft_irreg2 are regular blocks +// l_irreg2 is a irregular block, that is to be merged after n_bef_irreg2 blocks and before n_aft_irreg2 blocks +// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). 
+template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class Buf> +void op_merge_blocks_with_buf + ( RandItKeys key_first + , const typename iterator_traits<RandItKeys>::value_type &midkey + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const l_irreg1 + , typename iterator_traits<RandIt>::size_type const n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type const n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type const l_irreg2 + , Compare comp + , Op op + , Buf & xbuf) +{ + if(n_bef_irreg2 == 0){ + RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); + op_buffered_merge(first, last_reg, last_reg+l_irreg2, comp, op, xbuf); + } + else { + typedef typename Buf::iterator buf_iterator; + buf_iterator buffer = xbuf.data(); + buf_iterator buffer_end = buffer; + RandIt first1 = first; + RandIt last1 = l_irreg1 ? first1 + l_irreg1 : first1 + l_block; + RandIt first2 = last1; + RandItKeys const key_end (key_first+n_bef_irreg2); + + for( bool is_range1_A = l_irreg1 ? true : key_comp(*key_first, midkey), skip_first_it = l_irreg1 != 0 + ; key_first != key_end; ){ + if(!skip_first_it){ + ++key_first; + } + skip_first_it = false; + bool const last_it = key_first == key_end; + //If the trailing block is empty, we'll make it equal to the previous if empty + bool const is_range2_A = last_it ? 
(!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); + + if(is_range1_A == is_range2_A){ + if(buffer != buffer_end){ + first1 = op(forward_t(), buffer, buffer_end, first1); + BOOST_ASSERT(first1 == first2); + buffer_end = buffer; + } + first1 = first2; + if(last_it){ + xbuf.clear(); + last1 = first2+l_block*n_aft_irreg2; + op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); + break; + } + else{ + last1 = first2 + l_block; + } + first2 += l_block; + } + else { + BOOST_ASSERT(!last_it || (l_irreg2 || n_aft_irreg2)); + if(last_it){ + RandIt res = op(forward_t(), buffer, buffer_end, first1); + BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; + last1 += l_block*n_aft_irreg2; + xbuf.clear(); + op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); + break; + } + else{ + RandIt const last2 = first2 + l_block; + first1 = op_partial_merge_with_buf(first1, last1, first2, last2, xbuf, buffer, buffer_end, comp, op, is_range1_A); + if(buffer == buffer_end){ + is_range1_A = is_range2_A; + } + last1 = last2; + first2 = last1; + BOOST_ASSERT((buffer == buffer_end) || (buffer_end-buffer) == (last1-first1)); + } + } + } + } +} + +template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Buf> +void merge_blocks_with_buf + ( RandItKeys key_first + , const typename iterator_traits<RandItKeys>::value_type &midkey + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const l_irreg1 + , typename iterator_traits<RandIt>::size_type const n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type const n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type const l_irreg2 + , Compare comp + , Buf & xbuf + , bool const xbuf_used) +{ + if(xbuf_used){ + op_merge_blocks_with_buf + (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op(), xbuf); + } + else{ + 
op_merge_blocks_with_buf + (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op(), xbuf); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// PARTIAL MERGE LEFT +// +/////////////////////////////////////////////////////////////////////////////// + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_left_middle_buffer_impl + (RandIt first1, RandIt const last1, RandIt const first2 + , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp + , Op op) +{ + while(first1 != last1 && !comp(next_key, *first1)){ + ++first1; + } + //Even if we copy backward, no overlapping occurs so use forward copy + //that can be faster specially with trivial types + RandIt const new_first1 = first2 - (last1 - first1); + BOOST_ASSERT(last1 <= new_first1); + op(forward_t(), first1, last1, new_first1); + return new_first1; +} + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_left_middle_buffer + ( RandIt first1, RandIt const last1, RandIt const first2 + , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp, Op op, bool is_stable) +{ + return is_stable ? op_partial_merge_left_middle_buffer_impl(first1, last1, first2, next_key, comp, op) + : op_partial_merge_left_middle_buffer_impl(first1, last1, first2, next_key, antistable<Compare>(comp), op); +} + +// Partially merges two ordered ranges. Partially means that elements are merged +// until one of two ranges is exhausted (M elements from ranges 1 y 2). +// [buf_first, ...) -> buffer that can be overwritten +// [first1, last1) merge [last1,last2) -> [buf_first, buf_first+M) +// Note: distance(buf_first, first1) >= distance(last1, last2), so no overlapping occurs. 
+template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_left_impl + ( RandIt buf_first, RandIt first1, RandIt const last1, RandIt const last2, Compare comp, Op op) +{ + RandIt first2 = last1; + while(first1 != last1){ + if(first2 == last2){ + return first1; + } + if(comp(*first2, *first1)) { + op(first2, buf_first); + ++first2; + } + else{ + op(first1, buf_first); + ++first1; + } + ++buf_first; + } + return first2; +} + + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_left + ( RandIt buf_first, RandIt first1, RandIt const last1, RandIt const last2, Compare comp, Op op, bool is_stable) +{ + return is_stable ? op_partial_merge_left_impl(buf_first, first1, last1, last2, comp, op) + : op_partial_merge_left_impl(buf_first, first1, last1, last2, antistable<Compare>(comp), op); +} + +template<class RandIt> +bool three_way_side_init( RandIt first1, RandIt last1, RandIt buf_first1, move_op) +{ + boost::move(first1, last1, buf_first1); + return true; +} + +template<class RandIt> +bool three_way_side_init( RandIt, RandIt, RandIt, swap_op) +{ + return false; +} + +template<class T> +void three_way_side_move(T &a, T &b, T&c, move_op) +{ + c = boost::move(b); + b = boost::move(a); +} + +template<class T> +void three_way_side_move(T &a, T &b, T &c, swap_op) +{ + T tmp(boost::move(c)); + c = boost::move(b); + b = boost::move(a); + a = boost::move(tmp); +} + + +// Partially merges two ordered ranges. Partially means that elements are merged +// until one of two ranges is exhausted (M elements from ranges 1 y 2). +// [buf_first, ...) -> buffer that can be overwritten +// [first1, last1) merge [last1,last2) -> [buf_first, buf_first+M) +// Note: distance(buf_first, first1) >= distance(last1, last2), so no overlapping occurs. 
+template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_left_smart_impl + ( RandIt first1, RandIt last1, RandIt first2, RandIt const last2, Compare comp, Op op) +{ + typedef typename iterator_traits<RandIt>::value_type value_type; + + RandIt dest; + if(last1 != first2){ + BOOST_ASSERT((first2-last1)==(last2-first2)); + //Skip any element that does not need to be moved + while(!comp(*first2, *first1)){ + ++first1; + if(first1 == last1) + return first2; + } + + RandIt buf_first1 = first2 - (last1-first1); + + //If initialization is successful, move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + if(!three_way_side_init(first1, last1, buf_first1, op)){ + RandIt buf_last1 = buf_first1; + three_way_side_move(*first2, *first1, *buf_last1++, op); + + RandIt const orig_first2 = first2;(void)(orig_first2); + for(++first1, ++first2; first1 != last1; ++first1, ++buf_last1){ + value_type &v = comp(*first2, *buf_first1) ? *first2++ : *buf_first1++; + three_way_side_move(v, *first1, *buf_last1, op); + } + BOOST_ASSERT(buf_last1 == orig_first2); + last1 = buf_last1; + } + else{ + last1 = first2; + } + dest = first1; + first1 = buf_first1; + BOOST_ASSERT((first1-dest) == (last2-first2)); + } + else{ + dest = first1-(last2-first2); + } + + BOOST_ASSERT(0 != (last1-first1)); + if(first2 != last2) + while(1){ + if(comp(*first2, *first1)) { + op(first2++, dest++); + if(first2 == last2){ + return first1; + } + } + else{ + op(first1++, dest++); + if(first1 == last1){ + return first2; + } + } + } + return first1; +} + + + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_left_smart + (RandIt first1, RandIt const last1, RandIt first2, RandIt const last2, Compare comp, Op op, bool is_stable) +{ + return is_stable ? 
op_partial_merge_left_smart_impl(first1, last1, first2, last2, comp, op) + : op_partial_merge_left_smart_impl(first1, last1, first2, last2, antistable<Compare>(comp), op); +} + + +// first - first element to merge. +// first[-l_block, 0) - buffer +// l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded +// key_first - sequence of keys, in same order as blocks. key<midkey means stream A +// n_bef_irreg2/n_aft_irreg2 are regular blocks +// l_irreg2 is a irregular block, that is to be merged after n_bef_irreg2 blocks and before n_aft_irreg2 blocks +// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). +template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op> +void op_merge_blocks_left + ( RandItKeys key_first + , const typename iterator_traits<RandItKeys>::value_type &midkey + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const l_irreg1 + , typename iterator_traits<RandIt>::size_type const n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type const n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type const l_irreg2 + , Compare comp, Op op) +{ + if(n_bef_irreg2 == 0){ + RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); + op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op); + } + else { + RandIt buffer = first - l_block; + RandIt first1 = first; + RandIt last1 = l_irreg1 ? first1 + l_irreg1 : first1 + l_block; + RandIt first2 = last1; + RandItKeys const key_end (key_first+n_bef_irreg2); + bool skip_first_it = l_irreg1 != 0; + for( bool is_range1_A = l_irreg1 ? 
true : key_comp(*key_first, midkey) + ; key_first != key_end; first2 += l_block){ + if(!skip_first_it){ + ++key_first; + } + skip_first_it = false; + bool const last_it = key_first == key_end; + //If the trailing block is empty, we'll make it equal to the previous if empty + bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); + bool const is_buffer_middle = last1 == buffer; + + if(is_range1_A == is_range2_A){ + //If range1 is buffered, write it to its final position + if(!is_buffer_middle){ + buffer = op(forward_t(), first1, last1, buffer); + } + + first1 = first2; + if(last_it){ + last1 = first2+l_block*n_aft_irreg2; + op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); + break; + } + else{ + last1 = first2 + l_block; + } + } + else { + BOOST_ASSERT(!last_it || (l_irreg2 || n_aft_irreg2)); + if(last_it){ + if(is_buffer_middle){ + //See comment below marked with (*) + first1 = op_partial_merge_left_middle_buffer(first1, last1, first2, first2[l_block*n_aft_irreg2], comp, op, is_range1_A); + last1 = first2; + buffer = first1 - l_block; + } + last1 += l_block*n_aft_irreg2; + op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); + break; + } + else{ + RandIt const last2 = first2 + l_block; + first1 = op_partial_merge_left_smart(first1, last1, first2, last2, comp, op, is_range1_A); + + if(first1 < first2){ //is_buffer_middle == true for the next iteration + last1 = first2; + buffer = last1; + } + else{ //is_buffer_middle == false for the next iteration + is_range1_A = is_range2_A; + buffer = first1 - l_block; + last1 = last2; + } + } + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// PARTIAL MERGE RIGHT +// +/////////////////////////////////////////////////////////////////////////////// + + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_right_middle_buffer_impl + ( RandIt const last1, RandIt const first2, RandIt last2, Compare 
comp, Op op) +{ + while(first2 != last2 && !comp(last2[-1], last1[-1])){ + --last2; + } + return op(forward_t(), first2, last2, last1); +} + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_right_middle_buffer + ( RandIt const last1, RandIt first2, RandIt const last2 + , Compare comp, Op op, bool is_stable) +{ + return is_stable ? op_partial_merge_right_middle_buffer_impl(last1, first2, last2, comp, op) + : op_partial_merge_right_middle_buffer_impl(last1, first2, last2, antistable<Compare>(comp), op); +} + +// Partially merges two ordered ranges. Partially means that elements are merged +// until one of two ranges is exhausted (M elements from ranges 1 y 2). +// [last2, buf_last) -> buffer that can be overwritten +// [first1, last1) merge [last1,last2) -> [buf_last - M, buf_last) +// Note: distance(last2, buf_last) >= distance(first1, last1), so no overlapping occurs. +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_right_impl + ( RandIt const first1, RandIt last1, RandIt last2, RandIt buf_last, Compare comp, Op op) +{ + RandIt const first2 = last1; + while(first2 != last2){ + if(last1 == first1){ + return last2; + } + --last2; + --last1; + --buf_last; + if(comp(*last2, *last1)){ + op(last1, buf_last); + ++last2; + } + else{ + op(last2, buf_last); + ++last1; + } + } + return last1; +} + +template<class RandIt, class Compare, class Op> +RandIt op_partial_merge_right + ( RandIt first1, RandIt const last1, RandIt const last2, RandIt buf_last, Compare comp, Op op, bool is_stable) +{ + return is_stable ? op_partial_merge_right_impl(first1, last1, last2, buf_last, comp, op) + : op_partial_merge_right_impl(first1, last1, last2, buf_last, antistable<Compare>(comp), op); +} + + +// first - first element to merge. +// last iterator is (first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2) +// [last, last+l_block) - buffer +// l_block - length of regular blocks. 
Blocks are stable sorted by 1st elements and key-coded +// key_first - sequence of keys, in same order as blocks. key<midkey means stream A +// n_bef_irreg2/n_aft_irreg2 are regular blocks +// l_irreg2 is a irregular block, that is to be merged after n_bef_irreg2 blocks and before n_aft_irreg2 blocks +// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). +template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op> +void op_merge_blocks_right + ( RandItKeys const key_first + , const typename iterator_traits<RandItKeys>::value_type &midkey + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type const n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type const l_irreg2 + , Compare comp, Op op) +{ + RandIt last1 = first + (n_bef_irreg2+n_aft_irreg2)*l_block; + RandIt first1 = last1-l_block; + RandIt first2 = last1; + RandIt last2 = first2 + l_irreg2; + RandIt buffer_end = last2 + l_block; + RandItKeys key_end (key_first+(n_bef_irreg2+n_aft_irreg2)); + + for(bool is_range2_A = false; key_first != key_end; last1 = first1, first1 -= l_block){ + --key_end; + bool const is_range1_A = key_comp(*key_end, midkey); + bool const is_buffer_middle = first2 == buffer_end; + + if(is_range1_A == is_range2_A){ + if(!is_buffer_middle){ + buffer_end = op(backward_t(), first2, last2, buffer_end); + } + //else //op forward already done on previous op_partial_merge_right + + first2 = first1; + last2 = last1; + } + else { + if(is_buffer_middle){ + //A previous op_partial_merge_right has right range2 elements after the buffer. + //In order to merge it with the next block, move them to the start of the buffer so that + //buffer is placed to the right. Move only the minimum elements as some range1 elements + //won't be moved in the merge. 
+ last2 = op_partial_merge_right_middle_buffer(last1, first2, last2, comp, op, !is_range2_A); + first2 = last1; + buffer_end = last2 + l_block; + } + + //op_partial_merge_right merges two ranges, but stops moving elements + //once one range is emptied to avoid moving data twice in the next iteration + last2 = op_partial_merge_right(first1, last1, last2, buffer_end, comp, op, !is_range2_A); + if(last2 > first2){ //is_buffer_middle == true for the next iteration + buffer_end = first2; + } + else{ //is_buffer_middle == false for the next iteration + is_range2_A = is_range1_A; + buffer_end = last2 + l_block; + first2 = first1; + } + } + } + + if(first2 != buffer_end){ + op(backward_t(), first2, last2, buffer_end); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// PARTIAL MERGE BUFFERLESS +// +/////////////////////////////////////////////////////////////////////////////// + +// [first1, last1) merge [last1,last2) -> [first1,last2) +template<class RandIt, class Compare> +RandIt partial_merge_bufferless_impl + (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) +{ + if(last1 == last2){ + return first1; + } + bool const is_range1_A = *pis_range1_A; + if(first1 != last1 && comp(*last1, last1[-1])){ + do{ + RandIt const old_last1 = last1; + last1 = lower_bound(last1, last2, *first1, comp); + first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported + if(last1 == last2){ + return first1; + } + do{ + ++first1; + } while(last1 != first1 && !comp(*last1, *first1) ); + } while(first1 != last1); + } + *pis_range1_A = !is_range1_A; + return last1; +} + +// [first1, last1) merge [last1,last2) -> [first1,last2) +template<class RandIt, class Compare> +RandIt partial_merge_bufferless + (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) +{ + return *pis_range1_A ? 
partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp) + : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp)); +} + + + +// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded +// keys - sequence of keys, in same order as blocks. key<midkey means stream A +// n_aft_irreg2 are regular blocks from stream A. l_irreg2 is length of last (irregular) block from stream B, that should go before n_aft_irreg2 blocks. +// l_irreg2=0 requires n_aft_irreg2=0 (no irregular blocks). l_irreg2>0, n_aft_irreg2=0 is possible. +template<class RandItKeys, class KeyCompare, class RandIt, class Compare> +void merge_blocks_bufferless + ( RandItKeys key_first + , const typename iterator_traits<RandItKeys>::value_type &midkey + , KeyCompare key_comp + , RandIt first + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const l_irreg1 + , typename iterator_traits<RandIt>::size_type const n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type const n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type const l_irreg2 + , Compare comp) +{ + if(n_bef_irreg2 == 0){ + RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); + merge_bufferless(first, last_reg, last_reg+l_irreg2, comp); + } + else{ + RandIt first1 = first; + RandIt last1 = l_irreg1 ? first + l_irreg1: first + l_block; + RandItKeys const key_end (key_first+n_bef_irreg2); + bool is_range1_A = l_irreg1 ? 
true : key_comp(*key_first++, midkey); + + for( ; key_first != key_end; ++key_first){ + bool is_range2_A = key_comp(*key_first, midkey); + if(is_range1_A == is_range2_A){ + first1 = last1; + } + else{ + first1 = partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp); + } + last1 += l_block; + } + + if(l_irreg2){ + if(!is_range1_A){ + first1 = last1; + } + last1 += l_block*n_aft_irreg2; + merge_bufferless(first1, last1, last1+l_irreg2, comp); + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// BUFFERED MERGE +// +/////////////////////////////////////////////////////////////////////////////// +template<class RandIt, class Compare, class Op, class Buf> +void op_buffered_merge + ( RandIt first, RandIt const middle, RandIt last + , Compare comp, Op op + , Buf &xbuf) +{ + if(first != middle && middle != last && comp(*middle, middle[-1])){ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const len1 = size_type(middle-first); + size_type const len2 = size_type(last-middle); + if(len1 <= len2){ + first = upper_bound(first, middle, *middle, comp); + xbuf.move_assign(first, size_type(middle-first)); + op_merge_with_right_placed + (xbuf.data(), xbuf.end(), first, middle, last, comp, op); + } + else{ + last = lower_bound(middle, last, middle[-1], comp); + xbuf.move_assign(middle, size_type(last-middle)); + op_merge_with_left_placed + (first, middle, last, xbuf.data(), xbuf.end(), comp, op); + } + } +} + +template<class RandIt, class Compare> +void buffered_merge + ( RandIt first, RandIt const middle, RandIt last + , Compare comp + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> &xbuf) +{ + op_buffered_merge(first, middle, last, comp, move_op(), xbuf); +} + +// Complexity: 2*distance(first, last)+max_collected^2/2 +// +// Tries to collect at most n_keys unique elements from [first, last), +// in the begining of the range, and ordered according to comp +// +// Returns 
the number of collected keys
template<class RandIt, class Compare>
typename iterator_traits<RandIt>::size_type
   collect_unique
   ( RandIt const first, RandIt const last
   , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp
   , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   typedef typename iterator_traits<RandIt>::value_type value_type;
   size_type h = 0;
   if(max_collected){
      ++h;  // first key is always here
      //Invariant: [h0, h0+h) holds the h unique keys collected so far,
      //sorted according to comp; u is the scan cursor and
      //[search_end, u) the already-scanned elements behind it.
      RandIt h0 = first;
      RandIt u = first; ++u;
      RandIt search_end = u;

      if(xbuf.capacity() >= max_collected){
         //Fast path: the external buffer can hold all requested keys, so the
         //sorted key set is maintained in xbuf and a new key is a plain
         //buffer insert (no in-place rotations needed).
         value_type *const ph0 = xbuf.add(first);
         while(u != last && h < max_collected){
            value_type * const r = lower_bound(ph0, xbuf.end(), *u, comp);
            //If key not found add it to [h, h+h0)
            if(r == xbuf.end() || comp(*u, *r) ){
               RandIt const new_h0 = boost::move(search_end, u, h0);
               search_end = u;
               ++search_end;
               ++h;
               xbuf.insert(r, u);
               h0 = new_h0;
            }
            ++u;
         }
         //Make room at the front and copy the collected keys there
         boost::move_backward(first, h0, h0+h);
         boost::move(xbuf.data(), xbuf.end(), first);
      }
      else{
         //Slow path: not enough external memory, so the sorted key run is
         //kept in-place and maintained with GCD-based rotations.
         while(u != last && h < max_collected){
            RandIt const r = lower_bound(h0, search_end, *u, comp);
            //If key not found add it to [h, h+h0)
            if(r == search_end || comp(*u, *r) ){
               RandIt const new_h0 = rotate_gcd(h0, search_end, u);
               search_end = u;
               ++search_end;
               ++h;
               rotate_gcd(r+(new_h0-h0), u, search_end);
               h0 = new_h0;
            }
            ++u;
         }
         //Bring the collected key run to the beginning of the range
         rotate_gcd(first, h0, h0+h);
      }
   }
   return h;
}

//Integer square root rounded down, computed with the Newton-Raphson
//(Heron) iteration. Returns 0 for n == 0.
template<class Unsigned>
Unsigned floor_sqrt(Unsigned const n)
{
   Unsigned x = n;
   Unsigned y = x/2 + (x&1);
   while (y < x){
      x = y;
      y = (x + n / x)/2;
   }
   return x;
}

//Integer square root rounded up.
//NOTE(review): n == 0 makes floor_sqrt return 0 and (n%r) divide by zero;
//callers appear to pass n > 0 -- confirm.
template<class Unsigned>
Unsigned ceil_sqrt(Unsigned const n)
{
   Unsigned r = floor_sqrt(n);
   return r + Unsigned((n%r) != 0);
}

//Decomposes n into (base << pow) by halving until
//base <= AdaptiveSortInsertionSortThreshold. Outputs base and pow and
//returns that multiple (the largest one not greater than n).
template<class Unsigned>
Unsigned floor_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow)
{
   Unsigned s = n;
   Unsigned p = 0;
while(s > AdaptiveSortInsertionSortThreshold){ + s /= 2; + ++p; + } + base = s; + pow = p; + return s << p; +} + +template<class Unsigned> +Unsigned ceil_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow) +{ + Unsigned fm = floor_merge_multiple(n, base, pow); + + if(fm != n){ + if(base < AdaptiveSortInsertionSortThreshold){ + ++base; + } + else{ + base = AdaptiveSortInsertionSortThreshold/2 + 1; + ++pow; + } + } + return base << pow; +} + +template<class Unsigned> +Unsigned ceil_sqrt_multiple(Unsigned const n, Unsigned *pbase = 0) +{ + Unsigned const r = ceil_sqrt(n); + Unsigned pow = 0; + Unsigned base = 0; + Unsigned const res = ceil_merge_multiple(r, base, pow); + if(pbase) *pbase = base; + return res; +} + +template<class Unsigned> +Unsigned ceil_sqrt_pow2(Unsigned const n) +{ + Unsigned r=1; + Unsigned exp = 0; + Unsigned pow = 1u; + while(pow != 0 && pow < n){ + r*=2; + ++exp; + pow = r << exp; + } + return r; +} + +struct less +{ + template<class T> + bool operator()(const T &l, const T &r) + { return l < r; } +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// MERGE BLOCKS +// +/////////////////////////////////////////////////////////////////////////////// + +template<class RandIt, class Compare> +void slow_stable_sort + ( RandIt const first, RandIt const last, Compare comp) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type L = size_type(last - first); + { //Use insertion sort to merge first elements + size_type m = 0; + while((L - m) > size_type(AdaptiveSortInsertionSortThreshold)){ + insertion_sort(first+m, first+m+size_type(AdaptiveSortInsertionSortThreshold), comp); + m += AdaptiveSortInsertionSortThreshold; + } + insertion_sort(first+m, last, comp); + } + + size_type h = AdaptiveSortInsertionSortThreshold; + for(bool do_merge = L > h; do_merge; h*=2){ + do_merge = (L - h) > h; + size_type p0 = 0; + if(do_merge){ + size_type const h_2 = 2*h; + while((L-p0) > h_2){ + 
merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp); + p0 += h_2; + } + } + if((L-p0) > h){ + merge_bufferless(first+p0, first+p0+h, last, comp); + } + } +} + +//Returns new l_block and updates use_buf +template<class Unsigned> +Unsigned lblock_for_combine + (Unsigned const l_block, Unsigned const n_keys, Unsigned const l_data, bool &use_buf) +{ + BOOST_ASSERT(l_data > 1); + + //We need to guarantee lblock >= l_merged/(n_keys/2) keys for the combination. + //We have at least 4 keys guaranteed (which are the minimum to merge 2 ranges) + //If l_block != 0, then n_keys is already enough to merge all blocks in all + //phases as we've found all needed keys for that buffer and length before. + //If l_block == 0 then see if half keys can be used as buffer and the rest + //as keys guaranteeing that n_keys >= (2*l_merged)/lblock = + if(!l_block){ + //If l_block == 0 then n_keys is power of two + //(guaranteed by build_params(...)) + BOOST_ASSERT(n_keys >= 4); + //BOOST_ASSERT(0 == (n_keys &(n_keys-1))); + + //See if half keys are at least 4 and if half keys fulfill + Unsigned const new_buf = n_keys/2; + Unsigned const new_keys = n_keys-new_buf; + use_buf = new_keys >= 4 && new_keys >= l_data/new_buf; + if(use_buf){ + return new_buf; + } + else{ + return l_data/n_keys; + } + } + else{ + use_buf = true; + return l_block; + } +} + + +//Although "cycle" sort is known to have the minimum number of writes to target +//selection sort is more appropriate here as we want to minimize swaps. 
+template<class RandItKeys, class KeyCompare, class RandIt, class Compare> +void selection_sort_blocks + ( RandItKeys keys + , typename iterator_traits<RandIt>::size_type &midkey_idx //inout + , KeyCompare key_comp + , RandIt const first_block + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const n_blocks + , Compare comp + , bool use_first_element) +{ + typedef typename iterator_traits<RandIt>::size_type size_type ; + size_type const back_midkey_idx = midkey_idx; + typedef typename iterator_traits<RandIt>::size_type size_type; + typedef typename iterator_traits<RandIt>::value_type value_type; + + //Nothing to sort if 0 or 1 blocks or all belong to the first ordered half + if(n_blocks < 2 || back_midkey_idx >= n_blocks){ + return; + } + //One-past the position of the first untouched element of the second half + size_type high_watermark = back_midkey_idx+1; + BOOST_ASSERT(high_watermark <= n_blocks); + + //Sort by first element if left merging, last element otherwise + size_type const reg_off = use_first_element ? 0u: l_block-1; + + for(size_type block=0; block < n_blocks-1; ++block){ + size_type min_block = block; + //Since we are searching for the minimum value in two sorted halves: + //Optimization 1: If block belongs to first half, don't waste time comparing elements of the first half. + //Optimization 2: It is enough to compare until the first untouched element of the second half. 
+ high_watermark = size_type(max_value(block+2, high_watermark)); + BOOST_ASSERT(high_watermark <= n_blocks); + for(size_type next_block = size_type(max_value(block+1, back_midkey_idx)); next_block < high_watermark; ++next_block){ + const value_type &v = first_block[next_block*l_block+reg_off]; + const value_type &min = first_block[min_block*l_block+reg_off]; + if( comp(v, min) || (!comp(min, v) && key_comp(keys[next_block], keys[min_block])) ){ + min_block=next_block; + } + } + + if(min_block != block){ + BOOST_ASSERT(block >= back_midkey_idx || min_block >= back_midkey_idx); + BOOST_ASSERT(min_block < high_watermark); + //Update high watermark if n_blocks is not surpassed + high_watermark += size_type((min_block + 1) != n_blocks && (min_block + 1) == high_watermark); + BOOST_ASSERT(high_watermark <= n_blocks); + boost::adl_move_swap_ranges(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + boost::adl_move_swap(keys[block], keys[min_block]); + if(midkey_idx == block) + midkey_idx = min_block; + else if(midkey_idx == min_block) + midkey_idx = block; + } + } +} + +template<class RandIt, class Compare> +void stable_sort( RandIt first, RandIt last, Compare comp + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const len = size_type(last - first); + size_type const half_len = len/2 + (len&1); + if(std::size_t(xbuf.capacity() - xbuf.size()) >= half_len) { + merge_sort(first, last, comp, xbuf.data()+xbuf.size()); + } + else{ + slow_stable_sort(first, last, comp); + } +} + +template<class RandIt, class Comp> +void initialize_keys( RandIt first, RandIt last + , Comp comp + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf) +{ + stable_sort(first, last, comp, xbuf); +} + +template<class RandIt, class U> +void initialize_keys( RandIt first, RandIt last + , less + , U &) +{ + typedef typename 
iterator_traits<RandIt>::value_type value_type; + std::size_t count = std::size_t(last - first); + for(std::size_t i = 0; i != count; ++i){ + *first = value_type(i); + ++first; + } +} + +template<class RandItKeys, class KeyCompare, class RandIt, class Compare> +void combine_params + ( RandItKeys const keys + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type l_combined + , typename iterator_traits<RandIt>::size_type const l_prev_merged + , typename iterator_traits<RandIt>::size_type const l_block + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf + , Compare comp + //Output + , typename iterator_traits<RandIt>::size_type &midkey_idx + , typename iterator_traits<RandIt>::size_type &l_irreg1 + , typename iterator_traits<RandIt>::size_type &n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type &n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type &l_irreg2 + , bool is_merge_left) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + typedef typename iterator_traits<RandIt>::value_type value_type; + + l_irreg1 = l_prev_merged%l_block; + l_irreg2 = (l_combined-l_irreg1)%l_block; + BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); + size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; + midkey_idx = l_prev_merged/l_block; + BOOST_ASSERT(n_reg_block>=midkey_idx); + initialize_keys(keys, keys+n_reg_block+(midkey_idx==n_reg_block), key_comp, xbuf); + selection_sort_blocks(keys, midkey_idx, key_comp, first+l_irreg1, l_block, n_reg_block, comp, is_merge_left); + + n_aft_irreg2 = 0; + if(l_irreg2!=0){ + size_type const reg_off = is_merge_left ? 0u: l_block-1; + size_type const irreg_off = is_merge_left ? 
0u: l_irreg2-1; + RandIt prev_block_first = first + l_combined - l_irreg2; + const value_type &incomplete_block_first = prev_block_first[irreg_off]; + while(n_aft_irreg2 != n_reg_block && + comp(incomplete_block_first, (prev_block_first-= l_block)[reg_off]) ){ + ++n_aft_irreg2; + } + } + n_bef_irreg2 = n_reg_block-n_aft_irreg2; +} + +// first - first element to merge. +// first[-l_block, 0) - buffer (if use_buf == true) +// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded +// keys - sequence of keys, in same order as blocks. key<midkey means stream A +// n_bef_irreg2/n_aft_irreg2 are regular blocks +// l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks +// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). +template<class RandItKeys, class KeyCompare, class RandIt, class Compare> +void merge_blocks_left + ( RandItKeys const key_first + , const typename iterator_traits<RandItKeys>::value_type &midkey + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_block + , typename iterator_traits<RandIt>::size_type const l_irreg1 + , typename iterator_traits<RandIt>::size_type const n_bef_irreg2 + , typename iterator_traits<RandIt>::size_type const n_aft_irreg2 + , typename iterator_traits<RandIt>::size_type const l_irreg2 + , Compare comp + , bool const xbuf_used) +{ + if(xbuf_used){ + op_merge_blocks_left + (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op()); + } + else{ + op_merge_blocks_left + (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op()); + } +} + + +// first - first element to merge. +// [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer +// l_block - length of regular blocks. 
First nblocks are stable sorted by 1st elements and key-coded
// keys - sequence of keys, in same order as blocks. key<midkey means stream A
// n_bef_irreg2/n_aft_irreg2 are regular blocks
// l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks
// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks).
template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
void merge_blocks_right
   ( RandItKeys const key_first
   , const typename iterator_traits<RandItKeys>::value_type &midkey
   , KeyCompare key_comp
   , RandIt const first
   , typename iterator_traits<RandIt>::size_type const l_block
   , typename iterator_traits<RandIt>::size_type const n_bef_irreg2
   , typename iterator_traits<RandIt>::size_type const n_aft_irreg2
   , typename iterator_traits<RandIt>::size_type const l_irreg2
   , Compare comp
   , bool const xbuf_used)
{
   //Dispatch on the operation: move_op when the external buffer holds a
   //backup of the internal buffer (its in-range contents are presumably
   //disposable then -- confirm against combine_all_blocks), swap_op
   //otherwise so no element is lost.
   if(xbuf_used){
      op_merge_blocks_right
         (key_first, midkey, key_comp, first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op());
   }
   else{
      op_merge_blocks_right
         (key_first, midkey, key_comp, first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op());
   }
}


//Relocates [cur_pos, cur_pos+l_data) so it ends at new_pos+l_data,
//copying/swapping backward so overlapping ranges with new_pos > cur_pos
//are handled correctly. With xbuf_used a plain move is enough; otherwise
//the ranges are swapped to preserve the displaced elements.
template<class RandIt>
void move_data_backward( RandIt cur_pos
                       , typename iterator_traits<RandIt>::size_type const l_data
                       , RandIt new_pos
                       , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
      boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
   }
   else{
      boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
      //Rotate does less moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}

//Forward counterpart of move_data_backward: relocates
//[cur_pos, cur_pos+l_data) to start at new_pos, iterating forward
//(correct for overlap with new_pos < cur_pos).
template<class RandIt>
void move_data_forward( RandIt cur_pos
                      , typename iterator_traits<RandIt>::size_type const l_data
                      , RandIt new_pos
                      , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
boost::move(cur_pos, cur_pos+l_data, new_pos); + } + else{ + boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); + //Rotate does less moves but it seems slower due to cache issues + //rotate_gcd(first-l_block, first+len-l_block, first+len); + } +} + +template <class Unsigned> +Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0) +{ + typedef Unsigned size_type; + + size_type const l_combined = 2*l_prev_merged; + size_type l_irreg_combined = len%l_combined; + size_type l_total_combined = len; + if(l_irreg_combined <= l_prev_merged){ + l_total_combined -= l_irreg_combined; + l_irreg_combined = 0; + } + if(pl_irreg_combined) + *pl_irreg_combined = l_irreg_combined; + return l_total_combined; +} + +// keys are on the left of first: +// If use_buf: [first - l_block - n_keys, first - l_block). +// Otherwise: [first - n_keys, first). +// Buffer (if use_buf) is also on the left of first [first - l_block, first). +// Blocks of length l_prev_merged combined. We'll combine them in pairs +// l_prev_merged and n_keys are powers of 2. 
(2*l_prev_merged/l_block) keys are guaranteed +// Returns the number of combined elements (some trailing elements might be left uncombined) +template<class RandItKeys, class KeyCompare, class RandIt, class Compare> +void combine_blocks + ( RandItKeys const keys + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits<RandIt>::size_type const len + , typename iterator_traits<RandIt>::size_type const l_prev_merged + , typename iterator_traits<RandIt>::size_type const l_block + , bool const use_buf + , bool const xbuf_used + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf + , Compare comp + , bool merge_left) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + + size_type const l_combined = 2*l_prev_merged; + size_type l_irreg_combined = 0; + size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); + size_type const n_reg_combined = len/l_combined; + RandIt combined_first = first; + + (void)l_total_combined; + BOOST_ASSERT(l_total_combined <= len); + + size_type n_bef_irreg2, n_aft_irreg2, midkey_idx, l_irreg1, l_irreg2; + size_type const max_i = n_reg_combined + (l_irreg_combined != 0); + + if(merge_left || !use_buf) + for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_combined) { + bool const is_last = combined_i==n_reg_combined; + combine_params( keys, key_comp, combined_first, is_last ? 
l_irreg_combined : l_combined + , l_prev_merged, l_block, xbuf, comp + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); + BOOST_ASSERT(!l_irreg1); + if(use_buf){ + merge_blocks_left + (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); + } + else{ + merge_blocks_bufferless + (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); + } + //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: ", len + l_block); + } + else{ + combined_first += l_combined*(max_i-1); + for( size_type combined_i = max_i; combined_i--; combined_first -= l_combined) { + bool const is_last = combined_i==n_reg_combined; + combine_params( keys, key_comp, combined_first, is_last ? l_irreg_combined : l_combined + , l_prev_merged, l_block, xbuf, comp + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, false); //Outputs + BOOST_ASSERT(!l_irreg1); + //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); + merge_blocks_right + (keys, keys[midkey_idx], key_comp, combined_first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); + //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_r: ", len + l_block); + } + } +} + + +template<class RandIt, class Compare> +typename iterator_traits<RandIt>::size_type + buffered_merge_blocks + ( RandIt const first, RandIt const last + , typename iterator_traits<RandIt>::size_type const input_combined_size + , Compare comp + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> &xbuf) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type combined_size = input_combined_size; + + for( size_type const elements_in_blocks = size_type(last - first) + ; elements_in_blocks > combined_size && size_type(xbuf.capacity()) >= combined_size + ; combined_size *=2){ + RandIt 
merge_point = first;
      while(size_type(last - merge_point) > 2*combined_size) {
         RandIt const second_half = merge_point+combined_size;
         RandIt const next_merge_point = second_half+combined_size;
         buffered_merge(merge_point, second_half, next_merge_point, comp, xbuf);
         merge_point = next_merge_point;
      }
      //Merge the incomplete trailing pair, if any
      if(size_type(last-merge_point) > combined_size){
         buffered_merge(merge_point, merge_point+combined_size, last, comp, xbuf);
      }
   }
   return combined_size;
}

//Insertion-sorts [first, first+length) in chunks of
//s = min(step, AdaptiveSortInsertionSortThreshold) elements. Each chunk is
//also handed the range starting s positions to its left (first+m-s) --
//presumably the buffer destination insertion_sort_op writes through op;
//confirm against insertion_sort_op. Returns s.
template<class RandIt, class Compare, class Op>
typename iterator_traits<RandIt>::size_type
   op_insertion_sort_step_left
   ( RandIt const first
   , typename iterator_traits<RandIt>::size_type const length
   , typename iterator_traits<RandIt>::size_type const step
   , Compare comp, Op op)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold);
   size_type m = 0;

   while((length - m) > s){
      insertion_sort_op(first+m, first+m+s, first+m-s, comp, op);
      m += s;
   }
   //Last, possibly shorter, chunk
   insertion_sort_op(first+m, first+length, first+m-s, comp, op);
   return s;
}

//In-place variant of the step above: insertion-sorts [first, first+length)
//in chunks of s = min(step, AdaptiveSortInsertionSortThreshold) elements
//without displacing them. Returns s.
template<class RandIt, class Compare>
typename iterator_traits<RandIt>::size_type
   insertion_sort_step
   ( RandIt const first
   , typename iterator_traits<RandIt>::size_type const length
   , typename iterator_traits<RandIt>::size_type const step
   , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold);
   size_type m = 0;

   while((length - m) > s){
      insertion_sort(first+m, first+m+s, comp);
      m += s;
   }
   //Last, possibly shorter, chunk
   insertion_sort(first+m, first+length, comp);
   return s;
}

//Repeatedly merges adjacent sorted runs of length l_merged one run-length
//to the left (into the buffer space preceding first_block), doubling
//l_merged each pass while l_merged < l_build_buf and l_left_space allows.
template<class RandIt, class Compare, class Op>
typename iterator_traits<RandIt>::size_type
   op_merge_left_step
   ( RandIt first_block
   , typename iterator_traits<RandIt>::size_type const elements_in_blocks
   , typename iterator_traits<RandIt>::size_type l_merged
   , typename
iterator_traits<RandIt>::size_type const l_build_buf + , typename iterator_traits<RandIt>::size_type l_left_space + , Compare comp + , Op op) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + for(; l_merged < l_build_buf && l_left_space >= l_merged; l_merged*=2){ + size_type p0=0; + RandIt pos = first_block; + while((elements_in_blocks - p0) > 2*l_merged) { + op_merge_left(pos-l_merged, pos, pos+l_merged, pos+2*l_merged, comp, op); + p0 += 2*l_merged; + pos = first_block+p0; + } + if((elements_in_blocks-p0) > l_merged) { + op_merge_left(pos-l_merged, pos, pos+l_merged, first_block+elements_in_blocks, comp, op); + } + else { + op(forward_t(), pos, first_block+elements_in_blocks, pos-l_merged); + } + first_block -= l_merged; + l_left_space -= l_merged; + } + return l_merged; +} + +template<class RandIt, class Compare, class Op> +void op_merge_right_step + ( RandIt first_block + , typename iterator_traits<RandIt>::size_type const elements_in_blocks + , typename iterator_traits<RandIt>::size_type const l_build_buf + , Compare comp + , Op op) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type restk = elements_in_blocks%(2*l_build_buf); + size_type p = elements_in_blocks - restk; + BOOST_ASSERT(0 == (p%(2*l_build_buf))); + + if(restk <= l_build_buf){ + op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf); + } + else{ + op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op); + } + while(p>0){ + p -= 2*l_build_buf; + op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); + } +} + + +// build blocks of length 2*l_build_buf. 
l_build_buf is a power of two
// input:  [0, l_build_buf) elements are buffer, rest unsorted elements
// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
//
// First elements are merged from right to left until elements start
// at first. All old elements [first, first + l_build_buf) are placed at the end
// [first+len-l_build_buf, first+len). To achieve this:
// - If we have external memory to merge, we save elements from the buffer
//   so that a non-swapping merge is used. Buffer elements are restored
//   at the end of the buffer from the external memory.
//
// - When the external memory is not available or it is insufficient
//   for a merge operation, left swap merging is used.
//
// Once elements are merged left to right in blocks of l_build_buf, a single left
// to right merge step is performed to achieve merged blocks of size 2*l_build_buf.
// If external memory is available, the usual merge is used, swap merging otherwise.
//
// As a last step, if auxiliary memory is available, an in-place merge is performed
// until everything is merged or the auxiliary memory is not large enough.
+template<class RandIt, class Compare> +typename iterator_traits<RandIt>::size_type + build_blocks + ( RandIt const first + , typename iterator_traits<RandIt>::size_type const len + , typename iterator_traits<RandIt>::size_type const l_base + , typename iterator_traits<RandIt>::size_type const l_build_buf + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf + , Compare comp) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + BOOST_ASSERT(l_build_buf <= len); + BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); + + //Place the start pointer after the buffer + RandIt first_block = first + l_build_buf; + size_type const elements_in_blocks = len - l_build_buf; + + ////////////////////////////////// + // Start of merge to left step + ////////////////////////////////// + size_type l_merged = 0u; + +// if(xbuf.capacity()>=2*l_build_buf){ + if(!l_build_buf){ + l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); + //2*l_build_buf already merged, now try to merge further + //using classic in-place mergesort if enough auxiliary memory is available + return buffered_merge_blocks + (first_block, first_block + elements_in_blocks, l_merged, comp, xbuf); + } + else{ + //If there is no enough buffer for the insertion sort step, just avoid the external buffer + size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); + kbuf = kbuf < l_base ? 0 : kbuf; + + if(kbuf){ + //Backup internal buffer values in external buffer so they can be overwritten + xbuf.move_assign(first+l_build_buf-kbuf, kbuf); + l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); + + //Now combine them using the buffer. 
Elements from buffer can be + //overwritten since they've been saved to xbuf + l_merged = op_merge_left_step + ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); + + //Restore internal buffer from external buffer unless kbuf was l_build_buf, + //in that case restoration will happen later + if(kbuf != l_build_buf){ + boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); + } + } + else{ + l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); + rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); + } + + //Now combine elements using the buffer. Elements from buffer can't be + //overwritten since xbuf was not big enough, so merge swapping elements. + l_merged = op_merge_left_step + (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); + + BOOST_ASSERT(l_merged == l_build_buf); + + ////////////////////////////////// + // Start of merge to right step + ////////////////////////////////// + + //If kbuf is l_build_buf then we can merge right without swapping + //Saved data is still in xbuf + if(kbuf && kbuf == l_build_buf){ + op_merge_right_step(first, elements_in_blocks, l_build_buf, comp, move_op()); + //Restore internal buffer from external buffer if kbuf was l_build_buf. + //as this operation was previously delayed. + boost::move(xbuf.data(), xbuf.data() + kbuf, first); + } + else{ + op_merge_right_step(first, elements_in_blocks, l_build_buf, comp, swap_op()); + } + xbuf.clear(); + //2*l_build_buf already merged, now try to merge further + //using classic in-place mergesort if enough auxiliary memory is available + return buffered_merge_blocks + (first_block, first_block + elements_in_blocks, l_build_buf*2, comp, xbuf); + } +} + +//Returns true if buffer is placed in +//[buffer+len-l_intbuf, buffer+len). 
Otherwise, buffer is +//[buffer,buffer+l_intbuf) +template<class RandIt, class Compare> +bool combine_all_blocks + ( RandIt keys + , typename iterator_traits<RandIt>::size_type &n_keys + , RandIt const buffer + , typename iterator_traits<RandIt>::size_type const l_buf_plus_data + , typename iterator_traits<RandIt>::size_type l_merged + , typename iterator_traits<RandIt>::size_type &l_intbuf + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf + , Compare comp) +{ + typedef typename iterator_traits<RandIt>::size_type size_type; + RandIt const first = buffer + l_intbuf; + size_type const l_data = l_buf_plus_data - l_intbuf; + size_type const l_unique = l_intbuf+n_keys; + //Backup data to external buffer once if possible + bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); + if(common_xbuf){ + if(n_keys){ + xbuf.move_assign(buffer, l_intbuf); + } + else{ + xbuf.clear(); + merge_sort_uninitialized_copy(buffer, first, xbuf.data(), comp); + xbuf.set_size(l_intbuf); + } + } + + bool prev_merge_left = true; + size_type l_prev_total_combined = 0u, l_prev_block = 0; + bool prev_use_internal_buf = true; + + for( size_type n = 0; l_data > l_merged + ; l_merged*=2 + , ++n){ + //If l_intbuf is non-zero, use that internal buffer. 
+ // Implies l_block == l_intbuf && use_internal_buf == true + //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer, + // Implies l_block == n_keys/2 && use_internal_buf == true + //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false) + bool use_internal_buf = false; + size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf); + BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); + BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); + BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); + + bool const is_merge_left = (n&1) == 0; + size_type const l_total_combined = calculate_total_combined(l_data, l_merged); + + if(prev_use_internal_buf && prev_merge_left){ + if(is_merge_left || !use_internal_buf){ + move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); + } + else{ + //Put the buffer just after l_total_combined + RandIt const buf_end = first+l_prev_total_combined; + RandIt const buf_beg = buf_end-l_block; + if(l_prev_total_combined > l_total_combined){ + size_type const l_diff = l_prev_total_combined - l_total_combined; + move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); + } + else if(l_prev_total_combined < l_total_combined){ + size_type const l_diff = l_total_combined - l_prev_total_combined; + move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); + } + } + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After move_data : ", l_data + l_intbuf); + + //Combine to form l_merged*2 segments + if(n_keys){ + combine_blocks + ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + else{ + size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); + combine_blocks + ( uint_keys, less(), !use_internal_buf || is_merge_left ? 
first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_blocks: ", l_data + l_intbuf); + prev_merge_left = is_merge_left; + l_prev_total_combined = l_total_combined; + l_prev_block = l_block; + prev_use_internal_buf = use_internal_buf; + } + BOOST_ASSERT(l_prev_total_combined == l_data); + bool const buffer_right = prev_use_internal_buf && prev_merge_left; + l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; + n_keys = l_unique - l_intbuf; + //Restore data from to external common buffer if used + if(common_xbuf){ + if(buffer_right){ + boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); + } + else{ + boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); + } + } + return buffer_right; +} + +template<class RandIt, class Compare> +void stable_merge + ( RandIt first, RandIt const middle, RandIt last + , Compare comp + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> &xbuf) +{ + BOOST_ASSERT(xbuf.empty()); + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const len1 = size_type(middle-first); + size_type const len2 = size_type(last-middle); + size_type const l_min = min_value(len1, len2); + if(xbuf.capacity() >= l_min){ + buffered_merge(first, middle, last, comp, xbuf); + xbuf.clear(); + } + else{ + merge_bufferless(first, middle, last, comp); + } +} + + +template<class RandIt, class Compare> +void final_merge( bool buffer_right + , RandIt const first + , typename iterator_traits<RandIt>::size_type const l_intbuf + , typename iterator_traits<RandIt>::size_type const n_keys + , typename iterator_traits<RandIt>::size_type const len + , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf + , Compare comp) +{ + BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); + xbuf.clear(); + + typedef typename iterator_traits<RandIt>::size_type size_type; + size_type const n_key_plus_buf = l_intbuf+n_keys; + 
if(buffer_right){
+ //Internal buffer is at [len-l_intbuf, len): sort it and merge it in from
+ //the right (NOTE(review): antistable<Compare> presumably compensates
+ //equal-element ordering for this direction -- confirm against its
+ //definition), then sort the keys at the front and merge them in
+ stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
+ stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
+ stable_sort(first, first+n_keys, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf);
+ }
+ else{
+ //Keys and buffer both sit at the front: sort them together, then pick
+ //the cheapest merge the available xbuf capacity allows
+ stable_sort(first, first+n_key_plus_buf, comp, xbuf);
+ if(xbuf.capacity() >= n_key_plus_buf){
+ buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
+ }
+ else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
+ stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf);
+ }
+ else{
+ merge_bufferless(first, first+n_key_plus_buf, first+len, comp);
+ }
+ }
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After final_merge : ", len);
+}
+
+//Computes the parameters used by adaptive_sort (l_base, l_intbuf,
+//l_build_buf) and collects the needed unique keys (n_keys). Returns false
+//when not enough unique keys can be collected, in which case the caller
+//must fall back to a rotation-based stable sort.
+template<class RandIt, class Compare, class Unsigned, class T>
+bool build_params
+ (RandIt first, Unsigned const len, Compare comp
+ , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
+ , adaptive_xbuf<T> & xbuf
+ )
+{
+ typedef Unsigned size_type;
+
+ //Calculate ideal parameters and try to collect needed unique keys
+ l_base = 0u;
+
+ //Try to find a value near sqrt(len) that is 2^N*l_base where
+ //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
+ //as build_blocks merges to the left iteratively doubling the
+ //merged size and all the buffer must be used just before the final
+ //merge to right step. This guarantees "build_blocks" produces
+ //segments of size l_build_buf*2, maximizing the classic merge phase.
+ l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
+
+ //This is the minimum number of keys needed to implement the ideal algorithm
+ //
+ //l_intbuf is used as buffer plus the key count
+ size_type n_min_ideal_keys = l_intbuf-1u;
+ while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
+ --n_min_ideal_keys;
+ }
+ ++n_min_ideal_keys;
+ BOOST_ASSERT(n_min_ideal_keys < l_intbuf);
+
+ if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, n_min_ideal_keys)){
+ n_keys = 0u;
+ l_build_buf = l_intbuf;
+ }
+ else{
+ //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
+ //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
+ //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
+ //
+ //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
+ //(to be used for keys in combine_all_blocks) as the whole l_build_buf
+ //will be backed up in the buffer during build_blocks.
+ bool const non_unique_buf = xbuf.capacity() >= 2*l_intbuf;
+ size_type const to_collect = non_unique_buf ? l_intbuf : l_intbuf*2;
+ size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
+
+ //If available memory is 2*sqrt(l), then for "build_params"
+ //the situation is the same as if 2*l_intbuf were collected.
+ if(non_unique_buf && (collected >= n_min_ideal_keys))
+ collected += l_intbuf;
+
+ //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
+ //is possible (due to very low unique keys), then go to a slow sort based on rotations.
+ if(collected < (n_min_ideal_keys+l_intbuf)){
+ if(collected < 4){ //No combination possible with less than 4 keys
+ return false;
+ }
+ n_keys = l_intbuf;
+ while(n_keys&(n_keys-1)){
+ n_keys &= n_keys-1; // make it a power of 2
+ }
+ while(n_keys > collected){
+ n_keys/=2;
+ }
+ l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
+ l_intbuf = 0;
+ l_build_buf = n_keys;
+ }
+ else if((collected - l_intbuf) >= l_intbuf){
+ //l_intbuf*2 elements found. Use all of them in the build phase
+ l_build_buf = l_intbuf*2;
+ n_keys = l_intbuf;
+ }
+ else{
+ l_build_buf = l_intbuf;
+ n_keys = n_min_ideal_keys;
+ }
+ BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
+ }
+
+ return true;
+}
+
+//Entry point of the adaptive stable sort. Small inputs go to insertion
+//sort; if xbuf can hold half of the range a plain merge sort is used;
+//otherwise the key/buffer based adaptive algorithm runs, falling back to
+//a rotation-based stable sort when too few unique keys exist.
+template<class RandIt, class Compare>
+void adaptive_sort_impl
+ ( RandIt first
+ , typename iterator_traits<RandIt>::size_type const len
+ , Compare comp
+ , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf
+ )
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+
+ //Small sorts go directly to insertion sort
+ if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
+ insertion_sort(first, first + len, comp);
+ return;
+ }
+
+ if((len-len/2) <= xbuf.capacity()){
+ merge_sort(first, first+len, comp, xbuf.data());
+ return;
+ }
+
+ //Make sure the threshold is at least four
+ BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
+
+ size_type l_base = 0;
+ size_type l_intbuf = 0;
+ size_type n_keys = 0;
+ size_type l_build_buf = 0;
+
+ if(!build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
+ stable_sort(first, first+len, comp, xbuf);
+ return;
+ }
+
+ //Otherwise, continue in adaptive_sort
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n After collect_unique: ", len);
+ size_type const n_key_plus_buf = l_intbuf+n_keys;
+ //l_build_buf is always power of two if l_intbuf is zero
+ BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
+
+ //Classic merge sort until internal buffer and xbuf are exhausted
+ size_type 
const l_merged = build_blocks
+ (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After build_blocks: ", len);
+
+ //Non-trivial merge
+ bool const buffer_right = combine_all_blocks
+ (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
+
+ //Sort keys and buffer and merge the whole sequence
+ final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
+}
+
+//Entry point of the adaptive stable merge of [first, first+len1) and
+//[first+len1, first+len1+len2). Uses a buffered merge when xbuf can hold
+//the smaller range, otherwise a key/block based merge.
+template<class RandIt, class Compare>
+void adaptive_merge_impl
+ ( RandIt first
+ , typename iterator_traits<RandIt>::size_type const len1
+ , typename iterator_traits<RandIt>::size_type const len2
+ , Compare comp
+ , adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf
+ )
+{
+ typedef typename iterator_traits<RandIt>::size_type size_type;
+
+ if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
+ buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
+ }
+ else{
+ const size_type len = len1+len2;
+ //Calculate ideal parameters and try to collect needed unique keys
+ size_type l_block = size_type(ceil_sqrt(len));
+
+ //Short ranges relative to the block size: a plain bufferless merge is enough
+ if(len1 <= l_block*2 || len2 <= l_block*2){
+ merge_bufferless(first, first+len1, first+len1+len2, comp);
+ return;
+ }
+
+ size_type l_intbuf = xbuf.capacity() >= l_block ? 
0u : l_block;
+
+ //This is the minimum number of keys needed to implement the ideal algorithm
+ //ceil(len/l_block) - 1 (as the first block is used as buffer)
+ size_type n_keys = l_block;
+ while(n_keys >= (len-l_intbuf-n_keys)/l_block){
+ --n_keys;
+ }
+ ++n_keys;
+ BOOST_ASSERT(n_keys < l_block);
+
+ if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
+ n_keys = 0u;
+ }
+
+ size_type const to_collect = l_intbuf+n_keys;
+ size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
+
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n A collect: ", len);
+ //Fewer than 4 unique keys: no block combination possible, merge bufferless
+ if(collected != to_collect && collected < 4){
+ merge_bufferless(first, first+len1, first+len1+len2, comp);
+ }
+ else{
+ bool use_internal_buf = true;
+ if (collected != to_collect){
+ //Not all requested keys found: recompute block/buffer sizes
+ l_intbuf = 0u;
+ n_keys = collected;
+ use_internal_buf = false;
+ l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
+ l_intbuf = use_internal_buf ? l_block : 0u;
+ }
+
+ bool xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
+ size_type const l_combine = len-collected;
+ size_type const l_combine1 = len1-collected;
+
+ size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx;
+ if(n_keys){
+ RandIt const first_data = first+collected;
+ RandIt const keys = first;
+ combine_params( keys, comp, first_data, l_combine
+ , l_combine1, l_block, xbuf, comp
+ , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
+ if(xbuf_used){
+ merge_blocks_with_buf
+ (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len);
+ }
+ else if(use_internal_buf){
+ //NOTE(review): this macro is #defined inside the function and never
+ //#undef'd, so it leaks into including translation units -- confirm intended
+ #define BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
+ #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
+ range_xbuf<RandIt, swap_op> rbuf(first_data-l_block, first_data);
+ merge_blocks_with_buf
+ (keys, keys[midkey_idx], comp, first_data, 
l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, rbuf, xbuf_used);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len);
+ #else
+ merge_blocks_left
+ (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len);
+ #endif
+ }
+ else{
+ merge_blocks_bufferless
+ (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp);
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len);
+ }
+ }
+ else{
+ //No element keys were reserved: use integer keys stored in the
+ //trailing part of xbuf instead
+ xbuf.clear();
+ size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
+ combine_params( uint_keys, less(), first, l_combine
+ , l_combine1, l_block, xbuf, comp
+ , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
+ merge_blocks_with_buf
+ (uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true);
+ xbuf.clear();
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len);
+ }
+
+ //Sort and merge in the remaining keys (and buffer, if internal)
+ n_keys = collected-l_intbuf;
+ if(use_internal_buf){
+ if(xbuf_used){
+ xbuf.clear();
+ //Nothing to do
+ if(n_keys){
+ stable_sort(first, first+n_keys, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf);
+ }
+ }
+ else{
+ #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
+ xbuf.clear();
+ stable_sort(first, first+collected, comp, xbuf);
+ stable_merge(first, first+collected, first+len, comp, xbuf);
+ #else
+ xbuf.clear();
+ stable_sort(first+len-l_block, first+len, comp, xbuf);
+ RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp);
+ RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len);
+ stable_merge(first+n_keys, pos1, pos2, antistable<Compare>(comp), xbuf);
+ if(n_keys){
+ stable_sort(first, first+n_keys, comp, xbuf);
+ stable_merge(first, first+n_keys, first+len, comp, xbuf);
+ }
+ #endif
+ }
+
+ 
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len);
+ }
+ else{
+ //No internal buffer was used: just sort the collected keys and merge them in
+ stable_sort(first, first+collected, comp, xbuf);
+ xbuf.clear();
+ if(xbuf.capacity() >= collected){
+ buffered_merge(first, first+collected, first+len1+len2, comp, xbuf);
+ }
+ else{
+ merge_bufferless(first, first+collected, first+len1+len2, comp);
+ }
+ }
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len);
+ }
+ }
+}
+
+} //namespace detail_adaptive {
+} //namespace movelib {
+} //namespace boost {
+
+#include <boost/move/detail/config_end.hpp>
+
+#endif //#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP