Imported Upstream version 1.67.0 upstream/1.67.0

author: DongHun Kwak <dh0128.kwak@samsung.com> 2019-12-05 15:12:59 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2019-12-05 15:12:59 +0900
commit: b8cf34c691623e4ec329053cbbf68522a855882d (patch)
tree: 34da08632a99677f6b79ecb65e5b655a5b69a67f /boost/sort
parent: 3fdc3e5ee96dca5b11d1694975a65200787eab86 (diff)
download: boost-b8cf34c691623e4ec329053cbbf68522a855882d.tar.gz
boost-b8cf34c691623e4ec329053cbbf68522a855882d.tar.bz2
boost-b8cf34c691623e4ec329053cbbf68522a855882d.zip
46 files changed, 13948 insertions, 3247 deletions
diff --git a/boost/sort/block_indirect_sort/blk_detail/backbone.hpp b/boost/sort/block_indirect_sort/blk_detail/backbone.hpp
new file mode 100644
index 0000000000..1c2fdfec88
--- /dev/null
+++ b/boost/sort/block_indirect_sort/blk_detail/backbone.hpp
@@ -0,0 +1,219 @@
+//----------------------------------------------------------------------------
+/// @file backbone.hpp
+/// @brief This file contains the class backbone, which is part of the
+///        block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BACKBONE_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BACKBONE_HPP
+
+#include <atomic>
+#include <boost/sort/pdqsort/pdqsort.hpp>
+#include <boost/sort/common/util/atomic.hpp>
+#include <boost/sort/common/util/algorithm.hpp>
+#include <boost/sort/common/stack_cnc.hpp>
+#include <future>
+#include <iostream>
+#include <iterator>
+
+#include <boost/sort/block_indirect_sort/blk_detail/block.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace blk_detail
+{
+
+//---------------------------------------------------------------------------
+//                 USING SENTENCES
+//---------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+namespace bscu = bsc::util;
+using bsc::stack_cnc;
+using bsc::range;
+
+///---------------------------------------------------------------------------
+/// @struct backbone
+/// @brief This contains all the information shared between the classes of
+///        the block indirect sort algorithm
+
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+struct backbone
+{
+    //-------------------------------------------------------------------------
+    //                  D E F I N I T I O N S
+    //-------------------------------------------------------------------------
+    typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+    typedef std::atomic< uint32_t >                             atomic_t;
+    typedef range< size_t >                                     range_pos;
+    typedef range< Iter_t >                                     range_it;
+    typedef range< value_t * >                                  range_buf;
+    typedef std::function< void(void) >                         function_t;
+    typedef block< Block_size, Iter_t >                         block_t;
+
+    //------------------------------------------------------------------------
+    //                V A R I A B L E S
+    //------------------------------------------------------------------------
+    // range with all the elements to sort
+    range< Iter_t > global_range;
+
+    // index vector of block_pos elements, one per block
+    std::vector< block_pos > index;
+
+    // Number of elements to sort
+    size_t nelem;
+
+    // Number of blocks to sort (an incomplete last block is counted too)
+    size_t nblock;
+
+    // Number of elements in the last block (tail), 0 when it is complete
+    size_t ntail;
+
+    // object used to compare two elements
+    Compare cmp;
+
+    // range of elements of the last block (tail)
+    range_it range_tail;
+
+    // thread local variable. It is a pointer to the auxiliary buffer
+    static thread_local value_t *buf;
+
+    // concurrent stack which stores the function_t elements
+    stack_cnc< function_t > works;
+
+    // global indicator of error (a plain bool, not an atomic variable)
+    bool error;
+    //
+    //------------------------------------------------------------------------
+    //                F U N C T I O N S
+    //------------------------------------------------------------------------
+    backbone (Iter_t first, Iter_t last, Compare comp);
+
+    //------------------------------------------------------------------------
+    //  function : get_block
+    /// @brief obtain the block in the position pos
+    /// @param pos : position of the block
+    /// @return block which begins pos * Block_size elements after the first
+    //------------------------------------------------------------------------
+    block_t get_block (size_t pos) const
+    {
+        return block_t (global_range.first + (pos * Block_size));
+    };
+    //-------------------------------------------------------------------------
+    //  function : get_range
+    /// @brief obtain the range of the block in the position pos
+    /// @param pos : position of the block
+    /// @return range required; the block nblock - 1 ends in global_range.last
+    //-------------------------------------------------------------------------
+    range_it get_range (size_t pos) const
+    {
+        Iter_t it1 = global_range.first + (pos * Block_size);
+        Iter_t it2 =
+            (pos == (nblock - 1)) ? global_range.last : it1 + Block_size;
+        return range_it (it1, it2);
+    };
+    //-------------------------------------------------------------------------
+    //  function : get_range_buf
+    /// @brief obtain the range [buf, buf + Block_size) of the thread buffer
+    //-------------------------------------------------------------------------
+    range_buf get_range_buf ( ) const
+    {
+        return range_buf (buf, buf + Block_size);
+    };
+
+    //-------------------------------------------------------------------------
+    //  function : exec
+    /// @brief Initialize the thread local buffer with the ptr_buf pointer,
+    ///        and begin with the execution of the functions stored in works
+    //
+    /// @param ptr_buf : Pointer to the memory assigned to the thread_local
+    ///                  buffer
+    /// @param counter : atomic counter forwarded to the exec overload which
+    ///                  takes only 1 parameter
+    //-------------------------------------------------------------------------
+    void exec (value_t *ptr_buf, atomic_t &counter)
+    {
+        buf = ptr_buf; // buf is thread_local: set for the calling thread
+        exec (counter);
+    };
+
+    void exec (atomic_t &counter);
+
+//---------------------------------------------------------------------------
+}; // end struct backbone
+//---------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            N O N     I N L I N E      F U N C T I O N S                  ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+// definition of the thread_local pointer to the auxiliary buffer (nullptr)
+template < uint32_t Block_size, class Iter_t, class Compare >
+thread_local typename std::iterator_traits< Iter_t >
+::value_type *backbone< Block_size, Iter_t, Compare >::buf = nullptr;
+
+//------------------------------------------------------------------------
+//  function : backbone
+/// @brief constructor of the class. The counters begin in 0 and range_tail
+///        begins empty, so an empty input leaves all the members defined
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object used to compare two elements pointed by Iter_t
+///               iterators
+//------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+backbone< Block_size, Iter_t, Compare >
+::backbone (Iter_t first, Iter_t last, Compare comp)
+: global_range (first, last), nelem (0), nblock (0), ntail (0), cmp (comp),
+  range_tail (last, last), error (false)
+{
+    assert ((last - first) >= 0);
+    if (first == last) return; // nothing to do, the defaults are kept
+
+    nelem = size_t (last - first);
+    nblock = (nelem + Block_size - 1) / Block_size; // rounded up
+    ntail = (nelem % Block_size);
+    index.reserve (nblock + 1);
+    for (size_t i = 0; i < nblock; ++i) index.emplace_back (block_pos (i));
+
+    // With an incomplete last block the tail begins in that block. In other
+    // case the tail is the empty range (last, last) of the initializer list
+    if (ntail != 0) range_tail.first = first + ((nblock - 1) * Block_size);
+};
+//
+//-------------------------------------------------------------------------
+//  function : exec
+/// @brief pop and run the function_t elements of works while counter is
+///        not zero. When the stack gives nothing, the thread yields
+/// @param counter : atomic counter. The loop ends when it is read as 0
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+void backbone< Block_size, Iter_t, Compare >::exec (atomic_t &counter)
+{
+    function_t task;
+    for (; bscu::atomic_read (counter) != 0;)
+    {
+        if (not works.pop_move_back (task)) std::this_thread::yield ( );
+        else task ( );
+    };
+};
+//
+//****************************************************************************
+}; //    End namespace blk_detail
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+#endif
diff --git a/boost/sort/block_indirect_sort/blk_detail/block.hpp b/boost/sort/block_indirect_sort/blk_detail/block.hpp
new file mode 100644
index 0000000000..9c14b6103f
--- /dev/null
+++ b/boost/sort/block_indirect_sort/blk_detail/block.hpp
@@ -0,0 +1,180 @@
+//----------------------------------------------------------------------------
+/// @file block.hpp
+/// @brief This file contains the internal data structures used in the
+///        block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_HPP
+
+#include <boost/sort/common/range.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace blk_detail
+{
+//---------------------------------------------------------------------------
+//                 USING SENTENCES
+//---------------------------------------------------------------------------
+using namespace boost::sort::common;
+//
+//---------------------------------------------------------------------------
+/// @struct block_pos
+/// @brief represent a pair of values, a position represented as an unsigned
+///        variable ( position ), and a bool variable ( side ). They are packed
+///        in a size_t variable. The Least Significant Bit is the bool variable,
+///        and the others bits are the position
+//----------------------------------------------------------------------------
+class block_pos
+{
+    //------------------------------------------------------------------------
+    //                   VARIABLES
+    //-----------------------------------------------------------------------
+    size_t num; // side in the lowest bit, position in the other bits
+
+  public:
+    //----------------------------- FUNCTIONS ------------------------------
+    block_pos (void) : num (0) {}
+    //
+    //-------------------------------------------------------------------------
+    //  function : block_pos
+    /// @brief build the packed value from a position and a side
+    /// @param position : position to store
+    /// @param side : side to store
+    //-------------------------------------------------------------------------
+    block_pos (size_t position, bool side = false)
+        : num ((position << 1) | (side ? size_t (1) : size_t (0)))
+    {
+    }
+    //
+    //-------------------------------------------------------------------------
+    //  function : pos
+    /// @brief read the position stored in the block_pos
+    /// @return position
+    //-------------------------------------------------------------------------
+    size_t pos (void) const { return num >> 1; }
+    //
+    //-------------------------------------------------------------------------
+    //  function : set_pos
+    /// @brief replace the stored position, the side is not modified
+    /// @param position : value to store
+    //-------------------------------------------------------------------------
+    void set_pos (size_t position) { num = (num & 1) | (position << 1); }
+    //
+    //-------------------------------------------------------------------------
+    //  function : side
+    /// @brief read the side stored in the block_pos
+    /// @return bool value
+    //-------------------------------------------------------------------------
+    bool side (void) const { return (num & 1) == 1; }
+    //
+    //-------------------------------------------------------------------------
+    //  function : set_side
+    /// @brief replace the stored side, the position is not modified
+    /// @param sd : bool value to store
+    //-------------------------------------------------------------------------
+    void set_side (bool sd) { num = ((num >> 1) << 1) | size_t (sd ? 1 : 0); }
+}; // end class block_pos
+
+//
+//---------------------------------------------------------------------------
+/// @struct block
+/// @brief represent a group of Block_size contiguous elements, beginning
+///        with the pointed by first
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t >
+struct block
+{
+    //----------------------------------------------------------------------
+    //                     VARIABLES
+    //----------------------------------------------------------------------
+    Iter_t first; // iterator to the first element of the block
+
+    //-------------------------------------------------------------------------
+    //  function : block
+    /// @brief constructor from an iterator to the first element of the block
+    /// @param it : iterator to the first element of the block
+    //-------------------------------------------------------------------------
+    block (Iter_t it) : first (it){};
+
+    //-------------------------------------------------------------------------
+    //  function : get_range
+    /// @brief convert a block in a range (no range_it typedef exists here)
+    /// @return range [first, first + Block_size)
+    //-------------------------------------------------------------------------
+    range< Iter_t > get_range (void) const
+    {
+        return range< Iter_t > (first, first + Block_size);
+    };
+
+}; // end struct block
+
+//
+//-------------------------------------------------------------------------
+//  function : compare_block
+/// @brief order two blocks comparing only the element pointed by the
+///        first iterator of each one
+/// @param block1, block2 : blocks to compare
+/// @param cmp : comparison operator
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+bool compare_block (block< Block_size, Iter_t > block1,
+                    block< Block_size, Iter_t > block2,
+                    Compare cmp = Compare ( ))
+{
+    Iter_t head1 = block1.first, head2 = block2.first;
+    return cmp (*head1, *head2);
+}
+//
+///---------------------------------------------------------------------------
+/// @struct compare_block_pos
+/// @brief This is a object for to compare two block_pos objects
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+struct compare_block_pos
+{
+    //-----------------------------------------------------------------------
+    //                        VARIABLES
+    //-----------------------------------------------------------------------
+    Iter_t global_first; // iterator to the first element to sort
+    Compare comp;        // comparison object used to compare two elements
+
+    //-------------------------------------------------------------------------
+    //  function : compare_block_pos
+    /// @brief constructor, stores a copy of both parameters
+    /// @param g_first : iterator to the first element to sort
+    /// @param cmp : comparison operator
+    //-------------------------------------------------------------------------
+    compare_block_pos (Iter_t g_first, Compare cmp)
+        : global_first (g_first), comp (cmp) {}
+    //
+    //-------------------------------------------------------------------------
+    //  function : operator ()
+    /// @brief compare two blocks using their first elements, which are
+    ///        pos ( ) * Block_size elements after global_first
+    /// @param block_pos1 : first block to compare
+    /// @param block_pos2 : second block to compare
+    //-------------------------------------------------------------------------
+    bool operator( ) (block_pos block_pos1, block_pos block_pos2) const
+    {
+        Iter_t it1 = global_first + (block_pos1.pos ( ) * Block_size);
+        Iter_t it2 = global_first + (block_pos2.pos ( ) * Block_size);
+        return comp (*it1, *it2);
+    }
+}; // end struct compare_block_pos
+
+//****************************************************************************
+}; //    End namespace blk_detail
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/block_indirect_sort/blk_detail/constants.hpp b/boost/sort/block_indirect_sort/blk_detail/constants.hpp
new file mode 100644
index 0000000000..c407243025
--- /dev/null
+++ b/boost/sort/block_indirect_sort/blk_detail/constants.hpp
@@ -0,0 +1,26 @@
+//----------------------------------------------------------------------------
+/// @file constants.hpp
+/// @brief This file contains the constants values used in the algorithms
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_CONSTANTS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_CONSTANTS_HPP
+
+// Block size used by the block_indirect_sort algorithm
+#define BOOST_BLOCK_SIZE 1024
+
+// Group size used by the block_indirect_sort algorithm
+#define BOOST_GROUP_SIZE 64
+
+// Minimal number of threads required to use the
+// block_indirect_sort algorithm
+#define BOOST_NTHREAD_BORDER 6
+
+#endif
diff --git a/boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp b/boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp
new file mode 100644
index 0000000000..a4185b53af
--- /dev/null
+++ b/boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp
@@ -0,0 +1,426 @@
+//----------------------------------------------------------------------------
+/// @file merge_blocks.hpp
+/// @brief contains the class merge_blocks, which is part of the
+///        block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_MERGE_BLOCKS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_MERGE_BLOCKS_HPP
+
+#include <atomic>
+#include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
+#include <boost/sort/common/range.hpp>
+#include <future>
+#include <iostream>
+#include <iterator>
+
+namespace boost
+{
+namespace sort
+{
+namespace blk_detail
+{
+//----------------------------------------------------------------------------
+//                          USING SENTENCES
+//----------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+namespace bscu = bsc::util;
+using bsc::range;
+using bsc::is_mergeable;
+using bsc::merge_uncontiguous;
+//
+///---------------------------------------------------------------------------
+/// @struct merge_blocks
+/// @brief This class merge the blocks. The blocks to merge are defined by two
+///        ranges of positions in the index of the backbone
+//----------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+struct merge_blocks
+{
+    //-----------------------------------------------------------------------
+    //                  D E F I N I T I O N S
+    //-----------------------------------------------------------------------
+    typedef typename std::iterator_traits<Iter_t>::value_type value_t;
+    typedef std::atomic<uint32_t> atomic_t;
+    typedef range<size_t> range_pos;
+    typedef range<Iter_t> range_it;
+    typedef range<value_t *> range_buf;
+    typedef std::function<void(void)> function_t;
+    typedef backbone<Block_size, Iter_t, Compare> backbone_t;
+    typedef compare_block_pos<Block_size, Iter_t, Compare> compare_block_pos_t;
+
+    //------------------------------------------------------------------------
+    //                V A R I A B L E S
+    //------------------------------------------------------------------------
+    // Object with the elements to sort and all internal data structures of the
+    // algorithm
+    backbone_t &bk;
+    //
+    //------------------------------------------------------------------------
+    //                F U N C T I O N S
+    //------------------------------------------------------------------------
+    merge_blocks(backbone_t &bkb, size_t pos_index1, size_t pos_index2,
+                    size_t pos_index3);
+
+    void tail_process(std::vector<block_pos> &vblkpos1,
+                    std::vector<block_pos> &vblkpos2);
+
+    void cut_range(range_pos rng);
+
+    void merge_range_pos(range_pos rng);
+
+    void extract_ranges(range_pos range_input);
+    //
+    //------------------------------------------------------------------------
+    //  function : function_merge_range_pos
+    /// @brief create a function_t with a call to merge_range_pos, and insert
+    ///        in the stack of the backbone
+    //
+    /// @param rng_input : range of positions of blocks in the index to merge
+    /// @param counter : atomic variable which is incremented here and
+    ///                  decremented when the function finishes. It is used
+    ///                  to know when all the function_t created inside an
+    ///                  object are finished
+    /// @param error : global indicator of error.
+    /// @remarks the function_t skips the merge when error is already true.
+    ///          A std::bad_alloc sets error and is not propagated
+    //------------------------------------------------------------------------
+    void function_merge_range_pos(const range_pos &rng_input, atomic_t &counter,
+                    bool &error)
+    {
+        bscu::atomic_add(counter, 1);
+        function_t f1 = [this, rng_input, &counter, &error]( ) -> void
+        {
+            if (not error)
+            {
+                try
+                {
+                    this->merge_range_pos (rng_input);
+                }
+                catch (std::bad_alloc &)
+                {
+                    error = true;
+                };
+            }
+            bscu::atomic_sub (counter, 1); // done even when the merge fails
+        };
+        bk.works.emplace_back(f1);
+    }
+    //
+    //------------------------------------------------------------------------
+    //  function : function_cut_range
+    /// @brief create a function_t with a call to cut_range, and insert in
+    ///        the stack of the backbone
+    //
+    /// @param rng_input : range of positions in the index to cut
+    /// @param counter : atomic variable which is incremented here and
+    ///                  decremented when the function finishes. It is used
+    ///                  to know when all the function_t created inside an
+    ///                  object are finished
+    /// @param error : global indicator of error.
+    //------------------------------------------------------------------------
+    void function_cut_range(const range_pos &rng_input, atomic_t &counter,
+                    bool &error)
+    {
+        bscu::atomic_add(counter, 1);
+        function_t f1 = [this, rng_input, &counter, &error]( ) -> void
+        {
+            if (not error)
+            {
+                try
+                {
+                    this->cut_range (rng_input);
+                }
+                catch (std::bad_alloc &)
+                {
+                    error = true;
+                };
+            }
+            bscu::atomic_sub (counter, 1); // done even when the cut fails
+        };
+        bk.works.emplace_back(f1);
+    }
+
+
+//----------------------------------------------------------------------------
+};
+// end struct merge_blocks
+//----------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            N O N     I N L I N E      F U N C T I O N S                  ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+//  function : merge_blocks
+/// @brief make the indirect merge of the two range_pos defined by their index
+///        position [pos_index1, pos_index2 ) and [ pos_index2, pos_index3 )
+//
+/// @param bkb : backbone with all the data to sort , and the internal data
+///              structures of the algorithm
+/// @param pos_index1 : first position of the first range in the index
+/// @param pos_index2 : last position of the first range and first position
+///                     of the second range in the index
+/// @param pos_index3 : last position of the second range in the index
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+merge_blocks<Block_size, Group_size, Iter_t, Compare>
+::merge_blocks( backbone_t &bkb, size_t pos_index1, size_t pos_index2,
+                size_t pos_index3) : bk(bkb)
+{
+    // sizes, in blocks, of the two consecutive ranges of the index
+    size_t nblock1 = pos_index2 - pos_index1;
+    size_t nblock2 = pos_index3 - pos_index2;
+    if (nblock1 == 0 or nblock2 == 0) return;
+
+    // Copy both ranges of the index. The side records the origin of each
+    // block_pos : true for the first range, false for the second one
+    std::vector<block_pos> left, right;
+    left.reserve(nblock1 + 1);
+    right.reserve(nblock2 + 1);
+    for (size_t k = 0; k < nblock1; ++k)
+    {
+        left.emplace_back(bk.index[pos_index1 + k].pos(), true);
+    };
+    for (size_t k = 0; k < nblock2; ++k)
+    {
+        right.emplace_back(bk.index[pos_index2 + k].pos(), false);
+    };
+
+    // The tail process is done only when the second range finishes in the
+    // last block of the data and the range_tail of the backbone is not empty
+    const bool has_tail = (bk.range_tail.first != bk.range_tail.last);
+    if (has_tail and right.back().pos() == (bk.nblock - 1))
+    {
+        tail_process(left, right);
+        nblock1 = left.size();
+        nblock2 = right.size();
+    };
+
+    compare_block_pos_t cmp_blk(bk.global_range.first, bk.cmp);
+    if (bk.error) return;
+    // merge of the two vectors of block_pos over the index of the backbone
+    bscu::merge(left.begin(), left.end(), right.begin(), right.end(),
+                    bk.index.begin() + pos_index1, cmp_blk);
+    if (bk.error) return;
+    // Extracting the ranges used to merge the elements
+    const size_t pos_end = pos_index1 + nblock1 + nblock2;
+    extract_ranges(range_pos(pos_index1, pos_end));
+}
+
+
+//
+//-------------------------------------------------------------------------
+//  function : tail_process
+/// @brief make the process when the second vector of block_pos to merge is
+///        the last, and have an incomplete block ( tail)
+//
+/// @param vblkpos1 : first vector of block_pos elements to merge
+/// @param vblkpos2 : second vector of block_pos elements to merge
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void merge_blocks<Block_size, Group_size, Iter_t, Compare>
+::tail_process( std::vector<block_pos> &vblkpos1,
+                std::vector<block_pos> &vblkpos2 )
+{
+    if (vblkpos1.empty() or vblkpos2.empty()) return;
+
+    // the last element of the second vector is dropped
+    vblkpos2.pop_back();
+
+    // last block of the first vector against the tail range
+    const size_t pos_last1 = vblkpos1.back().pos();
+    range_it rng_last1 = bk.get_range(pos_last1);
+    if (not bsc::is_mergeable(rng_last1, bk.range_tail, bk.cmp)) return;
+
+    bsc::merge_uncontiguous(rng_last1, bk.range_tail, bk.get_range_buf(),
+                    bk.cmp);
+    if (vblkpos1.size() < 2) return;
+
+    // when that block is now mergeable with the previous one, it changes
+    // from the first vector to the second one, with the side set to false
+    const size_t pos_prev1 = vblkpos1[vblkpos1.size() - 2].pos();
+    range_it rng_prev1 = bk.get_range(pos_prev1);
+    if (bsc::is_mergeable(rng_prev1, rng_last1, bk.cmp))
+    {
+        vblkpos2.emplace_back(pos_last1, false);
+        vblkpos1.pop_back();
+    };
+}
+
+//
+//-------------------------------------------------------------------------
+//  function : cut_range
+/// @brief when the rng_input has Group_size or more positions, this function
+///        divides it in several parts creating function_t elements, which are
+///        inserted in the concurrent stack of the backbone
+//
+/// @param rng_input : range to divide
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void merge_blocks<Block_size, Group_size, Iter_t, Compare>
+::cut_range(range_pos rng_input)
+{
+    if (rng_input.size() < Group_size)
+    {
+        merge_range_pos(rng_input); // small range : merged without cutting
+        return;
+    };
+    // counter of the function_t elements created here and not finished
+    atomic_t counter(0);
+    size_t npart = (rng_input.size() + Group_size - 1) / Group_size;
+    size_t size_part = rng_input.size() / npart;
+
+    size_t pos_ini = rng_input.first;
+    size_t pos_last = rng_input.last;
+    // each iteration closes one part [pos_ini, pos) of the range
+    while (pos_ini < pos_last)
+    {
+        size_t pos = pos_ini + size_part;
+        while (pos < pos_last // advance while both neighbors have equal side
+                        and bk.index[pos - 1].side() == bk.index[pos].side())
+        {
+            ++pos;
+        };
+        if (pos < pos_last)
+        {
+            merge_uncontiguous(bk.get_range(bk.index[pos - 1].pos()),
+                            bk.get_range(bk.index[pos].pos()),
+                            bk.get_range_buf(), bk.cmp);
+        }
+        else pos = pos_last;
+        if ((pos - pos_ini) > 1) // parts of only one block are not enqueued
+        {
+            range_pos rng_aux(pos_ini, pos);
+            function_merge_range_pos(rng_aux, counter, bk.error);
+        };
+        pos_ini = pos;
+    };
+    bk.exec(counter); // run queued works until all the ranges are finished
+}
+
+
+//
+//-------------------------------------------------------------------------
+//  function : merge_range_pos
+/// @brief make the indirect merge of the blocks whose positions in the
+///        index are inside rng_input. The first block is saved in the
+///        buffer of the backbone, and each following block is combined with
+///        bsc::merge_flow, using the previous block and the buffer
+//
+/// @param rng_input : range of positions of the blocks to merge. With less
+///                    than 2 positions the function does nothing
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void merge_blocks<Block_size, Group_size, Iter_t, Compare>
+::merge_range_pos(range_pos rng_input)
+{
+    if (rng_input.size() < 2) return;
+
+    range_buf buffer = bk.get_range_buf();
+    range_it rng_dest = bk.get_range(bk.index[rng_input.first].pos());
+
+    // presumably the first argument of move_forward is the destination :
+    // the first block is saved in the buffer
+    move_forward(buffer, rng_dest);
+
+    size_t posx = rng_input.first;
+    while (++posx != rng_input.last)
+    {
+        range_it rng_src = bk.get_range(bk.index[posx].pos());
+        bsc::merge_flow(rng_dest, buffer, rng_src, bk.cmp);
+        rng_dest = rng_src;
+    };
+    // the buffer content goes to the last block visited
+    move_forward(rng_dest, buffer);
+}
+//
+//-------------------------------------------------------------------------
+//  function : extract_ranges
+/// @brief Walk a big range of positions of the index, looking for the
+///        groups of consecutive blocks which must be merged together.
+///        Every group with more than one block generates a function_t
+///        element for the concurrent stack of the backbone :
+///        function_cut_range when the group has more than Group_size
+///        blocks, and function_merge_range_pos otherwise
+//
+/// @param range_input : range of the positions in the index, where must
+///                      extract the ranges to merge
+/// @remarks When bk.error is set, no more groups are generated, but the
+///          function still waits for the works already created, because
+///          they received the local counter
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void merge_blocks<Block_size, Group_size, Iter_t, Compare>
+::extract_ranges(range_pos range_input)
+{
+    if (range_input.size() < 2) return;
+    atomic_t counter(0);
+
+    // The names with x are positions of the index
+    size_t posx_ini = range_input.first;
+    block_pos bp_posx_ini = bk.index[posx_ini];
+
+    // rng_max / side_max : block of the current group whose last element
+    // is the greatest found until now
+    range_it rng_max = bk.get_range(bp_posx_ini.pos());
+    bool side_max = bp_posx_ini.side();
+
+    block_pos bp_posx;
+    range_it rng_posx = rng_max;
+    bool side_posx = side_max;
+
+    // posx reaches range_input.last (one past the last block) in order to
+    // close the last group
+    for (size_t posx = posx_ini + 1; posx <= range_input.last; ++posx)
+    {
+        bool final = (posx == range_input.last);
+        bool mergeable = false;
+
+        if (not final)
+        {
+            bp_posx = bk.index[posx];
+            rng_posx = bk.get_range(bp_posx.pos());
+            side_posx = bp_posx.side();
+            mergeable = (side_max != side_posx
+                            and bsc::is_mergeable(rng_max, rng_posx, bk.cmp));
+        };
+
+        // Leave the loop instead of returning : the works already created
+        // received a reference to the local counter, so the function must
+        // wait for them in bk.exec before the counter is destroyed
+        if (bk.error) break;
+
+        if (final or not mergeable)
+        {
+            // the group [posx_ini, posx) is closed
+            range_pos rp_final(posx_ini, posx);
+            if (rp_final.size() > 1)
+            {
+                if (rp_final.size() > Group_size)
+                {
+                    function_cut_range(rp_final, counter, bk.error);
+                }
+                else
+                {
+                    function_merge_range_pos(rp_final, counter, bk.error);
+                };
+            };
+            // a new group begins in posx
+            posx_ini = posx;
+            if (not final)
+            {
+                rng_max = rng_posx;
+                side_max = side_posx;
+            };
+        }
+        else
+        {
+            // the block belongs to the group. Update the maximum if the
+            // last element of the block is greater
+            if (bk.cmp(*(rng_max.back()), *(rng_posx.back())))
+            {
+                rng_max = rng_posx;
+                side_max = side_posx;
+            };
+        };
+    };
+    bk.exec(counter); // wait until all the works created are finished
+}
+//
+//****************************************************************************
+}; //    End namespace blk_detail
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp b/boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp
new file mode 100644
index 0000000000..6b556bcf47
--- /dev/null
+++ b/boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp
@@ -0,0 +1,284 @@
+//----------------------------------------------------------------------------
+/// @file move_blocks.hpp
+/// @brief contains the class move_blocks, which is part of the
+///        block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_MOVE_BLOCKS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_MOVE_BLOCKS_HPP
+
+#include <atomic>
+#include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
+#include <future>
+#include <iostream>
+#include <iterator>
+
+namespace boost
+{
+namespace sort
+{
+namespace blk_detail
+{
+//----------------------------------------------------------------------------
+//                          USING SENTENCES
+//----------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+//
+///---------------------------------------------------------------------------
+/// @struct move_blocks
+/// @brief This class moves the blocks to the places indicated by the index
+///        of the backbone, transforming a logical sort by an index into a
+///        physical sort
+//----------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+struct move_blocks
+{
+    //-------------------------------------------------------------------------
+    //                  D E F I N I T I O N S
+    //-------------------------------------------------------------------------
+    using this_type = move_blocks<Block_size, Group_size, Iter_t, Compare>;
+    using value_t = typename std::iterator_traits<Iter_t>::value_type;
+    using atomic_t = std::atomic<uint32_t>;
+    using range_pos = bsc::range<size_t>;
+    using range_it = bsc::range<Iter_t>;
+    using range_buf = bsc::range<value_t *>;
+    using function_t = std::function<void(void)>;
+    using backbone_t = backbone<Block_size, Iter_t, Compare>;
+
+    //------------------------------------------------------------------------
+    //                V A R I A B L E S
+    //------------------------------------------------------------------------
+    // Reference to the backbone : the index, the blocks, the buffers and the
+    // stack of works used here are obtained from it
+    backbone_t &bk;
+
+    //------------------------------------------------------------------------
+    //                F U N C T I O N S
+    //------------------------------------------------------------------------
+    move_blocks(backbone_t &bkb);
+
+    void move_sequence(const std::vector<size_t> &init_sequence);
+
+    void move_long_sequence(const std::vector<size_t> &init_sequence);
+    //
+    //------------------------------------------------------------------------
+    //  function : function_move_sequence
+    /// @brief build a function_t which calls move_sequence with a copy of
+    ///        the sequence, and insert it in bk.works
+    ///
+    /// @param sequence : sequence of positions used to move the blocks. It
+    ///                   is copied inside the function_t
+    /// @param counter : atomic variable incremented here and decremented
+    ///                  when the function_t finishes. It is used to know
+    ///                  when all the function_t created inside an object
+    ///                  are finished
+    /// @param error : global indicator of error. If it is set, the
+    ///                function_t doesn't call move_sequence. It is set when
+    ///                a std::bad_alloc is caught
+    //------------------------------------------------------------------------
+    void function_move_sequence(std::vector<size_t> &sequence,
+                                atomic_t &counter, bool &error)
+    {
+        bscu::atomic_add(counter, 1);
+        function_t work = [this, sequence, &counter, &error]( ) -> void
+        {
+            try
+            {
+                if (not error) this->move_sequence (sequence);
+            }
+            catch (std::bad_alloc &)
+            {
+                error = true;
+            };
+            // the counter is decremented always, with or without error
+            bscu::atomic_sub (counter, 1);
+        };
+        bk.works.emplace_back(work);
+    }
+
+    //
+    //------------------------------------------------------------------------
+    //  function : function_move_long_sequence
+    /// @brief build a function_t which calls move_long_sequence with a copy
+    ///        of the sequence, and insert it in bk.works
+    //
+    /// @param sequence : sequence of positions used to move the blocks. It
+    ///                   is copied inside the function_t
+    /// @param counter : atomic variable incremented here and decremented
+    ///                  when the function_t finishes. It is used to know
+    ///                  when all the function_t created inside an object
+    ///                  are finished
+    /// @param error : global indicator of error. If it is set, the
+    ///                function_t doesn't call move_long_sequence. It is set
+    ///                when a std::bad_alloc is caught
+    //------------------------------------------------------------------------
+    void function_move_long_sequence(std::vector<size_t> &sequence,
+                                     atomic_t &counter, bool &error)
+    {
+        bscu::atomic_add(counter, 1);
+        function_t work = [this, sequence, &counter, &error]( ) -> void
+        {
+            try
+            {
+                if (not error) this->move_long_sequence (sequence);
+            }
+            catch (std::bad_alloc &)
+            {
+                error = true;
+            };
+            // the counter is decremented always, with or without error
+            bscu::atomic_sub (counter, 1);
+        };
+        bk.works.emplace_back(work);
+    }
+//---------------------------------------------------------------------------
+}; // end of struct move_blocks
+//---------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            N O N     I N L I N E      F U N C T I O N S                  ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+//  function : move_blocks
+/// @brief constructor of the class. It moves the blocks to their true
+///        position, obtained from the index.
+///        The index is examined looking for the cycles of positions. Each
+///        cycle is stored in a sequence, the positions of the index visited
+///        are set to their own value, and a function_t is created for to
+///        move the blocks of the sequence (function_move_sequence when it
+///        has less than Group_size positions, function_move_long_sequence
+///        otherwise)
+//
+/// @param bkb : backbone with the index and the blocks
+/// @remarks the constructor returns when all the works created had finished
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+move_blocks<Block_size, Group_size, Iter_t, Compare>
+::move_blocks(backbone_t &bkb) : bk(bkb)
+{
+    // The function_t created by function_move_sequence and
+    // function_move_long_sequence store their own copy of the sequence,
+    // so this vector can be reused in each cycle
+    std::vector<size_t> sequence;
+    atomic_t counter(0);
+
+    size_t pos_index_ini = 0, pos_index_src = 0, pos_index_dest = 0;
+    while (pos_index_ini < bk.index.size())
+    {
+        // skip the blocks which are in their final position
+        while (pos_index_ini < bk.index.size()
+                        and bk.index[pos_index_ini].pos() == pos_index_ini)
+        {
+            ++pos_index_ini;
+        };
+
+        if (pos_index_ini == bk.index.size()) break;
+
+        // follow the cycle which begins in pos_index_ini
+        sequence.clear();
+        pos_index_src = pos_index_dest = pos_index_ini;
+        sequence.push_back(pos_index_ini);
+
+        while (bk.index[pos_index_dest].pos() != pos_index_ini)
+        {
+            pos_index_src = bk.index[pos_index_dest].pos();
+            sequence.push_back(pos_index_src);
+
+            bk.index[pos_index_dest].set_pos(pos_index_dest);
+            pos_index_dest = pos_index_src;
+        };
+
+        bk.index[pos_index_dest].set_pos(pos_index_dest);
+
+        if (sequence.size() < Group_size)
+        {
+            function_move_sequence(sequence, counter, bk.error);
+        }
+        else
+        {
+            function_move_long_sequence(sequence, counter, bk.error);
+        };
+    };
+    bk.exec(counter); // wait until all the sequences are moved
+}
+//
+//-------------------------------------------------------------------------
+//  function : move_sequence
+/// @brief move the blocks following the positions of init_sequence. The
+///        block of the first position is saved in the buffer of the
+///        backbone, each block is combined with the next of the sequence
+///        using move_forward, and finally the buffer is combined with the
+///        block of the last position
+//
+/// @param init_sequence : vector with the positions from and where move the
+///                        blocks. It must have at least one position
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void move_blocks<Block_size, Group_size, Iter_t, Compare>
+::move_sequence(const std::vector<size_t> &init_sequence)
+{
+    range_buf buffer = bk.get_range_buf();
+    auto it_pos = init_sequence.begin();
+
+    // presumably the first argument of move_forward is the destination
+    range_it rng_src = bk.get_range(*it_pos);
+    move_forward(buffer, rng_src);
+
+    for (++it_pos; it_pos != init_sequence.end(); ++it_pos)
+    {
+        range_it rng_dest(rng_src);
+        rng_src = bk.get_range(*it_pos);
+        move_forward(rng_dest, rng_src);
+    };
+    move_forward(rng_src, buffer);
+}
+//
+//-------------------------------------------------------------------------
+//  function : move_long_sequence
+/// @brief move the blocks following the positions of init_sequence.
+///        A sequence with Group_size or more positions is divided in parts.
+///        Every part generates a function_t (function_move_sequence) for
+///        the concurrent stack. When all the parts are finished, the
+///        function is called again with the sequence formed by the last
+///        position of each part.
+///        A sequence with less than Group_size positions is moved directly
+///        with move_sequence
+//
+/// @param init_sequence : vector with the positions from and where move the
+///                        blocks
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void move_blocks<Block_size, Group_size, Iter_t, Compare>
+::move_long_sequence(const std::vector<size_t> &init_sequence)
+{
+    const size_t nelem = init_sequence.size();
+    if (nelem < Group_size)
+    {
+        move_sequence(init_sequence);
+        return;
+    };
+
+    const size_t npart = (nelem + Group_size - 1) / Group_size;
+    const size_t size_part = nelem / npart;
+    atomic_t son_counter(0);
+
+    // last position of each part
+    std::vector<size_t> index_seq;
+    index_seq.reserve(npart);
+
+    // positions of the part in process. Each function_t stores its own copy
+    std::vector<size_t> part;
+    part.reserve(size_part);
+
+    // all the parts except the last have size_part positions
+    auto it_begin = init_sequence.begin();
+    for (size_t remaining = npart; remaining > 1; --remaining)
+    {
+        auto it_end = it_begin + size_part;
+        part.assign(it_begin, it_end);
+        index_seq.emplace_back(*(it_end - 1));
+        function_move_sequence(part, son_counter, bk.error);
+        it_begin = it_end;
+    };
+
+    // the last part has all the remaining positions
+    part.assign(it_begin, init_sequence.end());
+    index_seq.emplace_back(init_sequence.back());
+    function_move_sequence(part, son_counter, bk.error);
+
+    bk.exec(son_counter); // wait until all the parts are finished
+    if (bk.error) return;
+    move_long_sequence(index_seq);
+}
+
+//
+//****************************************************************************
+}; //    End namespace blk_detail
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp b/boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp
new file mode 100644
index 0000000000..98c0e48a5c
--- /dev/null
+++ b/boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp
@@ -0,0 +1,236 @@
+//----------------------------------------------------------------------------
+/// @file parallel_sort.hpp
+/// @brief Contains the parallel_sort class, which is part of the
+///        block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
+
+#include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
+#include <boost/sort/pdqsort/pdqsort.hpp>
+#include <boost/sort/common/pivot.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace blk_detail
+{
+
+//----------------------------------------------------------------------------
+//                          USING SENTENCES
+//----------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+namespace bscu = bsc::util;
+using bscu::nbits64;
+using bsc::pivot9;
+using boost::sort::pdqsort;
+//
+///---------------------------------------------------------------------------
+/// @struct parallel_sort
+/// @brief This class does a parallel sort. The data are divided as in the
+///        quicksort algorithm, until the level of subdivision is 0 or the
+///        number of elements of a part is smaller than max_per_thread. Then
+///        the part is sorted with pdqsort
+//----------------------------------------------------------------------------
+template<uint32_t Block_size, class Iter_t, class Compare>
+struct parallel_sort
+{
+    //-------------------------------------------------------------------------
+    //                  D E F I N I T I O N S
+    //-------------------------------------------------------------------------
+    using value_t = typename std::iterator_traits<Iter_t>::value_type;
+    using atomic_t = std::atomic<uint32_t>;
+    using function_t = std::function<void(void)>;
+    using backbone_t = backbone<Block_size, Iter_t, Compare>;
+
+    //------------------------------------------------------------------------
+    //                V A R I A B L E S
+    //------------------------------------------------------------------------
+    // reference to the backbone : comparison object, indicator of error and
+    // stack of works are obtained from it
+    backbone_t &bk;
+
+    // maximum number of elements to sort with 1 thread
+    size_t max_per_thread;
+
+    // atomic counter used to detect the end of the works created inside
+    // the object
+    atomic_t counter;
+
+    //------------------------------------------------------------------------
+    //                F U N C T I O N S
+    //------------------------------------------------------------------------
+    parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last);
+
+    void divide_sort(Iter_t first, Iter_t last, uint32_t level);
+    //
+    //------------------------------------------------------------------------
+    //  function : function_divide_sort
+    /// @brief build a function_t which calls divide_sort, and insert it in
+    ///        bk.works
+    //
+    /// @param first : iterator to the first element of the range to divide
+    /// @param last : iterator to the next element after the last element of
+    ///               the range to divide
+    /// @param level : level of depth in the division, passed to divide_sort
+    /// @param counter : atomic variable incremented here and decremented
+    ///                  when the function_t finishes. It is used to know
+    ///                  when all the function_t created inside an object
+    ///                  are finished
+    /// @param error : global indicator of error. If it is set, the
+    ///                function_t doesn't call divide_sort. It is set when a
+    ///                std::bad_alloc is caught
+    //------------------------------------------------------------------------
+    void function_divide_sort(Iter_t first, Iter_t last, uint32_t level,
+                              atomic_t &counter, bool &error)
+    {
+        bscu::atomic_add(counter, 1);
+        function_t work = [this, first, last, level, &counter, &error]( )
+        {
+            try
+            {
+                if (not error) this->divide_sort (first, last, level);
+            }
+            catch (std::bad_alloc &)
+            {
+                error = true;
+            };
+            // the counter is decremented always, with or without error
+            bscu::atomic_sub (counter, 1);
+        };
+        bk.works.emplace_back(work);
+    };
+
+//--------------------------------------------------------------------------
+};// end struct parallel_sort
+//--------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            N O N     I N L I N E      F U N C T I O N S                  ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//------------------------------------------------------------------------
+//  function : parallel_sort
+/// @brief constructor of the class. It sorts the range [first, last).
+///        Ranges with less than 2 elements, or already sorted, are not
+///        modified. Ranges in reverse order are reversed. Ranges with less
+///        than max_per_thread elements are sorted with pdqsort. The others
+///        are divided with divide_sort, and the constructor waits until all
+///        the works created are finished
+/// @param [in] bkbn : backbone struct with all the information to sort
+/// @param [in] first : iterator to the first element to sort
+/// @param [in] last : iterator to the next element after the last
+//------------------------------------------------------------------------
+template<uint32_t Block_size, class Iter_t, class Compare>
+parallel_sort<Block_size, Iter_t, Compare>
+::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last)
+ : bk(bkbn), max_per_thread(0), counter(0)
+{
+    assert((last - first) >= 0);
+    size_t nelem = size_t(last - first);
+
+    // With 0 or 1 elements there is nothing to sort. This check is needed
+    // too by the loops below : with an empty range, first + 1 is beyond
+    // last and the loops never find the end
+    if (nelem < 2) return;
+
+    //------------------- check if sort --------------------------------------
+    bool sorted = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+         it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
+    if (sorted) return;
+
+    //------------------- check if reverse sort ---------------------------
+    sorted = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+         it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++);
+
+    if (sorted)
+    {
+        // swap the elements from the two ends to the middle
+        size_t nelem2 = nelem >> 1;
+        Iter_t it1 = first, it2 = last - 1;
+        for (size_t i = 0; i < nelem2; ++i)
+            std::swap(*(it1++), *(it2--));
+        return;
+    };
+
+    //-------------------max_per_thread ---------------------------
+    // max_per_thread is between 1 << 13 and 1 << 18, depending of the
+    // size of the elements
+    uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1;
+    if (nbits_size > 5) nbits_size = 5;
+    max_per_thread = 1 << (18 - nbits_size);
+
+    uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2;
+
+    //---------------- check if only single thread -----------------------
+    if (nelem < (max_per_thread))
+    {
+        pdqsort(first, last, bk.cmp);
+        return;
+    };
+    if (not bk.error) divide_sort(first, last, level);
+
+    // wait until all the parts are finished
+    bk.exec(counter);
+};
+
+//------------------------------------------------------------------------
+//  function : divide_sort
+/// @brief Divide the range [first, last) in two parts around a pivot, and
+///        create a work (function_divide_sort) for each part.
+///        If the range is already sorted nothing is done. If level is 0 or
+///        the range has less than max_per_thread elements, it is sorted
+///        directly with pdqsort
+/// @param first : iterator to the first element to sort
+/// @param last : iterator to the next element after the last
+/// @param level : number of divisions allowed before the call to pdqsort
+/// @remarks NOTE(review): the scan loops have no explicit limits, and
+///          first + 1 needs a non empty range. Presumably pivot9 leaves in
+///          *first a value which guarantees the stop of the scans - confirm
+//------------------------------------------------------------------------
+template<uint32_t Block_size, class Iter_t, class Compare>
+void parallel_sort<Block_size, Iter_t, Compare>
+::divide_sort(Iter_t first, Iter_t last, uint32_t level)
+{
+    //------------------- check if sorted ---------------------------------
+    bool sorted = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+         it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
+    if (sorted) return;
+
+    //---------------- check if the subdivision is finished --------------
+    size_t nelem = last - first;
+    if (level == 0 or nelem < (max_per_thread))
+    {
+        return pdqsort(first, last, bk.cmp);
+    };
+
+    //-------------------- pivoting  ----------------------------------
+    // the pivot is the value referenced by first after the call to pivot9
+    pivot9(first, last, bk.cmp);
+    const value_t &val = const_cast<value_t &>(*first);
+    Iter_t c_first = first + 1, c_last = last - 1;
+
+    // c_first stops in an element not less than the pivot, and c_last in
+    // an element not greater than the pivot
+    while (bk.cmp(*c_first, val))   ++c_first;
+    while (bk.cmp(val, *c_last))    --c_last;
+
+    // swap the elements and continue until the two iterators cross
+    while (not (c_first > c_last))
+    {
+        std::swap(*(c_first++), *(c_last--));
+        while (bk.cmp(*c_first, val))
+            ++c_first;
+        while (bk.cmp(val, *c_last))
+            --c_last;
+    };
+
+    // the pivot goes to the position c_last, between the two parts
+    std::swap(*first, *c_last);
+
+    // insert the work of the second part [c_first, last) in the stack of
+    // works
+    function_divide_sort(c_first, last, level - 1, counter, bk.error);
+    if (bk.error) return;
+
+    // The first part [first, c_last) is inserted too as a work in the
+    // stack. It is not sorted directly in this call
+    function_divide_sort(first, c_last, level - 1, counter, bk.error);
+};
+//
+//****************************************************************************
+};//    End namespace blk_detail
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/block_indirect_sort/block_indirect_sort.hpp b/boost/sort/block_indirect_sort/block_indirect_sort.hpp
new file mode 100644
index 0000000000..62abde29a5
--- /dev/null
+++ b/boost/sort/block_indirect_sort/block_indirect_sort.hpp
@@ -0,0 +1,501 @@
+//----------------------------------------------------------------------------
+/// @file block_indirect_sort.hpp
+/// @brief block indirect sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
+
+#include <atomic>
+#include <boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp>
+#include <boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp>
+#include <boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp>
+#include <boost/sort/pdqsort/pdqsort.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/util/algorithm.hpp>
+#include <future>
+#include <iterator>
+
+// This value is the minimal number of threads needed to use the
+// block_indirect_sort algorithm
+#define BOOST_NTHREAD_BORDER 6
+
+namespace boost
+{
+namespace sort
+{
+namespace blk_detail
+{
+//---------------------------------------------------------------------------
+//         USING SENTENCES
+//---------------------------------------------------------------------------
+namespace bs = boost::sort;
+namespace bsc = bs::common;
+namespace bscu = bsc::util;
+using bscu::compare_iter;
+using bscu::value_iter;
+using bsc::range;
+using bsc::destroy;
+using bsc::initialize;
+using bscu::nbits64;
+using bs::pdqsort;
+using bscu::enable_if_string;
+using bscu::enable_if_not_string;
+using bscu::tmsb;
+//
+///---------------------------------------------------------------------------
+/// @struct block_indirect_sort
+/// @brief This class is the entry point of the block indirect sort. The code
+///        of this algorithm is divided in several classes:
+///        bis/block.hpp : basic structures used in the algorithm
+///        bis/backbone.hpp : data used by all the classes
+///        bis/merge_blocks.hpp : merge the internal blocks
+///        bis/move_blocks.hpp : move the blocks, and obtain all the elements
+///                              physically sorted
+///        bis/parallel_sort.hpp : make the parallel sort of each part in the
+///                                initial division of the data
+///
+//----------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t,
+         class Compare = compare_iter<Iter_t> >
+struct block_indirect_sort
+{
+    //------------------------------------------------------------------------
+    //                  T Y P E    A L I A S E S
+    //------------------------------------------------------------------------
+    using value_t = typename std::iterator_traits<Iter_t>::value_type;
+    using atomic_t = std::atomic<uint32_t>;
+    using range_pos = range<size_t>;
+    using range_it = range<Iter_t>;
+    using range_buf = range<value_t *>;
+    using function_t = std::function<void(void)>;
+
+    // classes which do the internal operations of the algorithm
+    using block_pos_t = block_pos;
+    using block_t = block<Block_size, Iter_t>;
+    using backbone_t = backbone<Block_size, Iter_t, Compare>;
+    using parallel_sort_t = parallel_sort<Block_size, Iter_t, Compare>;
+
+    using merge_blocks_t = merge_blocks<Block_size, Group_size, Iter_t, Compare>;
+    using move_blocks_t = move_blocks<Block_size, Group_size, Iter_t, Compare>;
+    using compare_block_pos_t = compare_block_pos<Block_size, Iter_t, Compare>;
+    //
+    //------------------------------------------------------------------------
+    //       D A T A    M E M B E R S
+    //------------------------------------------------------------------------
+    // data and internal structures of the algorithm, shared between the
+    // classes which are part of it
+    backbone_t bk;
+    // atomic counter used to detect the end of the works created inside
+    // the object
+    atomic_t counter;
+    // uninitialized memory used for the thread buffers (nullptr if none)
+    value_t *ptr;
+    // true while the memory pointed by ptr holds constructed elements
+    bool construct;
+    // range from which the buffers of the threads are extracted
+    range_buf rglobal_buf;
+    // number of threads to use
+    uint32_t nthread;
+    //
+    //------------------------------------------------------------------------
+    //                F U N C T I O N S
+    //------------------------------------------------------------------------
+
+    block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr);
+
+    block_indirect_sort(Iter_t first, Iter_t last)
+    : block_indirect_sort(first, last, Compare(),
+                          std::thread::hardware_concurrency())
+    { }
+
+    block_indirect_sort(Iter_t first, Iter_t last, Compare cmp)
+    : block_indirect_sort(first, last, cmp,
+                          std::thread::hardware_concurrency())
+    { }
+
+    block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread)
+    : block_indirect_sort(first, last, Compare(), nthread)
+    { }
+
+    //
+    //------------------------------------------------------------------------
+    //  function :destroy_all
+    /// @brief release the memory of the thread buffers. If the memory holds
+    ///        constructed elements, they are destroyed before returning the
+    ///        memory. Calling it again does nothing
+    //------------------------------------------------------------------------
+    void destroy_all(void)
+    {
+        // nothing was obtained, or it was already released
+        if (ptr == nullptr) return;
+        // the elements are destroyed only when they were constructed
+        if (construct)
+        {
+            destroy(rglobal_buf);
+            construct = false;
+        };
+        std::return_temporary_buffer(ptr);
+        ptr = nullptr;
+    }
+    //
+    //------------------------------------------------------------------------
+    //  function :~block_indirect_sort
+    /// @brief destructor of the class. All the work is done by destroy_all,
+    ///        which destroys the elements and returns the memory
+    //------------------------------------------------------------------------
+    ~block_indirect_sort(void)
+    {
+        destroy_all();
+    }
+
+    void split_range(size_t pos_index1, size_t pos_index2,
+                     uint32_t level_thread);
+
+    void start_function(void);
+
+//-------------------------------------------------------------------------
+}; // End class block_indirect_sort
+//----------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            N O N     I N L I N E      F U N C T I O N S                  ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+//  function : block_indirect_sort
+/// @brief sort the elements of the range [first, last)
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param cmp : object for to compare two elements pointed by Iter_t
+///              iterators
+/// @param nthr : Number of threads to use in the process. When this value
+///               is lower than 2, the sorting is done with 1 thread
+/// @exception std::bad_alloc : the temporary buffer can't be obtained
+//-------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
+::block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr)
+: bk(first, last, cmp), counter(0), ptr(nullptr), construct(false),
+  nthread(nthr)
+{
+    try
+    {
+        assert((last - first) >= 0);
+        size_t nelem = size_t(last - first);
+        if (nelem == 0) return;
+
+        //------------------- check if sort -----------------------------------
+        bool sorted = true;
+        for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted =
+                        not bk.cmp(*it2, *it1)); it1 = it2++);
+        if (sorted) return;
+
+        //------------------- check if reverse sort ---------------------------
+        sorted = true;
+        for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted =
+                        not bk.cmp(*it1, *it2)); it1 = it2++);
+
+        if (sorted)
+        {
+            size_t nelem2 = nelem >> 1;
+            Iter_t it1 = first, it2 = last - 1;
+            for (size_t i = 0; i < nelem2; ++i)
+                std::swap(*(it1++), *(it2--));
+            return;
+        };
+
+        //---------------- check if only single thread -----------------------
+        size_t nthreadmax = nelem / (Block_size * Group_size) + 1;
+        if (nthread > nthreadmax) nthread = (uint32_t) nthreadmax;
+
+        uint32_t nbits_size = (nbits64(sizeof(value_t)) >> 1);
+        if (nbits_size > 5) nbits_size = 5;
+        size_t max_per_thread = 1 << (18 - nbits_size);
+
+        if (nelem < (max_per_thread) or nthread < 2)
+        {
+            pdqsort(first, last, bk.cmp);
+            return;
+        };
+
+        //----------- creation of the temporary buffer --------------------
+        // get_temporary_buffer may grant fewer elements than requested, so
+        // the granted amount is checked as well as the pointer
+        const size_t nbuf = size_t(Block_size) * nthread;
+        auto tmp = std::get_temporary_buffer<value_t>(nbuf);
+        ptr = tmp.first;
+        if (ptr == nullptr or size_t(tmp.second) < nbuf)
+        {
+            bk.error = true;
+            throw std::bad_alloc();
+        };
+
+        rglobal_buf = range_buf(ptr, ptr + nbuf);
+        initialize(rglobal_buf, *first);
+        construct = true;
+
+        // creation of the buffers for the threads
+        std::vector<value_t *> vbuf(nthread);
+        for (uint32_t i = 0; i < nthread; ++i)
+            vbuf[i] = ptr + (i * Block_size);
+
+        // Insert the first work in the stack
+        bscu::atomic_write(counter, 1);
+        function_t f1 = [&]( )
+        {
+            start_function ( );
+            bscu::atomic_sub (counter, 1);
+        };
+        bk.works.emplace_back(f1);
+
+        //---------------------------------------------------------------------
+        //                    PROCESS
+        //---------------------------------------------------------------------
+        std::vector<std::future<void> > vfuture(nthread);
+
+        // The function launched with the futures is "execute the functions of
+        // the stack until this->counter is zero
+        // vbuf[i] is the memory from the main thread for to configure the
+        // thread local buffer
+        for (uint32_t i = 0; i < nthread; ++i)
+        {
+            auto f1 = [=, &vbuf]( )
+            {   bk.exec (vbuf[i], this->counter);};
+            vfuture[i] = std::async(std::launch::async, f1);
+        };
+        for (uint32_t i = 0; i < nthread; ++i)
+            vfuture[i].get();
+        if (bk.error) throw std::bad_alloc();
+    }
+    catch (...) // the destructor does not run when the constructor throws
+    {
+        destroy_all();
+        throw;
+    }
+};
+//
+//-----------------------------------------------------------------------------
+//  function : split_range
+/// @brief this function splits a range of positions in the index, and
+///        depending of the size, sort directly or make to a recursive call
+///        to split_range
+/// @param pos_index1 : first position in the index
+/// @param pos_index2 : position after the last in the index
+/// @param level_thread : depth of the call. When 0 sort the blocks
+//-----------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
+::split_range(size_t pos_index1, size_t pos_index2, uint32_t level_thread)
+{
+    size_t nblock = pos_index2 - pos_index1; // number of blocks to process
+
+    //-------------------------------------------------------------------------
+    // In the blocks not sorted, the physical position is the logical position
+    //-------------------------------------------------------------------------
+    Iter_t first = bk.get_block(pos_index1).first;
+    Iter_t last = bk.get_range(pos_index2 - 1).last;
+
+    if (nblock < Group_size) // few blocks : sorted directly with pdqsort
+    {
+        pdqsort(first, last, bk.cmp);
+        return;
+    };
+
+    size_t pos_index_mid = pos_index1 + (nblock >> 1);
+    atomic_t son_counter(1); // decremented by the queued work when it ends
+
+    //-------------------------------------------------------------------------
+    // Insert in the stack the work for the second part, while the current
+    // thread executes the first part
+    //-------------------------------------------------------------------------
+    if (level_thread != 0)
+    {
+        auto f1 = [=, &son_counter]( )
+        {
+            split_range (pos_index_mid, pos_index2, level_thread - 1);
+            bscu::atomic_sub (son_counter, 1);
+        };
+        bk.works.emplace_back(f1);
+        if (bk.error) return; // NOTE(review): f1 still refers to son_counter
+        split_range(pos_index1, pos_index_mid, level_thread - 1);
+    }
+    else
+    {
+        Iter_t mid = first + ((nblock >> 1) * Block_size);
+        auto f1 = [=, &son_counter]( )
+        {
+            parallel_sort_t (bk, mid, last);
+            bscu::atomic_sub (son_counter, 1);
+        };
+        bk.works.emplace_back(f1);
+        if (bk.error) return; // NOTE(review): f1 still refers to son_counter
+        parallel_sort_t(bk, first, mid);
+    };
+    bk.exec(son_counter); // presumably runs works until son_counter is 0
+    if (bk.error) return;
+    merge_blocks_t(bk, pos_index1, pos_index_mid, pos_index2); // join halves
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function : start_function
+/// @brief this function init the process. When the number of threads is lower
+///        than a predefined value, sort the elements with a parallel pdqsort.
+//-----------------------------------------------------------------------------
+template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
+void block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
+::start_function(void)
+{
+    // few threads : the whole range is handled by one parallel_sort object
+    if (nthread < BOOST_NTHREAD_BORDER)
+    {
+        parallel_sort_t(bk, bk.global_range.first, bk.global_range.last);
+        return;
+    };
+    // enough threads : split the range of blocks and then move the blocks
+    size_t depth = nbits64(nthread - 1) - 1;
+    split_range(0, bk.nblock, depth - 1);
+    if (bk.error) return;
+    move_blocks_t mover(bk);
+};
+
+///---------------------------------------------------------------------------
+//  function block_indirect_sort_call
+/// @brief Overload enabled by enable_if_string on the type of the elements.
+///        The sort is done with Block_size = 128 and Group_size = 128
+//----------------------------------------------------------------------------
+template <class Iter_t, class Compare,
+          enable_if_string<value_iter<Iter_t>> * = nullptr>
+inline void block_indirect_sort_call(Iter_t first, Iter_t last, Compare cmp,
+                                     uint32_t nthr)
+{
+    typedef block_indirect_sort<128, 128, Iter_t, Compare> sorter_t;
+    sorter_t(first, last, cmp, nthr); // the constructor does the sorting
+};
+
+template<size_t Size>
+struct block_size
+{   // BitsSize is the index in sz : 0 for Size 0, 9 for Size over 256
+    static constexpr uint32_t BitsSize =
+                    (Size > 256) ? 9 : ((Size == 0) ? 0 : tmsb[Size - 1]);
+    static constexpr uint32_t sz[10] =
+    { 4096, 4096, 4096, 4096, 2048, 1024, 768, 512, 256, 128 };
+    static constexpr uint32_t data = sz[BitsSize]; // entry selected in sz
+};
+//
+///---------------------------------------------------------------------------
+//  function block_indirect_sort_call
+/// @brief Overload enabled by enable_if_not_string on the type of the
+///        elements. Block_size is block_size<sizeof element>::data
+//----------------------------------------------------------------------------
+template <class Iter_t, class Compare,
+          enable_if_not_string<value_iter<Iter_t>> * = nullptr>
+inline void block_indirect_sort_call (Iter_t first, Iter_t last, Compare cmp,
+                                      uint32_t nthr)
+{
+    typedef block_size<sizeof (value_iter<Iter_t> )> bsize_t;
+    typedef block_indirect_sort<bsize_t::data, 64, Iter_t, Compare> sorter_t;
+    sorter_t(first, last, cmp, nthr); // the constructor does the sorting
+};
+
+//
+//****************************************************************************
+}; //    End namespace blk_detail
+//****************************************************************************
+//
+namespace bscu = boost::sort::common::util;
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//               B L O C K _ I N D I R E C T _ S O R T                      ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : block_indirect_sort
+/// @brief parallel block indirect sort of [first, last), with the default
+///        comparison and std::thread::hardware_concurrency() threads
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+void block_indirect_sort(Iter_t first, Iter_t last)
+{
+    const uint32_t nthread = std::thread::hardware_concurrency();
+    blk_detail::block_indirect_sort_call (first, last,
+                                 bscu::compare_iter<Iter_t>(), nthread);
+}
+
+//
+//-----------------------------------------------------------------------------
+//  function : block_indirect_sort
+/// @brief parallel block indirect sort of [first, last), with the default
+///        comparison
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param nthread : Number of threads to use in the process. When this value
+///                  is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+void block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread)
+{
+    blk_detail::block_indirect_sort_call(first, last,
+                                 bscu::compare_iter<Iter_t>(), nthread);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : block_indirect_sort
+/// @brief parallel block indirect sort of [first, last), with
+///        std::thread::hardware_concurrency() threads
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+///               iterators
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Compare,
+          bscu::enable_if_not_integral<Compare> * = nullptr>
+void block_indirect_sort(Iter_t first, Iter_t last, Compare comp)
+{
+    const uint32_t nthread = std::thread::hardware_concurrency();
+    blk_detail::block_indirect_sort_call (first, last, comp, nthread);
+}
+
+//
+//-----------------------------------------------------------------------------
+//  function : block_indirect_sort
+/// @brief parallel block indirect sort of the range [first, last)
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+///               iterators
+/// @param nthread : Number of threads to use in the process. When this value
+///                  is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare>
+void block_indirect_sort(Iter_t first, Iter_t last, Compare comp,
+                         uint32_t nthread)
+{
+    // only forwards : the block sizes are selected in blk_detail
+    blk_detail::block_indirect_sort_call (first, last, comp, nthread);
+}
+//
+//****************************************************************************
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/deque_cnc.hpp b/boost/sort/common/deque_cnc.hpp
new file mode 100644
index 0000000000..eb3b31ee6a
--- /dev/null
+++ b/boost/sort/common/deque_cnc.hpp
@@ -0,0 +1,366 @@
+//----------------------------------------------------------------------------
+/// @file   deque_cnc.hpp
+/// @brief  This file contains the implementation of deque_cnc, a
+///         concurrent deque protected by a spinlock
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __TOOLS_DEQUE_CNC_HPP
+#define __TOOLS_DEQUE_CNC_HPP
+
+#include <sort/tools/spinlock.hpp>
+#include <vector>
+#include <deque>
+
+namespace sort
+{
+namespace tools
+{
+
+//###########################################################################
+//                                                                         ##
+//    ################################################################     ##
+//    #                                                              #     ##
+//    #                      C L A S S                               #     ##
+//    #                   D E Q U E _ C N C                          #     ##
+//    #                                                              #     ##
+//    ################################################################     ##
+//                                                                         ##
+//###########################################################################
+//
+//---------------------------------------------------------------------------
+/// @class  deque_cnc
+/// @brief This class is a concurrent deque controlled by a spinlock
+/// @remarks
+//---------------------------------------------------------------------------
+template<typename T, typename Allocator = std::allocator<T> >
+class deque_cnc
+{
+public:
+    //-----------------------------------------------------------------------
+    //                     D E F I N I T I O N S
+    //-----------------------------------------------------------------------
+    typedef std::deque<T, Allocator>                deque_t;
+    typedef typename deque_t::size_type             size_type;
+    typedef typename deque_t::difference_type       difference_type;
+    typedef typename deque_t::value_type            value_type;
+    typedef typename deque_t::pointer               pointer;
+    typedef typename deque_t::const_pointer         const_pointer;
+    typedef typename deque_t::reference             reference;
+    typedef typename deque_t::const_reference       const_reference;
+    typedef typename deque_t::allocator_type        allocator_type;
+
+protected:
+    //------------------------------------------------------------------------
+    //                     VARIABLES
+    //------------------------------------------------------------------------
+    deque_t dq;
+    mutable spinlock spl;
+
+public:
+    //
+    //-----------------------------------------------------------------------
+    //  C O N S T R U C T O R S     A N D    D E S T R U C T O R
+    //-----------------------------------------------------------------------
+    //
+    //-----------------------------------------------------------------------
+    //  function : deque_cnc
+    /// @brief  constructor
+    //----------------------------------------------------------------------
+    explicit inline deque_cnc(void): dq() { }; // empty deque
+//
+    //----------------------------------------------------------------------
+    //  function : deque_cnc
+    /// @brief  constructor
+    /// @param [in] ALLC : Allocator
+    //----------------------------------------------------------------------
+    explicit inline deque_cnc(const Allocator &ALLC): dq(ALLC){ }; // dq uses ALLC
+    //
+    //----------------------------------------------------------------------
+    //  function : ~deque_cnc
+    /// @brief  Destructor
+    //----------------------------------------------------------------------
+    virtual ~deque_cnc(void){ dq.clear(); }; // spl is not taken here
+    //
+    //----------------------------------------------------------------------
+    //  function : clear
+    /// @brief Delete all the elements of the deque_cnc.
+    //----------------------------------------------------------------------
+    void clear(void)
+    {   // the deque is emptied while the spinlock is held
+        std::lock_guard<spinlock> guard(spl);
+        dq.clear();
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : swap
+    /// @brief swap the data between the two deque_cnc
+    /// @param [in] A : deque_cnc to swap
+    /// @return none
+    //-----------------------------------------------------------------------
+    void swap(deque_cnc & A) noexcept
+    {
+        if (this == &A) return; // swapping with itself does nothing
+        std::lock_guard < spinlock > S(spl); // only the lock of *this is taken
+        dq.swap(A.dq); // NOTE(review): A.spl is not locked while A.dq changes
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  S I Z E , M A X _ S I Z E , R E S I Z E
+    //  C A P A C I T Y , E M P T Y , R E S E R V E
+    //-----------------------------------------------------------------------
+    //
+    //------------------------------------------------------------------------
+    //  function : size
+    /// @brief return the number of elements in the deque_cnc
+    /// @return number of elements in the deque_cnc
+    //------------------------------------------------------------------------
+    size_type size(void) const noexcept
+    {   // spl is mutable, so it can be locked from a const function
+        std::lock_guard<spinlock> guard(spl);
+        return dq.size();
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function :max_size
+    /// @brief return the maximum size of the container
+    /// @return maximum size of the container
+    //------------------------------------------------------------------------
+    size_type max_size(void) const noexcept
+    {   // the value is read from the internal deque with the lock held
+        std::lock_guard<spinlock> guard(spl);
+        return dq.max_size();
+    };
+    //
+    //-------------------------------------------------------------------------
+    //  function : shrink_to_fit
+    /// @brief request the release of the memory not used by the elements
+    ///        stored in the deque_cnc.\n
+    ///        The number of elements and their values are not modified.\n
+    ///        The operation is done with the spinlock locked, and is
+    ///        delegated to the shrink_to_fit function of the internal
+    ///        std::deque
+    /// @return none
+    //------------------------------------------------------------------------
+    void shrink_to_fit()
+    {   // delegated to the internal deque with the lock held
+        std::lock_guard<spinlock> guard(spl);
+        dq.shrink_to_fit();
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : empty
+    /// @brief indicate if the deque_cnc is empty
+    /// @return true if the deque_cnc is empty, false in any other case
+    //------------------------------------------------------------------------
+    bool empty(void) const noexcept
+    {   // the answer can be outdated as soon as the lock is released
+        std::lock_guard<spinlock> guard(spl);
+        return dq.empty();
+    };
+    //---------------------------------------------------------------------------
+    //  function : push_back
+    /// @brief Insert one element in the back of the container
+    /// @param [in] D : value to insert. Can be a value, a reference or an
+    ///                 rvalue
+    //---------------------------------------------------------------------------
+    void push_back(const value_type & D)
+    {   // a copy of D is appended with the lock held
+        std::lock_guard<spinlock> guard(spl);
+        dq.push_back(D);
+    };
+
+    //------------------------------------------------------------------------
+    //  function : emplace_back
+    /// @brief Insert one element in the back of the container
+    /// @param [in] args :group of arguments for to build the object to insert
+    //-------------------------------------------------------------------------
+    template<class ... Args>
+    void emplace_back(Args && ... args)
+    {   // the arguments are forwarded to the constructor of the element
+        std::lock_guard<spinlock> guard(spl);
+        dq.emplace_back(std::forward<Args>(args) ...);
+    };
+    //------------------------------------------------------------------------
+    //  function : push_back
+    /// @brief Insert a copy of the elements of D in the back of the container
+    /// @param [in] D : deque to insert in the actual deque, inserting a copy
+    ///                  of the elements
+    /// @return reference to the deque after the insertion
+    //------------------------------------------------------------------------
+    template<class Allocator2>
+    deque_cnc & push_back(const std::deque<value_type, Allocator2> & D)
+    {   // one lock for the whole insertion, D is read from front to back
+        std::lock_guard<spinlock> guard(spl);
+        for (const value_type & elem : D)
+            dq.push_back(elem);
+        return *this;
+    };
+    //------------------------------------------------------------------------
+    //  function : push_back
+    /// @brief Insert the elements of D, moving them, in the back of the container
+    /// @param [in] D : deque to insert in the actual deque, inserting a move
+    ///                 of the elements
+    /// @return reference to the deque after the insertion
+    //------------------------------------------------------------------------
+    deque_cnc & push_back(std::deque<value_type, Allocator> && D)
+    {   // the elements are moved one by one, D keeps its moved-from elements
+        std::lock_guard<spinlock> guard(spl);
+        for (value_type & elem : D)
+            dq.emplace_back(std::move(elem));
+        return *this;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function :pop_back
+    /// @brief erase the last element of the container
+    //-----------------------------------------------------------------------
+    void pop_back(void)
+    {   // no check of emptiness is done before erasing the last element
+        std::lock_guard<spinlock> guard(spl);
+        dq.pop_back();
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function :pop_copy_back
+    /// @brief erase the last element and return a copy over P
+    /// @param [out] P : reference to a variable where copy the element
+    /// @return code of the operation
+    ///         true - Element erased
+    ///         false - Empty container
+    //------------------------------------------------------------------------
+    bool pop_copy_back(value_type & P)
+    {   // check, copy and erase are done under the same lock
+        std::lock_guard<spinlock> guard(spl);
+        if (dq.empty()) return false;
+        P = dq.back();
+        dq.pop_back();
+        return true;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function :pop_move_back
+    /// @brief erase the last element and move it over P
+    /// @param [out] P : reference to the variable where the element is moved
+    /// @return code of the operation
+    ///         true - Element erased
+    ///         false - Empty container, P is not modified
+    //------------------------------------------------------------------------
+    bool pop_move_back(value_type & P)
+    {
+        std::lock_guard < spinlock > guard(spl);
+        const bool has_data = (dq.size() != 0);
+        if (has_data)
+        {
+            // the moved-from element is erased immediately after
+            P = std::move(dq.back());
+            dq.pop_back();
+        };
+        return has_data;
+    };
+
+    //------------------------------------------------------------------------
+    //  function : push_front
+    /// @brief Insert one copy of the element in the front of the container
+    /// @param [in] D : value to insert
+    //------------------------------------------------------------------------
+    void push_front(const value_type & D)
+    {
+        // insertion of a copy of D, done with the lock held
+        std::lock_guard < spinlock > guard(spl);
+        dq.push_front(D);
+    };
+
+    //------------------------------------------------------------------------
+    //  function : emplace_front
+    /// @brief Insert one element in the front of the container
+    /// @param [in] args :group of arguments for to build the object to insert
+    //-------------------------------------------------------------------------
+    template<class ... Args>
+    void emplace_front(Args && ... args)
+    {
+        // the arguments are forwarded unchanged to the emplace_front of dq
+        std::lock_guard < spinlock > guard(spl);
+        dq.emplace_front(std::forward <Args>(args) ...);
+    };
+    //------------------------------------------------------------------------
+    //  function : push_front
+    /// @brief Insert a copy of the elements of the deque V1 in the front
+    ///        of the container
+    /// @param [in] V1 : deque with the elements to insert
+    /// @return reference to the deque after the insertion
+    //------------------------------------------------------------------------
+    template<class Allocator2>
+    deque_cnc & push_front(const std::deque<value_type, Allocator2> & V1)
+    {
+        std::lock_guard < spinlock > guard(spl);
+        // Each element is inserted in the front, in the order of V1, so the
+        // last element of V1 is the first of dq at the end
+        for (const value_type & elem : V1)
+        {
+            dq.push_front(elem);
+        };
+        return *this;
+    };
+    //-----------------------------------------------------------------------
+    //  function : push_front
+    /// @brief Insert a move of the elements of the deque V1 in the front
+    ///        of the container
+    /// @param [in] V1 : deque with the elements to insert
+    /// @return reference to the deque after the insertion
+    //-----------------------------------------------------------------------
+    deque_cnc & push_front(std::deque<value_type, Allocator> && V1)
+    {
+        std::lock_guard < spinlock > guard(spl);
+        // Each element is moved to the front, in the order of V1, so the
+        // last element of V1 is the first of dq at the end. V1 is not
+        // cleared
+        for (value_type & elem : V1)
+        {
+            dq.emplace_front(std::move(elem));
+        };
+        return *this;
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  function :pop_front
+    /// @brief erase the first element of the container
+    //-----------------------------------------------------------------------
+    void pop_front(void)
+    {
+        // The emptiness of dq is not checked here. pop_copy_front and
+        // pop_move_front are the functions which check it
+        std::lock_guard < spinlock > guard(spl);
+        dq.pop_front();
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  function :pop_copy_front
+    /// @brief erase the first element of the container and copy it over P
+    /// @param [out] P : reference to the variable where the element is copied
+    /// @return code of the operation
+    ///         true - Element erased
+    ///         false - Empty container, P is not modified
+    //-----------------------------------------------------------------------
+    bool pop_copy_front(value_type & P)
+    {
+        std::lock_guard < spinlock > guard(spl);
+        const bool has_data = (dq.size() != 0);
+        if (has_data)
+        {
+            // copy first, erase after
+            P = dq.front();
+            dq.pop_front();
+        };
+        return has_data;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function :pop_move_front
+    /// @brief erase the first element of the container and move it over P
+    /// @param [out] P : reference to the variable where the element is moved
+    /// @return code of the operation
+    ///         true - Element erased
+    ///         false - Empty container, P is not modified
+    //------------------------------------------------------------------------
+    bool pop_move_front(value_type & P)
+    {
+        std::lock_guard < spinlock > guard(spl);
+        const bool has_data = (dq.size() != 0);
+        if (has_data)
+        {
+            // the moved-from element is erased immediately after
+            P = std::move(dq.front());
+            dq.pop_front();
+        };
+        return has_data;
+    };
+};
+// end class deque_cnc
+
+//***************************************************************************
+};// end namespace tools
+};// end namespace sort
+//***************************************************************************
+#endif
diff --git a/boost/sort/common/file_vector.hpp b/boost/sort/common/file_vector.hpp
new file mode 100644
index 0000000000..1dc62fc02f
--- /dev/null
+++ b/boost/sort/common/file_vector.hpp
@@ -0,0 +1,272 @@
+//----------------------------------------------------------------------------
+/// @file file_vector.hpp
+/// @brief This file contains functions to work with random data and files.
+///        It has functions to create a file with random data, and
+///        functions to load a vector of numbers or strings from a file
+///
+/// @author Copyright (c) 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_FILE_VECTOR_HPP
+#define __BOOST_SORT_COMMON_FILE_VECTOR_HPP
+
+#include <ios>
+#include <cstdio>
+#include <cstdlib>
+#include <ciso646>
+#include <vector>
+#include <string>
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include <random>
+#include <cstdint>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+//
+//-----------------------------------------------------------------------------
+//  function : generate_file
+/// @brief Generate a binary file filled with random numbers of 64 bits
+/// @param [in] filename : name of the file. The previous content of the file
+///                        is discarded
+/// @param [in] NElem : number of 64 bits numbers to insert in the file
+/// @exception std::ios_base::failure : the file could not be opened
+/// @return 0 in all the cases
+/// @remarks The generator is a std::mt19937_64 with the seed 0, so the
+///          numbers written are the same in every call. Each number is
+///          written as the raw bytes of a uint64_t
+//-----------------------------------------------------------------------------
+static int generate_file(const std::string & filename, size_t NElem)
+{   //------------------------------- begin ----------------------------------
+    std::ofstream ofile;
+    ofile.open(filename, std::ios_base::out | std::ios_base::binary |
+                         std::ios_base::trunc);
+    // A failed open sets the failbit, not the badbit, so bad() alone never
+    // detects it. fail() checks the two bits
+    if (ofile.fail())
+    {
+        throw std::ios_base::failure("could not open file \n");
+    };
+    std::mt19937_64 my_rand(0);
+
+    for (size_t i = 0; i < NElem; ++i)
+    {
+        uint64_t Aux = my_rand();
+        ofile.write(reinterpret_cast<const char *>(&Aux), sizeof(Aux));
+    }
+    ofile.close();
+    return 0;
+};
+//
+//-----------------------------------------------------------------------------
+//  function : fill_vector_uint64
+/// @brief : fill a vector of uint64_t elements from a file
+/// @param [in] filename : name of the file
+/// @param [out] V : vector to fill. The previous content is discarded
+/// @param [in] NElem : number of elements to read from the file
+/// @exception std::ios_base::failure : the file could not be opened, has
+///            less than NElem elements of 8 bytes, or a read failed
+/// @return 0 in all the cases
+/// @remarks Each element is read as the raw bytes of a uint64_t
+//-----------------------------------------------------------------------------
+static int fill_vector_uint64(const std::string & filename,
+                              std::vector<uint64_t> & V, size_t NElem)
+{   //----------------------- begin ------------------------------------------
+    std::ifstream input(filename, std::ios_base::in | std::ios_base::binary);
+    if (input.fail())
+    {
+        throw std::ios_base::failure("could not open file \n");
+    };
+    //------------------------------------------------------------------------
+    // Calculate the length of the file and the number of elements inside
+    //------------------------------------------------------------------------
+    input.seekg(0, std::ios_base::end);
+    // tellg() returns -1 when it fails. The value is kept signed, because
+    // converted to size_t it would look like a huge file
+    const std::streamoff length = input.tellg();
+    if (length < 0 ||
+        (static_cast<size_t>(length) / sizeof(uint64_t)) < NElem)
+    {
+        throw std::ios_base::failure("incorrect lenght of the file\n");
+    };
+    V.clear();
+    V.reserve(NElem);
+
+    uint64_t Aux = 0;
+    input.seekg(0, std::ios_base::beg);
+    for (size_t i = 0; i < NElem; ++i)
+    {
+        // A failed read must not insert the previous value again
+        if (!input.read(reinterpret_cast<char *>(&Aux), sizeof(Aux)))
+        {
+            throw std::ios_base::failure("error reading the file\n");
+        };
+        V.push_back(Aux);
+    };
+    input.close();
+    return 0;
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function :write_file_uint64
+/// @brief Write a file with the content of a vector of uint64_t elements
+/// @param [in] V : vector with the numbers to write
+/// @param [in] filename : name of the file. The previous content of the file
+///                        is discarded
+/// @exception std::ios_base::failure : the file could not be opened
+/// @return 0 in all the cases
+/// @remarks Each element is written as the raw bytes of a uint64_t
+//-----------------------------------------------------------------------------
+static int write_file_uint64 (const std::vector<uint64_t> & V,
+                              const std::string & filename)
+{   //--------------------------------- begin --------------------------------
+    std::ofstream ofile;
+    ofile.open(filename,
+                    std::ios_base::out | std::ios_base::binary
+                                    | std::ios_base::trunc);
+    // A failed open sets the failbit, not the badbit, so bad() alone never
+    // detects it. fail() checks the two bits
+    if (ofile.fail())
+    {
+        throw std::ios_base::failure("could not open file \n");
+    };
+    for (const uint64_t & value : V)
+    {
+        ofile.write(reinterpret_cast<const char *>(&value), sizeof(value));
+    }
+    ofile.close();
+    return 0;
+};
+//
+//-----------------------------------------------------------------------------
+//  function : fill_vector_string
+/// @brief fill a vector of strings from a file
+/// @param [in] filename : name of the file with the strings
+/// @param [out] V : vector where the strings are stored. The previous
+///                  content is discarded
+/// @param [in] NElem : number of strings to read from the file
+/// @exception std::ios_base::failure : the file could not be opened or has
+///            less than NElem strings
+/// @return 0 in all the cases
+/// @remarks The strings are read with the operator >>, so they are
+///          delimited by white spaces
+//-----------------------------------------------------------------------------
+static int fill_vector_string (const std::string & filename,
+                               std::vector<std::string> & V, size_t NElem)
+{   //----------------------- begin ------------------------------------------
+    std::ifstream input(filename, std::ios_base::in | std::ios_base::binary);
+    if (input.fail())
+    {
+        throw std::ios_base::failure("could not open file \n");
+    };
+    V.clear();
+    V.reserve(NElem);
+
+    std::string inval;
+    for (size_t i = 0; i < NElem; ++i)
+    {
+        // The result of the extraction is checked, not eof() before it.
+        // eof() is false when only white spaces remain, and the failed
+        // extraction would insert an empty string
+        if (!(input >> inval))
+        {
+            throw std::ios_base::failure("Insuficient lenght of the file\n");
+        };
+        V.push_back(inval);
+    };
+    input.close();
+    return 0;
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function :write_file_string
+/// @brief : write a file with the strings of a vector
+/// @param [in] V : vector with the strings to write
+/// @param [in] filename : file where the strings are stored. The previous
+///                        content of the file is discarded
+/// @exception std::ios_base::failure : the file could not be opened
+/// @return 0 in all the cases
+/// @remarks After the characters of each string, a byte with the value 0 is
+///          written
+//-----------------------------------------------------------------------------
+static int write_file_string (const std::vector<std::string> & V,
+                             const std::string & filename)
+{   //--------------------------------- begin --------------------------------
+    std::ofstream ofile;
+    ofile.open(filename,
+                    std::ios_base::out | std::ios_base::binary
+                                    | std::ios_base::trunc);
+    // A failed open sets the failbit, not the badbit, so bad() alone never
+    // detects it. fail() checks the two bits
+    if (ofile.fail())
+    {
+        throw std::ios_base::failure("could not open file \n");
+    };
+    for (const std::string & str : V)
+    {
+        ofile.write(str.data(), static_cast<std::streamsize>(str.size()));
+        ofile.put(0x0);
+    }
+    ofile.close();
+    return 0;
+};
+//---------------------------------------------------------------------------
+/// @struct uint64_file_generator
+/// @brief This struct is a number generator from a file. Each call reads
+///        the next 8 bytes of the file as a uint64_t and returns that value
+///        modulo Max_Val
+/// @remarks Max_Val must not be 0, because get() calculates the remainder
+///          of the division by Max_Val
+//---------------------------------------------------------------------------
+struct uint64_file_generator
+{   //----------------------------------------------------------------------
+    //                  VARIABLES
+    //----------------------------------------------------------------------
+    // file with the numbers
+    std::ifstream input;
+    // NMax : number of 8 bytes elements of the file when it was opened
+    // Pos  : set to 0 in the constructor and not modified by the functions
+    //        of this struct
+    size_t NMax, Pos;
+    // divisor used in get(). By default is the greatest value of size_t
+    size_t Max_Val;
+    // name of the file
+    std::string s;
+
+    //----------------------------------------------------------------------
+    //                    FUNCTIONS
+    //----------------------------------------------------------------------
+    //
+    //----------------------------------------------------------------------
+    //  function : uint64_file_generator
+    /// @brief open the file and calculate the number of elements inside
+    /// @param [in] filename : name of the file with the numbers
+    /// @exception std::ios_base::failure : the file could not be opened
+    //----------------------------------------------------------------------
+    uint64_file_generator(const std::string & filename)
+    {   //---------------------------- begin ---------------------------------
+        s = filename;
+        input.open(filename, std::ios_base::in | std::ios_base::binary);
+        // fail() is true when the failbit or the badbit is set
+        if (input.fail())
+        {
+            throw std::ios_base::failure("could not open file \n");
+        };
+        //--------------------------------------------------------------------
+        // Calculate the length of the file and the number of elements inside
+        //--------------------------------------------------------------------
+        input.seekg(0, std::ios_base::end);
+        size_t length = input.tellg();
+        NMax = length / 8;
+        Pos = 0;
+        Max_Val = ~((size_t) 0);
+        input.seekg(0);
+    };
+
+    /// @brief set the divisor used in get(). MV must not be 0
+    void set_max_val(size_t MV){ Max_Val = MV; };
+
+    /// @brief number of 8 bytes elements of the file
+    size_t size() const { return NMax; };
+
+    /// @brief read the next number of the file
+    /// @return number read modulo Max_Val. When the read fails the number
+    ///         is 0, and the stream stays in fail state until reset()
+    uint64_t get(void)
+    {
+        // initialized, so a failed read never returns an indeterminate value
+        uint64_t Aux = 0;
+        input.read(reinterpret_cast<char *>(&Aux), 8);
+        return (Aux % Max_Val);
+    };
+
+    /// @brief same as get()
+    uint64_t operator ( )(){ return get(); };
+
+    /// @brief move the read position to the beginning of the file
+    void reset(void)
+    {
+        // seekg does nothing when the failbit is set, as after a read
+        // beyond the end of the file, so the state is cleared first
+        input.clear();
+        input.seekg(0, std::ios_base::beg);
+    };
+
+    ~uint64_file_generator() { if (input.is_open()) input.close(); };
+};
+//
+//****************************************************************************
+};// end namespace benchmark
+};// end namespace sort
+};// end namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/indirect.hpp b/boost/sort/common/indirect.hpp
new file mode 100644
index 0000000000..a55ef82023
--- /dev/null
+++ b/boost/sort/common/indirect.hpp
@@ -0,0 +1,153 @@
+//----------------------------------------------------------------------------
+/// @file indirect.hpp
+/// @brief Indirect algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_COMMON_INDIRECT_HPP
+#define __BOOST_SORT_PARALLEL_COMMON_INDIRECT_HPP
+
+//#include <boost/sort/common/atomic.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <functional>
+#include <iterator>
+#include <type_traits>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//
+//---------------------------------------------------------------------------
+/// @struct less_ptr_no_null
+///
+/// @remarks Comparison object for iterators. It compares the objects
+///          pointed by the iterators, not the iterators. The iterators are
+///          dereferenced without any previous check
+//---------------------------------------------------------------------------
+template<class Iter_t, class Compare = util::compare_iter<Iter_t> >
+struct less_ptr_no_null
+{
+    // comparison object applied to the dereferenced iterators
+    Compare comp;
+
+    //------------------------------------------------------------------------
+    //  function : less_ptr_no_null
+    /// @brief constructor. Store a copy of the comparison object
+    /// @param cmp : comparison object, by default a Compare default
+    ///              constructed
+    //-----------------------------------------------------------------------
+    less_ptr_no_null(Compare cmp = Compare()) : comp(cmp) {}
+
+    //------------------------------------------------------------------------
+    //  function : operator ( )
+    /// @brief Compare, with the internal comp, the objects pointed by the
+    ///        two iterators
+    /// @param  lhs : iterator to the first object
+    /// @param  rhs : iterator to the second object
+    /// @return result of comp(*lhs, *rhs)
+    //-----------------------------------------------------------------------
+    bool operator( )(Iter_t lhs, Iter_t rhs) const
+    {
+        return comp(*lhs, *rhs);
+    }
+};
+//
+//-----------------------------------------------------------------------------
+//  function : create_index
+/// @brief Fill a vector with one iterator to each element of the range
+///        [first, last), in the order of the range
+///
+/// @param first : iterator to the first element of the range
+/// @param last : iterator to the element after the last of the range
+/// @param [out] index : vector where the iterators are stored. The previous
+///                      content is discarded
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+static void create_index(Iter_t first, Iter_t last, std::vector<Iter_t> &index)
+{
+    const auto distance = last - first;
+    assert(distance >= 0);
+
+    index.clear();
+    index.reserve(distance);
+
+    Iter_t it = first;
+    while (it != last)
+    {
+        index.push_back(it);
+        ++it;
+    };
+};
+//
+//-----------------------------------------------------------------------------
+//  function : sort_index
+/// @brief This function transforms the logical sort of the elements, given
+///        by the index, in a physical sort. The element pointed by index[i]
+///        is moved to the position global_first + i, and at the end
+///        index[i] is global_first + i
+//
+/// @param global_first : iterator to the first element of the data
+/// @param [in, out] index : vector of the iterators. It is modified by the
+///                          function
+/// @remarks NOTE(review): the code presumably expects index to hold each of
+///          the iterators global_first .. global_first + index.size() - 1
+///          exactly once - confirm against the callers
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+static void sort_index(Iter_t global_first, std::vector<Iter_t> &index)
+{
+    typedef util::value_iter<Iter_t> value_t;
+
+    size_t pos_dest = 0;
+    size_t pos_src = 0;
+    size_t pos_in_vector = 0;
+    size_t nelem = index.size();
+    Iter_t it_dest, it_src;
+
+    while (pos_in_vector < nelem)
+    {
+        // Skip the positions whose iterator already points to themselves
+        while (pos_in_vector < nelem and
+               (size_t(index[pos_in_vector] - global_first)) == pos_in_vector)
+        {
+            ++pos_in_vector;
+        };
+
+        if (pos_in_vector == nelem) return;
+        // Beginning of a cycle : the element of the first position is saved
+        // in Aux, and the position is the first destination
+        pos_dest = pos_src = pos_in_vector;
+        it_dest = global_first + pos_dest;
+        value_t Aux = std::move(*it_dest);
+
+        // Follow the cycle. The element required in pos_dest is in pos_src,
+        // which is the next destination. The loop finishes when the source
+        // is the first position of the cycle, whose element is in Aux
+        while ((pos_src = (size_t(index[pos_dest] - global_first)))
+               != pos_in_vector)
+        {
+            index[pos_dest] = it_dest;
+            it_src = global_first + pos_src;
+            *it_dest = std::move(*it_src);
+            it_dest = it_src;
+            pos_dest = pos_src;
+        };
+
+        // Close the cycle with the element saved
+        *it_dest = std::move(Aux);
+        index[pos_dest] = it_dest;
+        ++pos_in_vector;
+    };
+};
+
+//-----------------------------------------------------------------------------
+//  function : indirect_sort
+/// @brief Sort the range [first, last) in an indirect way. Create an index
+///        of iterators to the elements, sort the index with method, and
+///        after move the elements to their final positions
+///
+/// @param method : callable invoked as
+///                 method(index.begin(), index.end(), index_comp), which
+///                 must sort the index
+/// @param first : iterator to the first element of the range
+/// @param last : iterator to the element after the last of the range
+/// @param comp : object to compare two elements of the range
+/// @remarks The ranges with less than 2 elements are not modified
+//-----------------------------------------------------------------------------
+template<class func, class Iter_t, class Compare = util::compare_iter<Iter_t> >
+static void indirect_sort(func method, Iter_t first, Iter_t last, Compare comp)
+{
+    auto nelem = (last - first);
+    assert(nelem >= 0);
+    if (nelem < 2) return;
+    // create_index clears the vector and reserves the memory needed
+    std::vector<Iter_t> index;
+    create_index(first, last, index);
+    // the index is sorted comparing the elements pointed by the iterators
+    less_ptr_no_null<Iter_t, Compare> index_comp(comp);
+    method(index.begin(), index.end(), index_comp);
+    sort_index(first, index);
+};
+
+//
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/int_array.hpp b/boost/sort/common/int_array.hpp
new file mode 100644
index 0000000000..22c3b0c5a4
--- /dev/null
+++ b/boost/sort/common/int_array.hpp
@@ -0,0 +1,75 @@
+//----------------------------------------------------------------------------
+/// @file int_array.hpp
+/// @brief This file contains the struct int_array , which is an array of
+///        uint64_t elements, being the template parameter NN the number of
+///        elements in the array
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_INT_ARRAY_HPP
+#define __BOOST_SORT_COMMON_INT_ARRAY_HPP
+
+#include <cstdint>
+#include <iostream>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//---------------------------------------------------------------------------
+/// @struct int_array
+/// @brief Array of NN elements of type uint64_t
+//---------------------------------------------------------------------------
+template<uint32_t NN>
+struct int_array
+{
+    uint64_t M[NN];
+
+    /// @brief build an int_array calling gen() one time for each element,
+    ///        from M[0] to M[NN - 1]
+    /// @param gen : generator of the values
+    /// @return int_array filled
+    template<class generator>
+    static int_array<NN> generate(generator & gen)
+    {
+        int_array<NN> filled;
+        uint32_t pos = 0;
+        while (pos < NN)
+        {
+            filled.M[pos] = gen();
+            ++pos;
+        };
+        return filled;
+    };
+
+    /// @brief sum of all the elements of the array
+    /// @return value of the sum, as uint64_t
+    uint64_t counter(void) const
+    {
+        uint64_t total = M[0];
+        for (uint32_t pos = 1; pos < NN; ++pos)
+        {
+            total += M[pos];
+        };
+        return total;
+    };
+};
+
+/// @brief Comparison object which orders two objects by the value returned
+///        by their function counter()
+template<class IA>
+struct H_comp
+{
+    bool operator ( )(const IA & lhs, const IA & rhs) const
+    {
+        return lhs.counter() < rhs.counter();
+    }
+};
+
+/// @brief Comparison object which orders two objects by their first
+///        element, M[0]
+template<class IA>
+struct L_comp
+{
+    bool operator ( )(const IA & lhs, const IA & rhs) const
+    {
+        return lhs.M[0] < rhs.M[0];
+    }
+};
+//***************************************************************************
+};//    End namespace benchmark
+};//    End namespace sort
+};//    End namespace boost
+//***************************************************************************
+#endif // end of int_array.hpp
diff --git a/boost/sort/common/merge_block.hpp b/boost/sort/common/merge_block.hpp
new file mode 100644
index 0000000000..9a7b118270
--- /dev/null
+++ b/boost/sort/common/merge_block.hpp
@@ -0,0 +1,418 @@
+//----------------------------------------------------------------------------
+/// @file merge_block.hpp
+/// @brief This file contains the class merge_block, which is part of the
+///        block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_MERGE_BLOCK_HPP
+#define __BOOST_SORT_COMMON_MERGE_BLOCK_HPP
+
+#include <boost/sort/common/range.hpp>
+#include <boost/sort/common/rearrange.hpp>
+#include <boost/sort/common/util/merge.hpp>
+#include <boost/sort/common/util/traits.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+///---------------------------------------------------------------------------
+/// @struct merge_block
+/// @brief This contains all the information shared between the classes of
+///        the block indirect sort algorithm
+///
+/// @tparam Iter_t : iterator to the elements to sort. The code uses the
+///                  difference of iterators and the addition of an integer
+/// @tparam Compare : object to compare two elements
+/// @tparam Power2 : the size of the block is 2^Power2 elements
+///
+/// @remarks NOTE(review): the destructor deletes the circular buffer when it
+///          is owned, and no copy constructor is declared. Verify that the
+///          objects of this struct are never copied
+//----------------------------------------------------------------------------
+template<class Iter_t, class Compare, uint32_t Power2 = 10>
+struct merge_block
+{
+    //-------------------------------------------------------------------------
+    //                  D E F I N I T I O N S
+    //-------------------------------------------------------------------------
+    typedef util::value_iter<Iter_t>                    value_t;
+    typedef range<size_t>                               range_pos;
+    typedef range<Iter_t>                               range_it;
+    typedef range<value_t *>                            range_buf;
+    typedef typename std::vector<size_t>::iterator      it_index;
+    typedef util::circular_buffer<value_t, Power2 + 1>  circular_t;
+
+    //------------------------------------------------------------------------
+    //                          CONSTANTS
+    //------------------------------------------------------------------------
+    // number of elements of a block
+    const size_t BLOCK_SIZE = (size_t) 1 << Power2;
+    // shift which converts a number of block in a number of element
+    const size_t LOG_BLOCK = Power2;
+
+    //------------------------------------------------------------------------
+    //                V A R I A B L E S
+    //------------------------------------------------------------------------
+    // range with all the element to sort
+    range<Iter_t> global_range;
+
+    // index vector of block_pos elements
+    std::vector<size_t> index;
+
+    // Number of elements to sort
+    size_t nelem;
+
+    // Number of blocks to sort
+    size_t nblock;
+
+    // Number of elements in the last block (tail)
+    size_t ntail;
+
+    // object for to compare two elements
+    Compare cmp;
+
+    // range  of elements of the last block (tail)
+    range_it range_tail;
+
+    // circular buffer
+    circular_t * ptr_circ;
+
+    // indicate  if the circular buffer is owned  by the data structure
+    // or is received as parameter
+    bool owned;
+
+    //
+    //------------------------------------------------------------------------
+    //                F U N C T I O N S
+    //------------------------------------------------------------------------
+    //
+    //------------------------------------------------------------------------
+    //  function : merge_block
+    /// @brief constructor of the class
+    //
+    /// @param first : iterator to the first element of the range to sort
+    /// @param last : iterator after the last element to the range to sort
+    /// @param comp : object for to compare two elements pointed by Iter_t
+    ///               iterators
+    /// @param pcirc_buffer : pointer to the circular buffer to use. When it
+    ///                       is nullptr and the range is not empty, the
+    ///                       object creates its own buffer and deletes it in
+    ///                       the destructor
+    //------------------------------------------------------------------------
+    merge_block (Iter_t first, Iter_t last, Compare comp,
+                 circular_t *pcirc_buffer)
+    : global_range(first, last), nelem(0), nblock(0), ntail(0), cmp(comp),
+      ptr_circ(pcirc_buffer), owned(pcirc_buffer == nullptr)
+    {
+        assert((last - first) >= 0);
+        // With an empty range the counters remain 0, the index remains
+        // empty and no buffer is created
+        if (first == last) return; // nothing to do
+
+        nelem = size_t(last - first);
+        nblock = (nelem + BLOCK_SIZE - 1) / BLOCK_SIZE;
+        ntail = (nelem % BLOCK_SIZE);
+        index.reserve(nblock + 1);
+
+        // at the beginning the block i is in the position i
+        for (size_t i = 0; i < nblock; ++i)
+            index.emplace_back(i);
+
+        range_tail.first = first + ((nblock - 1) << LOG_BLOCK);
+        range_tail.last = last;
+        if (owned)
+        {
+            ptr_circ = new circular_t;
+            ptr_circ->initialize(*first);
+        };
+    }
+
+    //------------------------------------------------------------------------
+    //  function : merge_block
+    /// @brief constructor without buffer. The object creates its own
+    ///        circular buffer
+    //
+    /// @param first : iterator to the first element of the range to sort
+    /// @param last : iterator after the last element to the range to sort
+    /// @param comp : object for to compare two elements pointed by Iter_t
+    ///               iterators
+    //------------------------------------------------------------------------
+    merge_block(Iter_t first, Iter_t last, Compare comp)
+                    : merge_block(first, last, comp, nullptr) { };
+
+    //------------------------------------------------------------------------
+    //  function : ~merge_block
+    /// @brief destructor. Delete the circular buffer only when it is owned
+    //------------------------------------------------------------------------
+    ~ merge_block()
+    {
+        if (ptr_circ != nullptr and owned)
+        {
+            delete ptr_circ;
+            ptr_circ = nullptr;
+        };
+    };
+    //-------------------------------------------------------------------------
+    //  function : get_range
+    /// @brief obtain the range in the position pos
+    /// @param pos : position of the range
+    /// @return range required. The range of the last block finishes in the
+    ///         end of the global range
+    //-------------------------------------------------------------------------
+    range_it get_range(size_t pos) const
+    {
+        Iter_t it1 = global_range.first + (pos << LOG_BLOCK);
+        Iter_t it2 = (pos == (nblock - 1)) ?
+                        global_range.last : it1 + BLOCK_SIZE;
+        return range_it(it1, it2);
+    };
+    //-------------------------------------------------------------------------
+    //  function : get_group_range
+    /// @brief obtain the range of the contiguous blocks beginning in the
+    //         position pos
+    /// @param pos : position of the first range
+    /// @param nrange : number of ranges of the group
+    /// @return range required. When the group includes the last block, the
+    ///         range finishes in the end of the global range
+    //-------------------------------------------------------------------------
+    range_it get_group_range(size_t pos, size_t nrange) const
+    {
+        Iter_t it1 = global_range.first + (pos << LOG_BLOCK);
+
+        Iter_t it2 = ((pos + nrange) == nblock) ?
+                        global_range.last :
+                        global_range.first + ((pos + nrange) << LOG_BLOCK);
+
+        return range_it(it1, it2);
+    };
+    //-------------------------------------------------------------------------
+    //  function : is_tail
+    /// @brief indicate if a block is the tail
+    /// @param pos : position of the block
+    /// @return true : tail  false : not tail
+    //-------------------------------------------------------------------------
+    bool is_tail(size_t pos) const
+    {
+        return (pos == (nblock - 1) and ntail != 0);
+    };
+    //-------------------------------------------------------------------------
+    //  function : merge_range_pos
+    /// @brief Merge the blocks whose positions are in the ranges of the
+    ///        index [itx_first, itx_mid) and [itx_mid, itx_last). The
+    ///        function is defined after the struct
+    /// @param itx_first : iterator to the first position of the first range
+    /// @param itx_mid : iterator to the first position of the second range
+    /// @param itx_last : iterator after the last position of the second range
+    /// @return
+    //-------------------------------------------------------------------------
+    void merge_range_pos(it_index itx_first, it_index itx_mid,
+                         it_index itx_last);
+
+    //-------------------------------------------------------------------------
+    //  function : move_range_pos_backward
+    /// @brief Move backward the elements of a range of blocks in a index
+    /// @param itx_first : iterator to the position of the first block
+    /// @param  itx_last : iterator to the position of the last block
+    /// @param  npos : number of positions to move. Must be less than BLOCK_SIZE
+    /// @return
+    //-------------------------------------------------------------------------
+    void move_range_pos_backward(it_index itx_first, it_index itx_last,
+                                 size_t npos);
+
+    //-------------------------------------------------------------------------
+    //  function : rearrange_with_index
+    /// @brief rearrange the blocks with the relative positions of the index
+    /// @return
+    //-------------------------------------------------------------------------
+    void rearrange_with_index(void);
+
+//---------------------------------------------------------------------------
+};// end struct merge_block
+//---------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//           N O N     I N L I N E     F U N C T IO N S                     ##
+//                                                                          ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+//  function : merge_range_pos
+/// @brief Merge the blocks of [itx_first, itx_mid) with the blocks of [itx_mid, itx_last)
+/// @param itx_first, itx_mid, itx_last : iterators delimiting two consecutive ranges of block positions
+/// @return nothing; the resulting block positions are written from itx_first onwards
+//-------------------------------------------------------------------------
+template<class Iter_t, class Compare, uint32_t Power2>
+void merge_block<Iter_t, Compare, Power2>
+::merge_range_pos(it_index itx_first, it_index itx_mid,it_index itx_last)
+{
+    assert((itx_last - itx_mid) >= 0 and (itx_mid - itx_first) >= 0);
+
+    size_t nelemA = (itx_mid - itx_first), nelemB = (itx_last - itx_mid);
+    if (nelemA == 0 or nelemB == 0) return; // one side is empty: nothing to merge
+
+    //-------------------------------------------------------------------
+    // Create two indexes with a copy of the positions of the blocks to merge
+    //-------------------------------------------------------------------
+    std::vector<size_t> indexA, indexB;
+    indexA.reserve(nelemA + 1); // one extra slot for the tail block appended below
+    indexB.reserve(nelemB);
+
+    indexA.insert(indexA.begin(), itx_first, itx_mid);
+    indexB.insert(indexB.begin(), itx_mid, itx_last);
+
+    it_index itx_out = itx_first; // output position, starting at itx_first
+    it_index itxA = indexA.begin(), itxB = indexB.begin();
+    range_it rngA, rngB;
+    Iter_t itA = global_range.first, itB = global_range.first;
+    bool validA = false, validB = false; // true while rngA / rngB hold a loaded block
+
+    while (itxA != indexA.end() and itxB != indexB.end())
+    {   //----------------------------------------------------------------
+        // Load valid ranges from the itxA and itxB positions
+        //----------------------------------------------------------------
+        if (not validA)
+        {
+            rngA = get_range(*itxA);
+            itA = rngA.first;
+            validA = true;
+        };
+        if (not validB)
+        {
+            rngB = get_range(*itxB);
+            itB = rngB.first;
+            validB = true;
+        };
+        //----------------------------------------------------------------
+        // If the circular buffer is empty and the two blocks don't overlap,
+        // pass directly the position of the block to itx_out
+        //----------------------------------------------------------------
+        if (ptr_circ->size() == 0)
+        {
+            if (not cmp(*rngB.front(), *rngA.back())) // first of B is not less than last of A
+            {
+                *(itx_out++) = *(itxA++);
+                validA = false;
+                continue;
+            };
+            if (cmp(*rngB.back(), *rngA.front())) // last of B is less than first of A
+            {
+                if (not is_tail(*itxB))
+                    *(itx_out++) = *itxB;
+                else ptr_circ->push_move_back(rngB.first, rngB.size()); // tail: moved to the buffer, no position emitted
+                ++itxB;
+                validB = false;
+                continue;
+            };
+        };
+        //----------------------------------------------------------------
+        // Normal merge : the result is stored in the circular buffer
+        //----------------------------------------------------------------
+        bool side = util::merge_circular(itA, rngA.last, itB, rngB.last,
+                        *ptr_circ, cmp, itA, itB);
+        if (side)
+        {   // rngA is finished
+            ptr_circ->pop_move_front(rngA.first, rngA.size());
+            *(itx_out++) = *(itxA++);
+            validA = false;
+        }
+        else
+        {   // rngB is finished
+            if (not is_tail(*itxB)) // the tail block is not refilled nor emitted here
+            {
+                ptr_circ->pop_move_front(rngB.first, rngB.size());
+                *(itx_out++) = *itxB;
+            };
+            ++itxB;
+            validB = false;
+        };
+    }; // end while
+
+    if (itxA == indexA.end())
+    {   // the index A is finished
+        rngB = get_range(*itxB);
+        ptr_circ->pop_move_front(rngB.first, ptr_circ->size()); // empty the buffer into the current B block
+        while (itxB != indexB.end())
+            *(itx_out++) = *(itxB++);
+    }
+    else
+    {   // The list B is finished
+        rngA = get_range(*itxA);
+        if (ntail != 0 and indexB.back() == (nblock - 1)) // exist tail
+        {   // add the tail block to indexA, and shift the element
+            indexA.push_back(indexB.back()); // fits in the slot reserved above
+            size_t numA = size_t(itA - rngA.first); // elements of rngA already passed by itA
+            ptr_circ->pop_move_back(rngA.first, numA);
+            move_range_pos_backward(itxA, indexA.end(), ntail);
+        };
+
+        ptr_circ->pop_move_front(rngA.first, ptr_circ->size()); // empty the buffer into the current A block
+        while (itxA != indexA.end())
+            *(itx_out++) = *(itxA++);
+    };
+};
+
+//-------------------------------------------------------------------------
+//  function : move_range_pos_backward
+/// @brief Move backward the elements of a range of blocks in a index
+/// @param itx_first : iterator to the position of the first block
+/// @param  itx_last : iterator to the position after the last block
+/// @param  npos : number of positions to move. Must not be greater than
+///                BLOCK_SIZE, nor than the size of the last block
+/// @remarks If the range [itx_first, itx_last) is empty, the function
+///          does nothing
+/// @return
+//-------------------------------------------------------------------------
+template<class Iter_t, class Compare, uint32_t Power2>
+void merge_block<Iter_t, Compare, Power2>
+::move_range_pos_backward(it_index itx_first, it_index itx_last, size_t npos)
+{
+    assert((itx_last - itx_first) >= 0 and npos <= BLOCK_SIZE);
+
+    //--------------------------------------------------------------------
+    // The assert admits an empty range. In that case there is no last
+    // block, and *(itx_last - 1) must not be evaluated
+    //--------------------------------------------------------------------
+    if ((itx_last - itx_first) == 0) return;
+
+    //--------------------------------------------------------------------
+    // Processing the last block. Must be ready to accept npos elements
+    // from the previous block, so its first (size - npos) elements are
+    // moved to the end of the block
+    // NOTE(review): util::move_backward is assumed to receive first the
+    // end of the destination, and then the source range — confirm
+    //--------------------------------------------------------------------
+    range_it rng1 = get_range(*(itx_last - 1));
+    assert(rng1.size() >= npos);
+    if (rng1.size() > npos)
+    {
+        size_t nmove = rng1.size() - npos;
+        util::move_backward(rng1.last, rng1.first, rng1.first + nmove);
+    };
+    //--------------------------------------------------------------------
+    // Movement of elements between blocks, from the last to the first.
+    // In each step rng2 is the block processed in the previous step, and
+    // rng1 the block before it in the index
+    //--------------------------------------------------------------------
+    for (it_index itx = itx_last - 1; itx != itx_first;)
+    {
+        --itx;
+        range_it rng2 = rng1;
+        rng1 = get_range(*itx);
+        Iter_t it_mid1 = rng1.last - npos, it_mid2 = rng2.first + npos;
+        // the last npos elements of rng1 go to the beginning of rng2
+        util::move_backward(it_mid2, it_mid1, rng1.last);
+        // the other elements of rng1 are shifted to the end of rng1
+        util::move_backward(rng1.last, rng1.first, it_mid1);
+    };
+};
+//-------------------------------------------------------------------------
+//  function : rearrange_with_index
+/// @brief Move the blocks to the places indicated by the index. The index
+///        is processed cycle by cycle, using the buffer of ptr_circ as
+///        auxiliary memory for the first block of each cycle. Every
+///        position of the index visited ends with index[i] == i
+/// @return
+//-------------------------------------------------------------------------
+template<class Iter_t, class Compare, uint32_t Power2>
+void merge_block<Iter_t, Compare, Power2>
+::rearrange_with_index(void)
+{
+    const size_t nblk = index.size();
+
+    // the circular buffer is emptied, and its memory used as a plain buffer
+    ptr_circ->clear();
+    value_t * buf = ptr_circ->get_buffer();
+    range_buf rng_saved(buf, buf + ptr_circ->NMAX);
+
+    size_t start = 0;
+    for (;;)
+    {
+        // skip the blocks which are already in their place
+        while (start < nblk and index[start] == start)
+            ++start;
+        if (start >= nblk) return;
+
+        // save the first block of the cycle in the buffer
+        rng_saved = move_forward(rng_saved, get_range(start));
+
+        // follow the cycle, filling each free place with its block
+        size_t hole = start;
+        for (size_t from = index[start]; from != start; from = index[from])
+        {
+            move_forward(get_range(hole), get_range(from));
+            index[hole] = hole;
+            hole = from;
+        };
+
+        // the saved block closes the cycle
+        move_forward(get_range(hole), rng_saved);
+        index[hole] = hole;
+        ++start;
+    };
+};
+
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+#endif
diff --git a/boost/sort/common/merge_four.hpp b/boost/sort/common/merge_four.hpp
new file mode 100644
index 0000000000..edfb2ffc72
--- /dev/null
+++ b/boost/sort/common/merge_four.hpp
@@ -0,0 +1,327 @@
+//----------------------------------------------------------------------------
+/// @file merge_four.hpp
+/// @brief This file has the functions to merge 4 buffers
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_FOUR_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_FOUR_HPP
+
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/range.hpp>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//
+//############################################################################
+//                                                                          ##
+//                       F U S I O N     O F                                ##
+//                                                                          ##
+//              F O U R     E L E M E N T S    R A N G E                    ##
+//                                                                          ##
+//############################################################################
+//
+
+//-----------------------------------------------------------------------------
+//  function : less_range
+/// @brief Stable comparison between two elements. The element pointed by
+///        it1 goes before the element pointed by it2 if it is less, or if
+///        they are equivalent and pos1 is not greater than pos2
+///
+/// @param it1 : iterator to the first element
+/// @param pos1 : position of the object pointed by it1
+/// @param it2 : iterator to the second element
+/// @param pos2 : position of the element pointed by it2
+/// @param comp : comparison object
+/// @return result of the comparison
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = typename util::compare_iter<Iter_t> >
+inline bool less_range(Iter_t it1, uint32_t pos1, Iter_t it2, uint32_t pos2,
+                       Compare comp = Compare())
+{
+    // strictly less : no need to look at the positions
+    if (comp(*it1, *it2)) return true;
+    // not less, and it comes from a later position
+    if (pos2 < pos1) return false;
+    // earlier or same position : true only if the elements are equivalent
+    return not comp(*it2, *it1);
+};
+
+//-----------------------------------------------------------------------------
+//  function : full_merge4
+/// @brief Merge, at most, four ranges, moving their elements to rdest
+///
+/// @param rdest : range where move the elements merged. Its size must be
+///                greater or equal than the sum of the sizes of the ranges
+///                in vrange_input
+/// @param vrange_input : array of ranges to merge. It is modified by the
+///                       function : the empty ranges are removed, and the
+///                       first iterator of the ranges advances as their
+///                       elements are moved
+/// @param nrange_input : number of ranges in vrange_input
+/// @param comp : comparison object
+/// @return range with all the elements moved with the size adjusted
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+range<Iter1_t> full_merge4(const range<Iter1_t> &rdest,
+                           range<Iter2_t> vrange_input[4],
+                           uint32_t nrange_input, Compare comp)
+{
+    typedef range<Iter1_t> range1_t;
+    typedef util::value_iter<Iter1_t> type1;
+    typedef util::value_iter<Iter2_t> type2;
+    static_assert (std::is_same< type1, type2 >::value,
+                    "Incompatible iterators\n");
+
+    //------------------------------------------------------------------------
+    // Remove the empty ranges, keeping the relative order of the others
+    //------------------------------------------------------------------------
+    uint32_t i = 0;
+    while (i < nrange_input)
+    {
+        if (vrange_input[i].size() != 0)
+        {
+            ++i;
+        }
+        else
+        {
+            for (uint32_t k = i + 1; k < nrange_input; ++k)
+            {
+                vrange_input[k - 1] = vrange_input[k];
+            };
+            --nrange_input;
+        };
+    };
+
+    //------------------------------------------------------------------------
+    // With less than three ranges there is a direct solution
+    //------------------------------------------------------------------------
+    if (nrange_input == 0) return range1_t(rdest.first, rdest.first);
+    if (nrange_input == 1) return move_forward(rdest, vrange_input[0]);
+    if (nrange_input == 2)
+    {
+        return merge(rdest, vrange_input[0], vrange_input[1], comp);
+    };
+
+    //------------------------------------------------------------------------
+    // Initial sort
+    // pos holds the positions of the ranges, sorted by their first element.
+    // pos[3] is examined only when npos == 4
+    //------------------------------------------------------------------------
+    uint32_t pos[4] =
+    { 0, 1, 2, 3 }, npos = nrange_input;
+
+    //-----------------------------------------------------------------------
+    // thanks to Steven Ross by their suggestion about the optimal
+    // sorting networks
+    //-----------------------------------------------------------------------
+    if (less_range(vrange_input[pos[1]].first, pos[1],
+                    vrange_input[pos[0]].first, pos[0], comp))
+    {
+        std::swap(pos[0], pos[1]);
+    };
+    if (npos == 4 and less_range(vrange_input[pos[3]].first, pos[3],
+                                 vrange_input[pos[2]].first, pos[2], comp))
+    {
+        std::swap(pos[3], pos[2]);
+    };
+    if (less_range (vrange_input[pos[2]].first, pos[2],
+                    vrange_input[pos[0]].first, pos[0], comp))
+    {
+        std::swap(pos[0], pos[2]);
+    };
+    if (npos == 4
+                    and less_range (vrange_input[pos[3]].first, pos[3],
+                                    vrange_input[pos[1]].first, pos[1], comp))
+    {
+        std::swap(pos[1], pos[3]);
+    };
+    if (less_range (vrange_input[pos[2]].first, pos[2],
+                    vrange_input[pos[1]].first, pos[1], comp))
+    {
+        std::swap(pos[1], pos[2]);
+    };
+
+    //------------------------------------------------------------------------
+    // While there are more than two ranges, move the first element of the
+    // range pos[0], and restore the order of pos
+    //------------------------------------------------------------------------
+    Iter1_t it_dest = rdest.first;
+    while (npos > 2)
+    {
+        *(it_dest++) = std::move(*(vrange_input[pos[0]].first++));
+        if (vrange_input[pos[0]].size() == 0)
+        {   // the range is exhausted : remove it from pos
+            pos[0] = pos[1];
+            pos[1] = pos[2];
+            pos[2] = pos[3];
+            --npos;
+        }
+        else
+        {   // insert the range pos[0] in its new place
+            if (less_range(vrange_input[pos[1]].first, pos[1],
+                            vrange_input[pos[0]].first, pos[0], comp))
+            {
+                std::swap(pos[0], pos[1]);
+                if (less_range(vrange_input[pos[2]].first, pos[2],
+                                vrange_input[pos[1]].first, pos[1], comp))
+                {
+                    std::swap(pos[1], pos[2]);
+                    if (npos == 4
+                                    and less_range(vrange_input[pos[3]].first,
+                                                    pos[3],
+                                                    vrange_input[pos[2]].first,
+                                                    pos[2], comp))
+                    {
+                        std::swap(pos[2], pos[3]);
+                    };
+                };
+            };
+        };
+    };
+
+    //------------------------------------------------------------------------
+    // Merge of the two last ranges. The range with the lower position is
+    // passed as first range to merge
+    //------------------------------------------------------------------------
+    range1_t raux1(rdest.first, it_dest), raux2(it_dest, rdest.last);
+    if (pos[0] < pos[1])
+    {
+        return concat(raux1,merge(raux2, vrange_input[pos[0]],
+                                  vrange_input[pos[1]], comp));
+    }
+    else
+    {
+        return concat(raux1, merge (raux2, vrange_input[pos[1]],
+                                    vrange_input[pos[0]], comp));
+    };
+};
+
+//-----------------------------------------------------------------------------
+//  function : uninit_full_merge4
+/// @brief Merge, at most, four ranges and put the result in uninitialized
+///        memory
+///
+/// @param dest: range where create and move the elements merged. Its
+///              size must be greater or equal than the sum of the sizes
+///              of the ranges in vrange_input
+/// @param vrange_input : array of ranges to merge. It is modified by the
+///                       function : the empty ranges are removed, and the
+///                       first iterator of the ranges advances as their
+///                       elements are moved
+/// @param nrange_input : number of ranges in vrange_input
+/// @param comp : comparison object
+/// @return range with all the elements move with the size adjusted
+//-----------------------------------------------------------------------------
+template<class Value_t, class Iter_t, class Compare>
+range<Value_t *> uninit_full_merge4(const range<Value_t *> &dest,
+                                    range<Iter_t> vrange_input[4],
+                                    uint32_t nrange_input, Compare comp)
+{
+    typedef util::value_iter<Iter_t> type1;
+    static_assert (std::is_same< type1, Value_t >::value,
+                    "Incompatible iterators\n");
+
+    //------------------------------------------------------------------------
+    // Remove the empty ranges, keeping the relative order of the others
+    //------------------------------------------------------------------------
+    uint32_t i = 0;
+    while (i < nrange_input)
+    {
+        if (vrange_input[i].size() != 0)
+        {
+            ++i;
+        }
+        else
+        {
+            for (uint32_t k = i + 1; k < nrange_input; ++k)
+            {
+                vrange_input[k - 1] = vrange_input[k];
+            };
+            --nrange_input;
+        };
+    };
+
+    //------------------------------------------------------------------------
+    // With less than three ranges there is a direct solution
+    //------------------------------------------------------------------------
+    if (nrange_input == 0) return range<Value_t *>(dest.first, dest.first);
+    if (nrange_input == 1) return move_construct(dest, vrange_input[0]);
+    if (nrange_input == 2)
+    {
+        return merge_construct(dest, vrange_input[0], vrange_input[1], comp);
+    };
+
+    //------------------------------------------------------------------------
+    // Initial sort
+    // pos holds the positions of the ranges, sorted by their first element.
+    // pos[3] is examined only when npos == 4
+    //------------------------------------------------------------------------
+    uint32_t pos[4] = { 0, 1, 2, 3 }, npos = nrange_input;
+
+    //-----------------------------------------------------------------------
+    // thanks to Steven Ross by their suggestion about the optimal
+    // sorting networks
+    //-----------------------------------------------------------------------
+    if (less_range(vrange_input[pos[1]].first, pos[1],
+                    vrange_input[pos[0]].first, pos[0], comp))
+    {
+        std::swap(pos[0], pos[1]);
+    };
+    if (npos == 4  and less_range(vrange_input[pos[3]].first, pos[3],
+                                  vrange_input[pos[2]].first, pos[2], comp))
+    {
+        std::swap(pos[3], pos[2]);
+    };
+    if (less_range(vrange_input[pos[2]].first, pos[2],
+                    vrange_input[pos[0]].first, pos[0], comp))
+    {
+        std::swap(pos[0], pos[2]);
+    };
+    if (npos == 4 and less_range(vrange_input[pos[3]].first, pos[3],
+                                 vrange_input[pos[1]].first, pos[1], comp))
+    {
+        std::swap(pos[1], pos[3]);
+    };
+    if (less_range(vrange_input[pos[2]].first, pos[2],
+                    vrange_input[pos[1]].first, pos[1], comp))
+    {
+        std::swap(pos[1], pos[2]);
+    };
+
+    //------------------------------------------------------------------------
+    // While there are more than two ranges, construct in dest an object
+    // with the first element of the range pos[0], and restore the order
+    // of pos
+    //------------------------------------------------------------------------
+    Value_t *it_dest = dest.first;
+    while (npos > 2)
+    {
+        util::construct_object(&(*(it_dest++)),
+                        std::move(*(vrange_input[pos[0]].first++)));
+        if (vrange_input[pos[0]].size() == 0)
+        {   // the range is exhausted : remove it from pos
+            pos[0] = pos[1];
+            pos[1] = pos[2];
+            pos[2] = pos[3];
+            --npos;
+        }
+        else
+        {   // insert the range pos[0] in its new place
+            if (less_range (vrange_input[pos[1]].first, pos[1],
+                            vrange_input[pos[0]].first, pos[0], comp))
+            {
+                std::swap(pos[0], pos[1]);
+                if (less_range (vrange_input[pos[2]].first, pos[2],
+                                vrange_input[pos[1]].first, pos[1], comp))
+                {
+                    std::swap(pos[1], pos[2]);
+                    if (npos == 4 and less_range(vrange_input[pos[3]].first,
+                                                 pos[3],
+                                                 vrange_input[pos[2]].first,
+                                                 pos[2], comp))
+                    {
+                        std::swap(pos[2], pos[3]);
+                    };
+                };
+            };
+        };
+    }; // end while (npos > 2)
+
+    //------------------------------------------------------------------------
+    // Merge of the two last ranges. The range with the lower position is
+    // passed as first range to merge_construct
+    //------------------------------------------------------------------------
+    range<Value_t *> raux1(dest.first, it_dest), raux2(it_dest, dest.last);
+    if (pos[0] < pos[1])
+    {
+        return concat(raux1,
+                      merge_construct(raux2, vrange_input[pos[0]],
+                                      vrange_input[pos[1]], comp));
+    }
+    else
+    {
+        return concat(raux1,
+                      merge_construct(raux2, vrange_input[pos[1]],
+                                      vrange_input[pos[0]], comp));
+    };
+};
+
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/merge_vector.hpp b/boost/sort/common/merge_vector.hpp
new file mode 100644
index 0000000000..84afea5a5e
--- /dev/null
+++ b/boost/sort/common/merge_vector.hpp
@@ -0,0 +1,196 @@
+//----------------------------------------------------------------------------
+/// @file merge_vector.hpp
+/// @brief This file has the functions to do a stable merge of the
+///        ranges stored in a vector
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_VECTOR_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_VECTOR_HPP
+
+#include <boost/sort/common/merge_four.hpp>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//############################################################################
+//                                                                          ##
+//                       F U S I O N     O F                                ##
+//                                                                          ##
+//              A  V E C T O R   O F    R A N G E S                         ##
+//                                                                          ##
+//############################################################################
+
+//
+//-----------------------------------------------------------------------------
+//  function : merge_level4
+/// @brief Do one level of merge. The ranges of v_input are taken in
+///        consecutive groups of, at most, four ranges, and each group is
+///        merged with full_merge4. The ranges obtained are stored one after
+///        another in dest, and appended in the same order to v_output
+/// @param dest : range where move the elements merged
+/// @param v_input : vector of ranges to merge
+/// @param v_output : vector of ranges obtained. Its previous content is
+///                   discarded
+/// @param comp : comparison object
+/// @remarks The function returns nothing, the result is in v_output
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+void merge_level4(range<Iter1_t> dest, std::vector<range<Iter2_t> > &v_input,
+                  std::vector<range<Iter1_t> > &v_output, Compare comp)
+{
+    typedef range<Iter1_t> range1_t;
+    typedef util::value_iter<Iter1_t> type1;
+    typedef util::value_iter<Iter2_t> type2;
+    static_assert (std::is_same< type1, type2 >::value,
+                    "Incompatible iterators\n");
+
+    v_output.clear();
+    if (v_input.empty()) return;
+    if (v_input.size() == 1)
+    {   // only one range : it is moved, there is nothing to merge
+        v_output.emplace_back(move_forward(dest, v_input[0]));
+        return;
+    };
+
+    uint32_t npending = v_input.size();
+    for (uint32_t pos = 0; pos < v_input.size();)
+    {   // number of groups needed for the pending ranges, and number of
+        // ranges assigned to the current group
+        uint32_t ngroup = (npending + 3) / 4;
+        uint32_t nin = (npending + ngroup - 1) / ngroup;
+
+        range1_t rmerged = full_merge4(dest, &v_input[pos], nin, comp);
+        v_output.emplace_back(rmerged);
+
+        // the next group is stored after the range just obtained
+        dest.first = rmerged.last;
+        pos += nin;
+        npending -= nin;
+    };
+};
+//
+//-----------------------------------------------------------------------------
+//  function : uninit_merge_level4
+/// @brief Do one level of merge over uninitialized memory. The ranges of
+///        v_input are taken in consecutive groups of, at most, four ranges,
+///        and each group is merged with uninit_full_merge4, which constructs
+///        the objects in dest. The ranges obtained are stored one after
+///        another in dest, and appended in the same order to v_output
+///
+/// @param dest : range where create and move the elements merged
+/// @param v_input : vector of ranges to merge
+/// @param v_output : vector of ranges obtained. Its previous content is
+///                   discarded
+/// @param comp : comparison object
+/// @remarks The function returns nothing, the result is in v_output
+//-----------------------------------------------------------------------------
+template<class Value_t, class Iter_t, class Compare>
+void uninit_merge_level4(range<Value_t *> dest,
+                         std::vector<range<Iter_t> > &v_input,
+                         std::vector<range<Value_t *> > &v_output, Compare comp)
+{
+    typedef range<Value_t *> range1_t;
+    typedef util::value_iter<Iter_t> type1;
+    static_assert (std::is_same< type1, Value_t >::value,
+                    "Incompatible iterators\n");
+
+    v_output.clear();
+    if (v_input.empty()) return;
+    if (v_input.size() == 1)
+    {   // only one range : it is move constructed, there is nothing to merge
+        v_output.emplace_back(move_construct(dest, v_input[0]));
+        return;
+    };
+
+    // here v_input has, at least, two ranges, so the loop runs at least once
+    uint32_t nleft = v_input.size();
+    uint32_t first_pos = 0;
+    do
+    {   // groups needed for the ranges left, and ranges of the current group
+        uint32_t ngroup = (nleft + 3) / 4;
+        uint32_t ntake = (nleft + ngroup - 1) / ngroup;
+
+        range1_t rbuilt =
+            uninit_full_merge4(dest, &v_input[first_pos], ntake, comp);
+        v_output.emplace_back(rbuilt);
+
+        // the next group is constructed after the range just obtained
+        dest.first = rbuilt.last;
+        first_pos += ntake;
+        nleft -= ntake;
+    } while (first_pos < v_input.size());
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge_vector4
+/// @brief Merge all the ranges of v_input in only one range, applying
+///        merge_level4 repeatedly. Each level moves the elements from one
+///        memory to the other, alternating between range_output and
+///        range_input, until only one range remains.
+///        The final result is in the range_output range.
+///        All the ranges of v_output are inside the range range_output
+///        All the ranges of v_input are inside the range range_input
+/// @param range_input : range including all the ranges of v_input
+/// @param range_output : range including all the elements of v_output
+/// @param v_input : vector of ranges to merge. It is modified by the function
+/// @param v_output : vector of ranges used as auxiliary memory. Its previous
+///                   content is discarded
+/// @param comp : comparison object
+/// @return range with all the elements moved
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+range<Iter2_t> merge_vector4(range<Iter1_t> range_input,
+                             range<Iter2_t> range_output,
+                             std::vector<range<Iter1_t> > &v_input,
+                             std::vector<range<Iter2_t> > &v_output,
+                             Compare comp)
+{
+    typedef range<Iter2_t> range2_t;
+    typedef util::value_iter<Iter1_t> type1;
+    typedef util::value_iter<Iter2_t> type2;
+    static_assert (std::is_same< type1, type2 >::value,
+                    "Incompatible iterators\n");
+
+    v_output.clear();
+    if (v_input.empty())
+    {
+        return range2_t(range_output.first, range_output.first);
+    };
+    if (v_input.size() == 1)
+    {
+        return move_forward(range_output, v_input[0]);
+    };
+
+    // in_output is true when the last level left its ranges in v_output
+    bool in_output = false;
+    uint32_t npending = v_input.size();
+
+    while (npending > 1)
+    {
+        if (not in_output)
+        {
+            merge_level4(range_output, v_input, v_output, comp);
+            npending = v_output.size();
+        }
+        else
+        {
+            merge_level4(range_input, v_output, v_input, comp);
+            npending = v_input.size();
+        };
+        in_output = not in_output;
+    };
+
+    // if the last range is in the input memory, it must be moved to the output
+    if (in_output) return v_output[0];
+    return move_forward(range_output, v_input[0]);
+};
+
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/pivot.hpp b/boost/sort/common/pivot.hpp
new file mode 100644
index 0000000000..5182fbd273
--- /dev/null
+++ b/boost/sort/common/pivot.hpp
@@ -0,0 +1,122 @@
+//----------------------------------------------------------------------------
+/// @file pivot.hpp
+/// @brief This file contains the description of several low level algorithms
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_PIVOT_HPP
+#define __BOOST_SORT_COMMON_PIVOT_HPP
+
+#include <cstdint>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+//
+//##########################################################################
+//                                                                        ##
+//                    G L O B A L     V A R I B L E S                     ##
+//                                                                        ##
+//##########################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : mid3
+/// @brief : return the iterator to the mid value of the three values passed
+///          as parameters
+//
+/// @param iter_1 : iterator to the first value
+/// @param iter_2 : iterator to the second value
+/// @param iter_3 : iterator to the third value
+/// @param comp : object to compare two values
+/// @return iterator to mid value
+//-----------------------------------------------------------------------------
+template < typename Iter_t, typename Compare >
+inline Iter_t mid3 (Iter_t iter_1, Iter_t iter_2, Iter_t iter_3, Compare comp)
+{
+    if (comp (*iter_1, *iter_2))
+    {   // value 1 is less than value 2
+        if (comp (*iter_2, *iter_3)) return iter_2;
+        return comp (*iter_1, *iter_3) ? iter_3 : iter_1;
+    };
+    // value 1 is not less than value 2
+    if (comp (*iter_3, *iter_2)) return iter_2;
+    return comp (*iter_3, *iter_1) ? iter_3 : iter_1;
+};
+//
+//-----------------------------------------------------------------------------
+//  function : pivot3
+/// @brief : receive a range between first and last, and obtain with mid3
+///          the iterator to the mid value of the elements in the second
+///          position, the central position and the previous to the last.
+///          The value found is swapped with the value of the first position
+//
+/// @param first : iterator to the first element
+/// @param last : iterator to the last element
+/// @param comp : object to compare two elements
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+inline void pivot3 (Iter_t first, Iter_t last, Compare comp)
+{
+    // offset of the central position of the range
+    auto half = (last - first) >> 1;
+    Iter_t it_median = mid3 (first + 1, first + half, last - 1, comp);
+    std::swap (*first, *it_median);
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function : mid9
+/// @brief : return the iterator obtained applying mid3 to the mid values of
+///          three groups of three values : (1, 2, 3), (4, 5, 6) and (7, 8, 9)
+//
+/// @param iter_1 : iterator to the first value
+/// @param iter_2 : iterator to the second value
+/// @param iter_3 : iterator to the third value
+/// @param iter_4 : iterator to the fourth value
+/// @param iter_5 : iterator to the fifth value
+/// @param iter_6 : iterator to the sixth value
+/// @param iter_7 : iterator to the seventh value
+/// @param iter_8 : iterator to the eighth value
+/// @param iter_9 : iterator to the ninth value
+/// @param comp : object to compare two values
+/// @return iterator to the value selected
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+inline Iter_t mid9 (Iter_t iter_1, Iter_t iter_2, Iter_t iter_3, Iter_t iter_4,
+                    Iter_t iter_5, Iter_t iter_6, Iter_t iter_7, Iter_t iter_8,
+                    Iter_t iter_9, Compare comp)
+{
+    // mid value of each group of three
+    Iter_t it_group1 = mid3 (iter_1, iter_2, iter_3, comp);
+    Iter_t it_group2 = mid3 (iter_4, iter_5, iter_6, comp);
+    Iter_t it_group3 = mid3 (iter_7, iter_8, iter_9, comp);
+    // mid value of the three values selected
+    return mid3 (it_group1, it_group2, it_group3, comp);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : pivot9
+/// @brief : receive a range between first and last, and select with mid9
+///          one of nine elements of the range : the second element, the
+///          elements at 1/8, 2/8 ... 7/8 of the range, and the previous to
+///          the last. The value selected is swapped with the value of the
+///          first position
+//
+/// @param first : iterator to the first element
+/// @param last : iterator to the last element
+/// @param comp : object to compare two elements
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+inline void pivot9 (Iter_t first, Iter_t last, Compare comp)
+{
+    // distance between two consecutive samples : an eighth of the range
+    size_t step = (last - first) >> 3;
+    Iter_t it_selected = mid9 (first + 1,
+                               first + step,
+                               first + 2 * step,
+                               first + 3 * step,
+                               first + 4 * step,
+                               first + 5 * step,
+                               first + 6 * step,
+                               first + 7 * step,
+                               last - 1, comp);
+    std::swap (*first, *it_selected);
+};
+//****************************************************************************
+}; //    End namespace common
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+#endif
diff --git a/boost/sort/common/range.hpp b/boost/sort/common/range.hpp
new file mode 100644
index 0000000000..072d98a938
--- /dev/null
+++ b/boost/sort/common/range.hpp
@@ -0,0 +1,399 @@
+//----------------------------------------------------------------------------
+/// @file range.hpp
+/// @brief Define a range [first, last), and the associated operations
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_RANGE_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_RANGE_HPP
+
+#include <boost/sort/common/util/algorithm.hpp>
+#include <boost/sort/common/util/merge.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <cassert>
+#include <functional>
+#include <memory>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+///---------------------------------------------------------------------------
+/// @struct range
+/// @brief this represents the range [first, last) between two iterators
+/// @remarks valid, size and back need an Iter_t with difference and it - 1
+//----------------------------------------------------------------------------
+template <class Iter_t>
+struct range
+{
+    Iter_t first, last;
+    //
+    //------------------------------------------------------------------------
+    //  function : range
+    /// @brief  empty constructor. The two iterators are value-initialized
+    //------------------------------------------------------------------------
+    range(void): first(), last() { };
+    //
+    //------------------------------------------------------------------------
+    //  function : range
+    /// @brief  constructor with two parameters
+    /// @param frs : iterator to the first element
+    /// @param lst : iterator after the last element
+    //-----------------------------------------------------------------------
+    range(const Iter_t &frs, const Iter_t &lst): first(frs), last(lst) { };
+    //
+    //-----------------------------------------------------------------------
+    //  function : empty
+    /// @brief indicate if the range is empty
+    /// @return  true : empty false : not empty
+    //-----------------------------------------------------------------------
+    bool empty(void) const { return (first == last); };
+    //
+    //-----------------------------------------------------------------------
+    //  function : not_empty
+    /// @brief indicate if the range is not empty
+    /// @return  true : not empty false : empty
+    //-----------------------------------------------------------------------
+    bool not_empty(void) const {return (first != last); };
+    //
+    //-----------------------------------------------------------------------
+    //  function : valid
+    /// @brief  Indicate if the range is well constructed ( last - first >= 0)
+    /// @return true : valid,  false : not valid
+    //-----------------------------------------------------------------------
+    bool valid(void) const { return ((last - first) >= 0); };
+    //
+    //-----------------------------------------------------------------------
+    //  function : size
+    /// @brief  return the size of the range
+    /// @return number of elements, last - first converted to size_t
+    //-----------------------------------------------------------------------
+    size_t size(void) const { return (last - first); };
+    //
+    //------------------------------------------------------------------------
+    //  function : front
+    /// @brief return an iterator to the first element of the range
+    /// @return iterator, it is first
+    //-----------------------------------------------------------------------
+    Iter_t front(void) const { return first; };
+    //
+    //-------------------------------------------------------------------------
+    //  function : back
+    /// @brief return an iterator to the last element of the range
+    /// @return iterator, it is last - 1
+    //-------------------------------------------------------------------------
+    Iter_t back(void) const {return (last - 1); };
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function : concat
+/// @brief join two contiguous ranges in the range from it1.first to it2.last
+/// @param it1 : first range
+/// @param it2 : second range, expected to begin where it1 finishes
+/// @return  range resulting of the concatenation
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+inline range<Iter_t> concat(const range<Iter_t> &it1, const range<Iter_t> &it2)
+{
+    range<Iter_t> joined(it1.first, it2.last);
+    return joined;
+}
+//
+//-----------------------------------------------------------------------------
+//  function : move_forward
+/// @brief Move initialized objects from the range src to the range dest
+/// @param dest : range which receives the objects, not smaller than src
+/// @param src : range which provides the objects
+/// @return range from dest.first to the iterator given by util::move_forward
+//-----------------------------------------------------------------------------
+template <class Iter1_t, class Iter2_t>
+inline range<Iter2_t> move_forward(const range<Iter2_t> &dest,
+                                   const range<Iter1_t> &src)
+{
+    assert(src.size() <= dest.size());
+    return range<Iter2_t>(dest.first,
+                    util::move_forward(dest.first, src.first, src.last));
+};
+//
+//-----------------------------------------------------------------------------
+//  function : move_backward
+/// @brief Move initialized objects from the range src to dest, backwards
+/// @param dest : range which receives the objects, not smaller than src
+/// @param src : range which provides the objects
+/// @return range from dest.first to dest.first + src.size()
+//-----------------------------------------------------------------------------
+template <class Iter1_t, class Iter2_t>
+inline range<Iter2_t> move_backward(const range<Iter2_t> &dest,
+                                    const range<Iter1_t> &src)
+{
+    assert(dest.size() >= src.size());
+    Iter2_t it_end = dest.first + src.size(); // end of the objects moved
+    util::move_backward(it_end, src.first, src.last);
+    return range<Iter2_t>(dest.first, it_end);
+};
+
+//-----------------------------------------------------------------------------
+//  function : move_construct
+/// @brief Move the objects of the range src, creating them in the
+///        uninitialized memory of the range dest
+/// @param dest : range where move and create the objects
+/// @param src : range from where move the objects
+/// @return range from dest.first to the pointer given by util::move_construct
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Value_t = util::value_iter<Iter_t> >
+inline range<Value_t*> move_construct(const range<Value_t*> &dest,
+                                      const range<Iter_t> &src)
+{
+    return range<Value_t*>(dest.first,
+                    util::move_construct(dest.first, src.first, src.last));
+};
+//
+//-----------------------------------------------------------------------------
+//  function : destroy
+/// @brief destroy the objects of a range, delegating in util::destroy
+/// @param rng : range whose elements, from rng.first to rng.last, are passed
+/// @remarks the range object is received by value
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+inline void destroy(range<Iter_t> rng)
+{   util::destroy(rng.first, rng.last);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : initialize
+/// @brief initialize the elements of a range from val, with util::initialize
+/// @param rng : range of elements not initialized
+/// @param val : object used for the initialization, passed to the helper
+/// @return range with the same iterators received in rng
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Value_t = util::value_iter<Iter_t> >
+inline range<Iter_t> initialize(const range<Iter_t> &rng, Value_t &val)
+{
+    util::initialize(rng.first, rng.last, val);
+    return range<Iter_t>(rng.first, rng.last);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : is_mergeable
+/// @brief : indicate if two ranges need a merge, because the first element
+///          of src2 must go before the last element of src1
+/// @param src1 : first range
+/// @param src2 : second range
+/// @param comp : object used to compare elements
+/// @return the result of comp ( first of src2 , last of src1 )
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+inline bool is_mergeable(const range<Iter1_t> &src1, const range<Iter2_t> &src2,
+                         Compare comp)
+{
+    //------------------------------------------------------------------------
+    //        Metaprogramming : the two ranges must hold the same type
+    //------------------------------------------------------------------------
+    static_assert (std::is_same< util::value_iter<Iter1_t>,
+                                 util::value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+    //------------------------------------------------------------------------
+    //        Code : first element of src2 against last element of src1
+    Iter2_t it_head2 = src2.front();
+    Iter1_t it_tail1 = src1.back();
+    return comp(*it_head2, *it_tail1);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : is_mergeable_stable
+/// @brief : indicate if two ranges need a merge, because the last element
+///          of src1 is not before the first element of src2
+/// @param src1 : first range
+/// @param src2 : second range
+/// @param comp : object used to compare elements
+/// @return the negation of comp ( last of src1 , first of src2 )
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+inline bool is_mergeable_stable(const range<Iter1_t> &src1,
+                                const range<Iter2_t> &src2, Compare comp)
+{
+    //------------------------------------------------------------------------
+    //        Metaprogramming : the two ranges must hold the same type
+    //------------------------------------------------------------------------
+    static_assert (std::is_same< util::value_iter<Iter1_t>,
+                                 util::value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+    //------------------------------------------------------------------------
+    //        Code : last element of src1 against first element of src2
+    Iter1_t it_tail1 = src1.back();
+    Iter2_t it_head2 = src2.front();
+    return !comp(*it_tail1, *it_head2);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge
+/// @brief Merge the ranges src1 and src2 with util::merge, putting the
+///        result from the beginning of the range dest
+///
+/// @param dest : range where locate the elements merged. The size of dest
+///               must be greater or equal than the sum of the sizes of
+///               src1 and src2
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge
+/// @param comp : comparison object
+/// @return range from dest.first to the iterator returned by util::merge
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Iter3_t, class Compare>
+inline range<Iter3_t> merge(const range<Iter3_t> &dest,
+                            const range<Iter1_t> &src1,
+                            const range<Iter2_t> &src2, Compare comp)
+{
+    Iter3_t dest_end = util::merge(src1.first, src1.last,
+                                   src2.first, src2.last, dest.first, comp);
+    return range<Iter3_t>(dest.first, dest_end);
+};
+
+//-----------------------------------------------------------------------------
+//  function : merge_construct
+/// @brief Merge the ranges src1 and src2 with util::merge_construct, which
+///        creates the merged elements in the uninitialized range dest,
+///        and return the range merged
+//
+/// @param dest : range where locate the elements merged. The size of dest
+///               must be greater or equal than the sum of the sizes of
+///               src1 and src2. Initially it is uninitialized memory
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge
+/// @param comp : comparison object
+/// @return range from dest.first to the pointer returned by the helper
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Value_t, class Compare>
+inline range<Value_t *> merge_construct(const range<Value_t *> &dest,
+                                        const range<Iter1_t> &src1,
+                                        const range<Iter2_t> &src2,
+                                        Compare comp)
+{
+    Value_t * dest_end = util::merge_construct(src1.first, src1.last,
+                                   src2.first, src2.last, dest.first, comp);
+    return range<Value_t*>(dest.first, dest_end);
+};
+//
+//---------------------------------------------------------------------------
+//  function : merge_half
+/// @brief : Merge two initialized buffers with util::merge_half. The first
+///          buffer is in a separate memory
+//
+/// @param dest : range where finish the two buffers merged
+/// @param src1 : first range to merge in a separate memory
+/// @param src2 : second range to merge, in the final part of the
+///               range where deposit the final results
+/// @param comp : object for compare two elements of the type pointed
+///               by the Iter1_t and Iter2_t
+/// @return : range from dest.first to the iterator returned by the helper
+//---------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+inline range<Iter2_t> merge_half(const range<Iter2_t> &dest,
+                                 const range<Iter1_t> &src1,
+                                 const range<Iter2_t> &src2, Compare comp)
+{
+    Iter2_t dest_end = util::merge_half(src1.first, src1.last,
+                                   src2.first, src2.last, dest.first, comp);
+    return range<Iter2_t>(dest.first, dest_end);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge_uncontiguous
+/// @brief : merge two non contiguous ranges src1, src2, using the range
+///          aux as auxiliary memory. The results are in the original ranges
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge
+/// @param aux : auxiliary range used in the merge, only aux.first is passed
+/// @param comp : object used to compare elements
+/// @return value given by util::merge_uncontiguous, converted to bool
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Iter3_t, class Compare>
+inline bool merge_uncontiguous(const range<Iter1_t> &src1,
+                               const range<Iter2_t> &src2,
+                               const range<Iter3_t> &aux, Compare comp)
+{
+    const bool result = util::merge_uncontiguous(src1.first, src1.last,
+                                   src2.first, src2.last, aux.first, comp);
+    return result;
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge_contiguous
+/// @brief : merge two contiguous ranges ( src1, src2) using buf as
+///          auxiliary memory. The results are in the same ranges
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge. Only src2.last is passed to the helper
+/// @param buf : auxiliary memory used in the merge
+/// @param comp : object used to compare elements
+/// @return range from src1.first to src2.last
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+inline range<Iter1_t> merge_contiguous(const range<Iter1_t> &src1,
+                                       const range<Iter1_t> &src2,
+                                       const range<Iter2_t> &buf, Compare comp)
+{
+    util::merge_contiguous(src1.first, src1.last, src2.last, buf.first, comp);
+    return range<Iter1_t>(src1.first, src2.last);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge_flow
+/// @brief : merge two ranges, as part of the merge of the ranges of a list.
+///         It is intended to reduce the number of movements compared with
+///         inplace_merge when you need to merge a sequence of ranges.
+///         This function merges the ranges rbuf and rng2, and the results
+///         are in rng1 and rbuf
+//
+/// @param rng1 : range where locate the first elements of the merge
+/// @param rbuf : range which provides the first elements, and where store
+///               the last results of the merge
+/// @param rng2 : range which provides the last elements to merge
+/// @param cmp : object used to compare elements
+/// @remarks the three ranges must have the same size ( checked by assert )
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static void merge_flow(range<Iter1_t> rng1, range<Iter2_t> rbuf,
+                       range<Iter1_t> rng2, Compare cmp)
+{
+    //-------------------------------------------------------------------------
+    //        Metaprogramming : the two iterators must share the value type
+    //-------------------------------------------------------------------------
+    static_assert (std::is_same< util::value_iter<Iter1_t>,
+                                 util::value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                       Code
+    //-------------------------------------------------------------------------
+    range<Iter1_t> out1(rng1), in2(rng2);
+    range<Iter2_t> inb(rbuf);
+    assert(inb.size() == out1.size() && out1.size() == in2.size());
+    while (out1.first != out1.last)
+    {   // head of rbuf when cmp (head rbuf, head rng2), else head of rng2
+        if (cmp(*inb.first, *in2.first))
+            *(out1.first++) = std::move(*(inb.first++));
+        else
+            *(out1.first++) = std::move(*(in2.first++));
+    };
+    if (in2.first == in2.last) return;
+    if (inb.first == inb.last) move_forward(rbuf, rng2);
+    else                       merge_half(rbuf, in2, inb, cmp);
+};
+
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/rearrange.hpp b/boost/sort/common/rearrange.hpp
new file mode 100644
index 0000000000..5c65c4f2b7
--- /dev/null
+++ b/boost/sort/common/rearrange.hpp
@@ -0,0 +1,168 @@
+//----------------------------------------------------------------------------
+/// @file rearrange.hpp
+/// @brief Indirect algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_REARRANGE_HPP
+#define __BOOST_SORT_COMMON_REARRANGE_HPP
+
+//#include <boost/sort/common/atomic.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <functional>
+#include <iterator>
+#include <type_traits>
+#include <vector>
+#include <cassert>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+template<class Iter_data>
+struct filter_iterator
+{
+    //-----------------------------------------------------------------------
+    //    Variables : iterator taken as the position 0
+    //-----------------------------------------------------------------------
+    Iter_data origin;
+
+    //-----------------------------------------------------------------------
+    //    Functions : the constructor stores the origin, and operator ( )
+    //    returns the distance itx - origin converted to size_t
+    //-----------------------------------------------------------------------
+    filter_iterator(Iter_data global_first): origin(global_first) { };
+    size_t operator ()(Iter_data itx) const
+    {   return static_cast<size_t>(itx - origin);
+    }
+};
+
+struct filter_pos
+{   // identity filter : returns the same position received
+    size_t operator ()(size_t pos) const { return pos; }
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function : rearrange
+/// @brief Transform the logical order described by the index [itx_first,
+///        itx_last) in a physical order of the data, following the cycles
+/// @param global_first : iterator to the first element of the data
+/// @param itx_first, itx_last : index, pos(index[i]) is the source position
+/// @param pos : functor which converts an index element in a position
+//-----------------------------------------------------------------------------
+template<class Iter_data, class Iter_index, class Filter_pos>
+void rearrange(Iter_data global_first, Iter_index itx_first,
+               Iter_index itx_last, Filter_pos pos)
+{
+    //-----------------------------------------------------------------------
+    //                    Metaprogramming
+    //-----------------------------------------------------------------------
+    typedef util::value_iter<Iter_data>     value_data;
+    typedef util::value_iter<Iter_index>    value_index;
+
+    //-------------------------------------------------------------------------
+    //                     Code
+    //-------------------------------------------------------------------------
+    assert((itx_last - itx_first) >= 0);
+    size_t pos_dest, pos_src, pos_ini;
+    size_t nelem = size_t(itx_last - itx_first);
+    Iter_data data = global_first;
+    Iter_index index = itx_first;
+
+    pos_ini = 0;
+    while (pos_ini < nelem)
+    {
+        while (pos_ini < nelem and pos(index[pos_ini]) == pos_ini)
+            ++pos_ini;              // skip the elements already in their place
+        if (pos_ini == nelem) return;
+        pos_dest = pos_src = pos_ini;   // beginning of a cycle
+        value_data aux = std::move(data[pos_ini]);  // element kept apart
+        value_index itx_src = std::move(index[pos_ini]);
+        // each step fills pos_dest from pos_src, and index[pos_src] is fixed
+        while ((pos_src = pos(itx_src)) != pos_ini)
+        {
+            data[pos_dest] = std::move(data[pos_src]);
+            std::swap(itx_src, index[pos_src]);
+            pos_dest = pos_src;
+        };
+        // the cycle is closed with the element kept in aux
+        data[pos_dest] = std::move(aux);
+        index[pos_ini] = std::move(itx_src);
+        ++pos_ini;
+    };
+};
+
+/*
+ //
+ //-----------------------------------------------------------------------------
+ //  function : rearrange_pos
+ /// @brief This function transform a logical sort of the elements in the index  
+ ///        of iterators in a physical sort. 
+ //
+ /// @param global_first : iterator to the first element of the data
+ /// @param [in] index : vector of the iterators
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Number >
+ void rearrange_pos (Iter_t global_first, std::vector< Number> &index)
+ {	
+ //-------------------------------------------------------------------------
+ //          METAPROGRAMMING AND DEFINITIONS
+ //-------------------------------------------------------------------------
+ static_assert ( std::is_integral<Number>::value, "Incompatible Types");
+ typedef iter_value< Iter_t > value_t;
+
+ //-------------------------------------------------------------------------
+ //                     CODE
+ //-------------------------------------------------------------------------
+ size_t pos_dest = 0;
+ size_t pos_src = 0;
+ size_t pos_ini = 0;
+ size_t nelem = index.size ( );
+ Iter_t it_dest (global_first), it_src(global_first);
+
+ while (pos_ini < nelem)
+ {
+ while (pos_ini < nelem and
+ index[pos_ini] == pos_ini)
+ {
+ ++pos_ini;
+ };
+
+ if (pos_ini == nelem) return;
+ pos_dest = pos_src = pos_ini;
+ it_dest = global_first + pos_dest;
+ value_t Aux = std::move (*it_dest);
+
+ while ((pos_src = index[pos_dest]) != pos_ini)
+ {
+ index[pos_dest] = it_dest - global_first;
+ it_src = global_first + pos_src;
+ *it_dest = std::move (*it_src);
+ it_dest = it_src;
+ pos_dest = pos_src;
+ };
+
+ *it_dest = std::move (Aux);
+ index[pos_dest] = it_dest - global_first;
+ ++pos_ini;
+ };
+ };
+ */
+//
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/scheduler.hpp b/boost/sort/common/scheduler.hpp
new file mode 100644
index 0000000000..33074a4534
--- /dev/null
+++ b/boost/sort/common/scheduler.hpp
@@ -0,0 +1,276 @@
+//----------------------------------------------------------------------------
+/// @file   scheduler.hpp
+/// @brief  This file contains the implementation of the scheduler for
+///         dispatch the works stored
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_SCHEDULER_HPP
+#define __BOOST_SORT_COMMON_SCHEDULER_HPP
+
+#include <boost/sort/common/spinlock.hpp>
+#include <boost/sort/common/search.hpp>
+#include <boost/sort/common/compare_traits.hpp>
+#include <scoped_allocator>
+#include <utility>
+#include <vector>
+#include <deque>
+#include <iostream>
+#include <unordered_map>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//
+//###########################################################################
+//                                                                         ##
+//    ################################################################     ##
+//    #                                                              #     ##
+//    #           C L A S S      S C H E D U L E R                   #     ##
+//    #                                                              #     ##
+//    ################################################################     ##
+//                                                                         ##
+//###########################################################################
+
+//
+//---------------------------------------------------------------------------
+/// @class  scheduler
+/// @brief This class stores the works in one deque per thread, kept in a map
+/// @remarks the functions which access the data lock the spinlock spl
+//---------------------------------------------------------------------------
+template<typename Func_t, typename Allocator = std::allocator<Func_t> >
+struct scheduler
+{
+    //-----------------------------------------------------------------------
+    //                     D E F I N I T I O N S
+    //-----------------------------------------------------------------------
+    typedef std::scoped_allocator_adaptor <Allocator>   scoped_alloc;
+    typedef std::deque <Func_t, scoped_alloc>           deque_t;
+    typedef typename deque_t::iterator                  it_deque;
+    typedef std::thread::id                             key_t;
+    typedef std::hash <key_t>                           hash_t;
+    typedef std::equal_to <key_t>                       equal_t;
+    typedef std::unique_lock <spinlock_t>               lock_t;
+    typedef std::unordered_map <key_t, deque_t, hash_t,
+                        equal_t, scoped_alloc>          map_t;
+    typedef typename map_t::iterator                    it_map;
+
+    //-----------------------------------------------------------------------
+    //                     V A R I A B L E S
+    //-----------------------------------------------------------------------
+    map_t mp;               // deque of works of each thread
+    size_t nelem;           // number of works stored in all the deques
+    mutable spinlock_t spl; // lock of the data, taken even in const functions
+
+    //------------------------------------------------------------------------
+    //  function : scheduler
+    /// @brief  constructor
+    //------------------------------------------------------------------------
+    scheduler(void) : mp(), nelem(0)  { };
+    //
+    //-----------------------------------------------------------------------
+    //  function : scheduler
+    /// @brief  Copy & move constructor. The two are deleted
+    /// @param [in] VT : scheduler from where copy the data
+    //-----------------------------------------------------------------------
+    scheduler(scheduler && VT) = delete;
+    scheduler(const scheduler & VT) = delete;
+    //
+    //------------------------------------------------------------------------
+    //  function : ~scheduler
+    /// @brief  Destructor
+    //------------------------------------------------------------------------
+    virtual ~scheduler(void) {mp.clear();};
+    //
+    //------------------------------------------------------------------------
+    //  function : operator =
+    /// @brief Assignment operator. It is deleted
+    /// @param [in] VT : scheduler from where copy the data
+    /// @return Reference to the scheduler after the copy
+    //------------------------------------------------------------------------
+    scheduler & operator=(const scheduler &VT) = delete;
+    //
+    //------------------------------------------------------------------------
+    //  function : size
+    /// @brief Number of works stored
+    /// @return value of nelem, read with the lock taken
+    /// @remarks it counts the works of all the threads
+    //------------------------------------------------------------------------
+    size_t size(void) const
+    {
+        lock_t s(spl);
+        return nelem;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : clear_all
+    /// @brief Delete all the works of all the threads
+    //------------------------------------------------------------------------
+    void clear_all(void)
+    {
+        lock_t s(spl);
+        mp.clear();
+        nelem = 0;
+    };
+
+    //
+    //------------------------------------------------------------------------
+    //  function : insert
+    /// @brief Insert one work in the back of the deque of the calling thread
+    /// @param [in] f : work to insert. It is moved into the deque, and f
+    ///                 is left in the moved-from state
+    /// @remarks if the thread has not deque in the map, it is created
+    /// @remarks throws std::bad_alloc if the deque can't be created
+    //------------------------------------------------------------------------
+    void insert(Func_t & f)
+    {
+        lock_t s(spl);
+        key_t th_id = std::this_thread::get_id();
+        it_map itmp = mp.find(th_id);
+        if (itmp == mp.end())
+        {
+            auto aux = mp.emplace(th_id, deque_t());
+            if (aux.second == false) throw std::bad_alloc();
+            itmp = aux.first;
+        };
+        itmp->second.emplace_back(std::move(f));
+        nelem++;
+    };
+
+    //
+    //------------------------------------------------------------------------
+    //  function :emplace
+    /// @brief Build one work in the back of the deque of the calling thread
+    /// @param [in] args :group of arguments for to build the object to insert
+    /// @remarks if the thread has not deque in the map, it is created
+    /// @remarks throws std::bad_alloc if the deque can't be created
+    //------------------------------------------------------------------------
+    template<class ... Args>
+    void emplace(Args && ... args)
+    {
+        lock_t s(spl);
+        key_t th_id = std::this_thread::get_id();
+        it_map itmp = mp.find(th_id);
+        if (itmp == mp.end())
+        {
+            auto aux = mp.emplace(th_id, deque_t());
+            if (aux.second == false) throw std::bad_alloc();
+            itmp = aux.first;
+        };
+        itmp->second.emplace_back(std::forward <Args>(args) ...);
+        nelem++;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : insert_range
+    /// @brief Insert the works of [first, last) in the back of the deque of
+    ///        the calling thread. The works are moved from the range
+    /// @param [in] first, last : iterators to the range of works, not empty
+    /// @remarks throws std::bad_alloc if the deque can't be created
+    //------------------------------------------------------------------------
+    template<class it_func>
+    void insert_range(it_func first, it_func last)
+    {
+        //--------------------------------------------------------------------
+        //                    Metaprogramming
+        //--------------------------------------------------------------------
+        typedef value_iter<it_func> value2_t;
+        static_assert (std::is_same< Func_t, value2_t >::value,
+                        "Incompatible iterators\n");
+
+        //--------------------------------------------------------------------
+        //                     Code
+        //--------------------------------------------------------------------
+        assert((last - first) > 0);
+
+        lock_t s(spl);
+        key_t th_id = std::this_thread::get_id();
+        it_map itmp = mp.find(th_id);
+        if (itmp == mp.end())
+        {
+            auto aux = mp.emplace(th_id, deque_t());
+            if (aux.second == false) throw std::bad_alloc(); // not inserted
+            itmp = aux.first;
+        };
+        while (first != last)
+        {
+            itmp->second.emplace_back(std::move(*(first++)));
+            nelem++;
+        };
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : extract
+    /// @brief remove one work from the back of a deque and move it to f
+    /// @param [out] f : reference to the variable which receives the work
+    /// @return true : a work has been extracted
+    ///         false : there are no works stored, f is not modified
+    /// @remarks the deque of the calling thread is examined first. If it
+    ///          has no works, the work is taken from another thread
+    //------------------------------------------------------------------------
+    bool extract(Func_t & f)
+    {
+        lock_t s(spl);
+        if (nelem == 0) return false;
+        key_t th_id = std::this_thread::get_id();
+        it_map itmp = mp.find(th_id);
+        if (itmp != mp.end() and not itmp->second.empty())
+        {
+            f = std::move(itmp->second.back());
+            itmp->second.pop_back();
+            --nelem;
+            return true;
+        };
+        for (itmp = mp.begin(); itmp != mp.end(); ++itmp)
+        {
+            if (itmp->second.empty()) continue;
+            f = std::move(itmp->second.back());
+            itmp->second.pop_back();
+            --nelem;
+            return true;
+        }
+        return false;
+    };
+};
+// end class scheduler
+//*************************************************************************
+//               P R I N T      F U N C T I O N S
+//************************************************************************
+template<class ... Args>
+std::ostream & operator <<(std::ostream &out, const std::deque<Args ...> & dq)
+{   // print every element followed by a blank, and finish the line
+    for (const auto & elem : dq)
+        out << elem << " ";
+    out << std::endl;
+    return out;
+}
+
+template<typename Func_t, typename Allocator = std::allocator<Func_t> >
+std::ostream & operator <<(std::ostream &out,
+                           const scheduler<Func_t, Allocator> &sch)
+{
+    std::unique_lock < spinlock_t > guard(sch.spl); // locked while printing
+    out << "Nelem :" << sch.nelem << std::endl;
+    for (const auto & entry : sch.mp)
+    {   // thread identifier, followed by the works of its deque
+        out << entry.first << "  :" << entry.second << std::endl;
+    }
+    return out;
+}
+
+//***************************************************************************
+};// end namespace common
+};// end namespace sort
+};// end namespace boost
+//***************************************************************************
+#endif
diff --git a/boost/sort/common/sort_basic.hpp b/boost/sort/common/sort_basic.hpp
new file mode 100644
index 0000000000..68a6f54048
--- /dev/null
+++ b/boost/sort/common/sort_basic.hpp
@@ -0,0 +1,334 @@
+//----------------------------------------------------------------------------
+/// @file sort_basic.hpp
+/// @brief Spin Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_SORT_BASIC_HPP
+#define __BOOST_SORT_COMMON_SORT_BASIC_HPP
+
+//#include <boost/sort/spinsort/util/indirect.hpp>
+#include <boost/sort/insert_sort/insert_sort.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/range.hpp>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+#include <cstddef>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//----------------------------------------------------------------------------
+//                USING SENTENCES
+//----------------------------------------------------------------------------
+using boost::sort::insert_sort;
+
+//-----------------------------------------------------------------------------
+//  function : is_stable_sorted_forward
+/// @brief scan the range [first, last) from the front and return an iterator
+///        to the first element for which comp(element, predecessor) is true
+/// @param first : iterator to the first element in the range
+/// @param last : iterator after the last element of the range
+/// @param comp : object used to compare two elements
+/// @return iterator to the first element not stable sorted. The number of
+///         elements sorted is the iterator returned minus first
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = std::less<value_iter<Iter_t> > >
+inline Iter_t is_stable_sorted_forward (Iter_t first, Iter_t last,
+                                        Compare comp = Compare())
+{
+#ifdef __BS_DEBUG
+    assert ( (last- first) >= 0);
+#endif
+    if ((last - first) < 2) return first; // fewer than 2 elements : first
+    Iter_t it2 = first + 1;
+    for (Iter_t it1 = first; it2 != last and not comp(*it2, *it1); it1 = it2++);
+    return it2; // equal to last when the whole range is sorted
+}
+//-----------------------------------------------------------------------------
+//  function : is_reverse_stable_sorted_forward
+/// @brief scan the range [first, last) from the front while comp(element,
+///        predecessor) is true, and return an iterator to the first element
+///        which breaks that descending run
+/// @param first : iterator to the first element in the range
+/// @param last : iterator after the last element of the range
+/// @param comp : object used to compare two elements
+/// @return iterator to the first element not reverse stable sorted. The number
+///         of elements sorted is the iterator returned minus first
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = std::less<value_iter<Iter_t> > >
+inline Iter_t is_reverse_stable_sorted_forward(Iter_t first, Iter_t last,
+                                               Compare comp = Compare())
+{
+#ifdef __BS_DEBUG
+    assert ( (last- first) >= 0);
+#endif
+    if ((last - first) < 2) return first; // fewer than 2 elements : first
+    Iter_t it2 = first + 1;
+    for (Iter_t it1 = first; it2 != last and comp(*it2, *it1); it1 = it2++);
+    return it2; // equal to last when the whole range is descending
+};
+//-----------------------------------------------------------------------------
+//  function : number_stable_sorted_forward
+/// @brief count the elements already sorted at the beginning of the range. A
+///        descending beginning of at least min_process elements is reversed
+/// @param first : iterator to the first element in the range
+/// @param last : iterator after the last element of the range
+/// @param min_process : minimal number of elements to be considered
+/// @param comp : object used to compare two elements
+/// @return number of elements sorted. If the number is lower than min_process
+///         return 0
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = std::less<value_iter<Iter_t> > >
+size_t number_stable_sorted_forward (Iter_t first, Iter_t last,
+		                             size_t min_process,
+                                     Compare comp = Compare())
+{
+#ifdef __BS_DEBUG
+    assert ( (last- first) >= 0);
+#endif
+    if ((last - first) < 2) return 0;
+
+    // length of the run starting at first where no element is lower (comp)
+    Iter_t it2 = first + 1;
+    for (Iter_t it1 = first; it2 != last and not comp(*it2, *it1); it1 = it2++);
+    size_t nsorted = size_t ( it2 - first);
+    if ( nsorted != 1)
+    	return (nsorted >= min_process) ? nsorted: 0;
+
+    // the second element is lower than the first : measure the descending run
+    it2 = first + 1;
+    for (Iter_t it1 = first; it2 != last and comp(*it2, *it1); it1 = it2++);
+    nsorted = size_t ( it2 - first);
+
+    if ( nsorted < min_process) return 0 ;
+    util::reverse ( first , it2); // presumably reverses [first, it2) in place
+    return nsorted;
+};
+
+//-----------------------------------------------------------------------------
+//  function : is_stable_sorted_backward
+/// @brief scan the range [first, last) from the back while the elements are
+///        stable sorted, and return an iterator to the first element of that
+///        sorted tail
+/// @param first : iterator to the first element in the range
+/// @param last : iterator after the last element of the range
+/// @param comp : object used to compare two elements
+/// @return iterator to the first element of the sorted tail. The number of
+///         elements sorted is the last minus the iterator returned
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = std::less<value_iter<Iter_t> > >
+inline Iter_t is_stable_sorted_backward(Iter_t first, Iter_t last,
+                                        Compare comp = Compare())
+{
+#ifdef __BS_DEBUG
+    assert ( (last- first) >= 0);
+#endif
+    if ((last - first) < 2) return first; // fewer than 2 elements : first
+    Iter_t itaux = last - 1;
+    while (itaux != first and not comp(*itaux, *(itaux - 1))) {--itaux; };
+    return itaux; // equal to first when the whole range is sorted
+}
+//-----------------------------------------------------------------------------
+//  function : is_reverse_stable_sorted_backward
+/// @brief scan the range [first, last) from the back while comp(element,
+///        predecessor) is true, and return an iterator to the first element
+///        of that descending tail
+/// @param first : iterator to the first element in the range
+/// @param last : iterator after the last element of the range
+/// @param comp : object used to compare two elements
+/// @return iterator to the first element of the descending tail. The number
+///         of elements in the tail is the last minus the iterator returned
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = std::less<value_iter<Iter_t> > >
+inline Iter_t is_reverse_stable_sorted_backward (Iter_t first, Iter_t last,
+                                                 Compare comp = Compare())
+{
+#ifdef __BS_DEBUG
+    assert ( (last- first) >= 0);
+#endif
+    if ((last - first) < 2) return first; // fewer than 2 elements : first
+    Iter_t itaux = last - 1;
+    for (; itaux != first and comp(*itaux, *(itaux - 1)); --itaux);
+    return itaux; // equal to first when the whole range is descending
+}
+
+//-----------------------------------------------------------------------------
+//  function : number_stable_sorted_backward
+/// @brief count the elements already sorted at the end of the range. A
+///        descending tail of at least min_process elements is reversed
+/// @param first : iterator to the first element in the range
+/// @param last : iterator after the last element of the range
+/// @param min_process : minimal number of elements to be considered
+/// @param comp : object used to compare two elements
+/// @return number of elements sorted. If the number is lower than min_process
+///         return 0
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare = std::less<value_iter<Iter_t> > >
+size_t number_stable_sorted_backward (Iter_t first, Iter_t last,
+		                             size_t min_process,
+                                     Compare comp = Compare())
+{
+#ifdef __BS_DEBUG
+    assert ( (last- first) >= 0);
+#endif
+    if ((last - first) < 2) return 0;
+    Iter_t itaux = last - 1; // walk back while no element is lower (comp)
+    while (itaux != first and not comp(*itaux, *(itaux - 1))) {--itaux; };
+    size_t nsorted = size_t ( last - itaux);
+    if ( nsorted != 1)
+    	return ( nsorted >= min_process)?nsorted: 0 ;
+
+    itaux = last - 1; // last element lower than previous : descending tail
+    for (; itaux != first and comp(*itaux, *(itaux - 1)); --itaux);
+    nsorted = size_t ( last - itaux);
+    if ( nsorted < min_process) return 0 ;
+    util::reverse ( itaux, last ); // presumably reverses [itaux, last) in place
+    return nsorted;
+}
+//-----------------------------------------------------------------------------
+//  function : internal_sort
+/// @brief this function divides rng1 in two halves, sorts each half, and calls
+///        merge with rng2 and both halves
+/// @param rng1 : range whose two halves are sorted and then passed to merge
+/// @param rng2 : range passed as first argument to merge
+/// @param comp : object used to compare two elements
+/// @param level : recursion depth. Its lowest bit is compared with even
+/// @param even : the halves are sorted with insert_sort when they have 32 or
+///               fewer elements and (level & 1) == even. If not, recursive call
+//-----------------------------------------------------------------------------
+template <class Iter1_t, class Iter2_t, class Compare>
+inline void internal_sort (const range<Iter1_t> &rng1,
+		                   const range<Iter2_t> &rng2,
+                           Compare comp, uint32_t level, bool even = true)
+{
+    //-----------------------------------------------------------------------
+    //                  metaprogram
+    //-----------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same< value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //-----------------------------------------------------------------------
+    //                  program
+    //-----------------------------------------------------------------------
+#ifdef __BS_DEBUG
+    assert (rng1.size ( ) == rng2.size ( ) );
+#endif
+    size_t nelem = (rng1.size() + 1) >> 1; // size of the left half, rounded up
+
+    range<Iter1_t> rng1_left(rng1.first, rng1.first + nelem), 
+                   rng1_right(rng1.first + nelem, rng1.last);
+
+    range<Iter2_t> rng2_left(rng2.first, rng2.first + nelem), 
+                   rng2_right(rng2.first + nelem, rng2.last);
+
+    if (nelem <= 32 and (level & 1) == even)
+    {
+        insert_sort(rng1_left.first, rng1_left.last, comp);
+        insert_sort(rng1_right.first, rng1_right.last, comp);
+    }
+    else
+    {   // the two ranges swap their roles in the next level
+        internal_sort(rng2_left, rng1_left, comp, level + 1, even);
+        internal_sort(rng2_right, rng1_right, comp, level + 1, even);
+    };
+    merge(rng2, rng1_left, rng1_right, comp); // NOTE(review): defined elsewhere
+};
+//-----------------------------------------------------------------------------
+//  function : range_sort_data
+/// @brief sort the elements of rng_data, receiving rng_aux as a buffer of
+///        initialized memory. Up to 32 elements are sorted with insert_sort
+/// @param rng_data : range with the elements to sort
+/// @param rng_aux : range of at least the same memory than rng_data used as
+///                  auxiliary memory in the sorting
+/// @param comp : object used to compare two elements
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static void range_sort_data (const range<Iter1_t> & rng_data,
+                             const range<Iter2_t> & rng_aux, Compare comp)
+{
+    //-----------------------------------------------------------------------
+    //                  metaprogram
+    //-----------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same< value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //------------------------------------------------------------------------
+    //                    program
+    //------------------------------------------------------------------------
+#ifdef __BS_DEBUG
+    assert ( rng_data.size() == rng_aux.size());
+#endif
+    // ranges with this number of elements or fewer are sorted by insert_sort
+    const uint32_t sort_min = 32;
+    if (rng_data.size() <= sort_min)
+    {
+        insert_sort(rng_data.first, rng_data.last, comp);
+        return;
+    };
+
+    internal_sort(rng_aux, rng_data, comp, 0, true); // rng_data goes as rng2
+};
+//-----------------------------------------------------------------------------
+//  function : range_sort_buffer
+/// @brief sort the elements of rng_data, receiving rng_aux as a buffer of
+///        initialized memory. Presumably the result is left in rng_aux
+/// @param rng_data : range with the elements to sort
+/// @param rng_aux : range of at least the same memory than rng_data used as
+///                  auxiliary memory in the sorting
+/// @param comp : object used to compare two elements
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static void range_sort_buffer(const range<Iter1_t> & rng_data,
+                              const range<Iter2_t> & rng_aux, Compare comp)
+{
+    //-----------------------------------------------------------------------
+    //                  metaprogram
+    //-----------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same< value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //------------------------------------------------------------------------
+    //                    program
+    //------------------------------------------------------------------------
+#ifdef __BS_DEBUG
+    assert ( rng_data.size() == rng_aux.size());
+#endif
+    // ranges with this number of elements or fewer are sorted by insert_sort
+    const uint32_t sort_min = 32;
+    if (rng_data.size() <= sort_min)
+    {
+        insert_sort(rng_data.first, rng_data.last, comp);
+        move_forward(rng_aux, rng_data); // NOTE(review): defined elsewhere
+        return;
+    };
+
+    internal_sort(rng_data, rng_aux, comp, 0, false); // rng_aux goes as rng2
+};
+//****************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/spinlock.hpp b/boost/sort/common/spinlock.hpp
new file mode 100644
index 0000000000..450ba6b53e
--- /dev/null
+++ b/boost/sort/common/spinlock.hpp
@@ -0,0 +1,88 @@
+//----------------------------------------------------------------------------
+/// @file spinlock.hpp
+/// @brief This file contains the class spinlock_t
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_SPINLOCK_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_SPINLOCK_HPP
+
+#include <atomic>
+#include <ctime>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+//
+//---------------------------------------------------------------------------
+/// @class spinlock_t
+/// @brief This class implements a spinlock over a std::atomic_flag
+/// @remarks This class meets the BasicLockable requirements ( lock, unlock )
+//---------------------------------------------------------------------------
+class spinlock_t
+{
+  private:
+    //------------------------------------------------------------------------
+    //             P R I V A T E      V A R I A B L E S
+    //------------------------------------------------------------------------
+    std::atomic_flag af; // set while the spinlock is locked
+
+  public:
+    //
+    //-------------------------------------------------------------------------
+    //  function : spinlock_t
+    /// @brief  class constructor
+    /// @remarks the flag is cleared, so the spinlock begins unlocked
+    //-------------------------------------------------------------------------
+    explicit spinlock_t ( ) noexcept { af.clear ( ); };
+    //
+    //-------------------------------------------------------------------------
+    //  function : lock
+    /// @brief  Lock the spinlock_t, yielding the thread until it is acquired
+    //-------------------------------------------------------------------------
+    void lock ( ) noexcept
+    {
+    	while (af.test_and_set (std::memory_order_acquire))
+        {   // the flag was already set : let other threads run and retry
+            std::this_thread::yield ( );
+        };
+    };
+    //
+    //-------------------------------------------------------------------------
+    //  function : try_lock
+    /// @brief Try to lock the spinlock_t without waiting
+    /// @return true : the spinlock_t has been locked by this call
+    ///         false: the spinlock_t was already locked
+    //-------------------------------------------------------------------------
+    bool try_lock ( ) noexcept
+    {
+        return not af.test_and_set (std::memory_order_acquire);
+    };
+    //
+    //-------------------------------------------------------------------------
+    //  function : unlock
+    /// @brief  unlock the spinlock_t
+    //-------------------------------------------------------------------------
+    void unlock ( ) noexcept { af.clear (std::memory_order_release); };
+
+}; // E N D    C L A S S     S P I N L O C K
+//
+//***************************************************************************
+}; // end namespace common
+}; // end namespace sort
+}; // end namespace boost
+//***************************************************************************
+#endif
diff --git a/boost/sort/common/stack_cnc.hpp b/boost/sort/common/stack_cnc.hpp
new file mode 100644
index 0000000000..d4d6e53b25
--- /dev/null
+++ b/boost/sort/common/stack_cnc.hpp
@@ -0,0 +1,142 @@
+//----------------------------------------------------------------------------
+/// @file   stack_cnc.hpp
+/// @brief  This file contains the implementation of a concurrent stack
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_STACK_CNC_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_STACK_CNC_HPP
+
+#include <boost/sort/common/spinlock.hpp>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+
+//
+//###########################################################################
+//                                                                         ##
+//    ################################################################     ##
+//    #                                                              #     ##
+//    #                      C L A S S                               #     ##
+//    #                   S T A C K _ C N C                          #     ##
+//    #                                                              #     ##
+//    ################################################################     ##
+//                                                                         ##
+//###########################################################################
+//
+//---------------------------------------------------------------------------
+/// @class  stack_cnc
+/// @brief This class is a concurrent stack controlled by a spinlock_t
+/// @remarks emplace_back, pop_move_back and push_back lock spl before use v_t
+//---------------------------------------------------------------------------
+template<typename T, typename Allocator = std::allocator<T> >
+class stack_cnc
+{
+public:
+    //------------------------------------------------------------------------
+    //                     D E F I N I T I O N S
+    //------------------------------------------------------------------------
+    typedef std::vector<T, Allocator> vector_t;
+    typedef typename vector_t::size_type size_type;
+    typedef typename vector_t::difference_type difference_type;
+    typedef typename vector_t::value_type value_type;
+    typedef typename vector_t::pointer pointer;
+    typedef typename vector_t::const_pointer const_pointer;
+    typedef typename vector_t::reference reference;
+    typedef typename vector_t::const_reference const_reference;
+    typedef typename vector_t::allocator_type allocator_type;
+    typedef Allocator alloc_t;
+
+protected:
+    //-------------------------------------------------------------------------
+    //                   INTERNAL VARIABLES
+    //-------------------------------------------------------------------------
+    vector_t v_t; // storage of the stack, the top is the back of the vector
+    mutable spinlock_t spl; // lock taken by the member functions below
+
+public:
+    //
+    //-------------------------------------------------------------------------
+    //  function : stack_cnc
+    /// @brief  constructor, creates an empty stack
+    //-------------------------------------------------------------------------
+    explicit stack_cnc(void): v_t() { };
+
+    //
+    //-------------------------------------------------------------------------
+    //  function : stack_cnc
+    /// @brief  Move constructor, deleted
+    //-------------------------------------------------------------------------
+    stack_cnc(stack_cnc &&) = delete;
+    //
+    //-------------------------------------------------------------------------
+    //  function : ~stack_cnc
+    /// @brief  Destructor, clears the vector without taking the lock
+    //-------------------------------------------------------------------------
+    virtual ~stack_cnc(void) { v_t.clear(); };
+
+    //-------------------------------------------------------------------------
+    //  function : emplace_back
+    /// @brief Insert one element in the back of the container
+    /// @param args : group of arguments used to build the object to insert.
+    ///               Can be values, references or rvalues
+    //-------------------------------------------------------------------------
+    template<class ... Args>
+    void emplace_back(Args &&... args)
+    {
+        std::lock_guard < spinlock_t > guard(spl);
+        v_t.emplace_back(std::forward< Args > (args)...);
+    };
+
+    //
+    //-------------------------------------------------------------------------
+    //  function :pop_move_back
+    /// @brief if it exists, move the last element to P, and delete it
+    /// @param P : reference to the variable which receives the element
+    /// @return  true  - Element moved and deleted
+    ///          false - Empty stack_cnc
+    //-------------------------------------------------------------------------
+    bool pop_move_back(value_type &P)
+    {
+        std::lock_guard < spinlock_t > S(spl);
+        if (v_t.size() == 0) return false;
+        P = std::move(v_t.back());
+        v_t.pop_back();
+        return true;
+    };
+    //-------------------------------------------------------------------------
+    //  function : push_back
+    /// @brief Copy the elements of one vector at the end of the container
+    /// @param v_other : vector with the elements to insert
+    /// @return reference to the stack_cnc after the insertion
+    //-------------------------------------------------------------------------
+    template<class Allocator2>
+    stack_cnc &push_back(const std::vector<value_type, Allocator2> &v_other)
+    {
+        std::lock_guard < spinlock_t > guard(spl); // one lock for all elements
+        for (size_type i = 0; i < v_other.size(); ++i)
+        {
+            v_t.push_back(v_other[i]);
+        }
+        return *this;
+    };
+};
+// end class stack_cnc
+
+//***************************************************************************
+};// end namespace common
+};// end namespace sort
+};// end namespace boost
+//***************************************************************************
+#endif
diff --git a/boost/sort/common/time_measure.hpp b/boost/sort/common/time_measure.hpp
new file mode 100644
index 0000000000..ef00dd4930
--- /dev/null
+++ b/boost/sort/common/time_measure.hpp
@@ -0,0 +1,62 @@
+//----------------------------------------------------------------------------
+/// @file time_measure.hpp
+/// @brief These functions are done in order to simplify the time measure in
+///        the benchmark programs
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_TOOLS_TIME_MEASURE_HPP
+#define __BOOST_SORT_PARALLEL_TOOLS_TIME_MEASURE_HPP
+
+#include <chrono>
+
+namespace boost       
+{
+namespace sort        
+{
+namespace common      
+{
+
+namespace chrn = std::chrono;
+//
+//***************************************************************************
+//                D E F I N I T I O N S
+//***************************************************************************
+typedef chrn::steady_clock::time_point           time_point;
+
+inline time_point now ( );
+inline double subtract_time  ( const time_point & t1, const time_point & t2 );
+//
+//---------------------------------------------------------------------------
+//  function : now
+/// @brief return the current time of std::chrono::steady_clock. It is inline
+/// @return time in steady_clock format    because it is defined in a header
+//---------------------------------------------------------------------------
+inline time_point now ( ) {   return chrn::steady_clock::now( ); };
+//
+//---------------------------------------------------------------------------
+//  function : subtract_time
+/// @brief return the difference of two times, in seconds, in double format
+/// @param [in] t1 : first  time in time_point format
+/// @param [in] t2 : second time in time_point format
+/// @return time in seconds of the difference of t1 - t2
+//---------------------------------------------------------------------------
+inline double subtract_time  ( const time_point & t1, const time_point & t2 )
+{   // inline : the definition is in a header included by several sources
+    chrn::duration<double> time_span =
+                chrn::duration_cast < chrn::duration < double > > ( t1 - t2 );
+    return  time_span.count( );
+};
+
+//***************************************************************************
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//***************************************************************************
+#endif
diff --git a/boost/sort/common/util/algorithm.hpp b/boost/sort/common/util/algorithm.hpp
new file mode 100644
index 0000000000..db7607aaeb
--- /dev/null
+++ b/boost/sort/common/util/algorithm.hpp
@@ -0,0 +1,309 @@
+//----------------------------------------------------------------------------
+/// @file algorithm.hpp
+/// @brief low level functions of create, destroy, move and merge functions
+///
+/// @author Copyright (c) 2017 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_UTIL_ALGORITHM_HPP
+#define __BOOST_SORT_COMMON_UTIL_ALGORITHM_HPP
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+#include <boost/sort/common/util/traits.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+//
+//###########################################################################
+//
+//                       I M P O R T A N T
+//
+// The functions of this file are for internal use only
+// All the operations are done with move operations, because the copy
+// operations are unnecessary
+//
+//###########################################################################
+//
+//----------------------------------------------------------------------------
+//
+//         F U N C T I O N S   I N   T H E   F I L E
+//
+//----------------------------------------------------------------------------
+//
+// static inline uint32_t nbits32 (uint32_t num) noexcept
+//
+// static inline uint32_t nbits64 (uint64_t num)
+//
+// template < class Value_t, class... Args >
+// inline void construct_object (Value_t *ptr, Args &&... args)
+//
+// template < class Value_t >
+// inline void destroy_object (Value_t *ptr)
+//
+// template < class Iter_t, class Value_t = value_iter<Iter_t> >
+// void initialize (Iter_t first, Iter_t last, Value_t & val)
+//
+// template < class Iter1_t, class Iter2_t >
+// Iter2_t move_forward (Iter2_t it_dest, Iter1_t first, Iter1_t last)
+//
+// template < class Iter1_t, class Iter2_t >
+// Iter2_t move_backward (Iter2_t it_dest, Iter1_t first, Iter1_t last)
+//
+// template < class Iter_t, class Value_t = value_iter< Iter_t > >
+// Value_t * move_construct (Value_t *ptr, Iter_t first, Iter_t last)
+//
+// template < class Iter_t >
+// void destroy (Iter_t first, const Iter_t last)
+//
+// template < class Iter_t >
+// void reverse (Iter_t first, const Iter_t last)
+//
+//----------------------------------------------------------------------------
+//
+//--------------------------------------------------------------------------
+//
+//                    G L O B A L     V A R I A B L E S
+//
+//--------------------------------------------------------------------------
+//
+// tmsb[i] is the number of bits needed to represent i, for i in [0, 255]
+// (0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, and so on up to 128..255 -> 8)
+static constexpr const uint32_t tmsb[256] =
+{ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+                5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+                6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
+                7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+                7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+                7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8,
+                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+//
+//---------------------------------------------------------------------------
+//
+//                           F U N C T I O N S
+//
+//---------------------------------------------------------------------------
+//
+//---------------------------------------------------------------------------
+//  function : nbits32
+/// @brief Number of bits needed to represent num (0 when num == 0)
+/// @param num : Number to examine
+/// @return Number of bits
+//---------------------------------------------------------------------------
+static inline uint32_t nbits32 (uint32_t num) noexcept
+{   // Locate the highest non-zero byte; tmsb resolves the bits inside it
+    uint32_t Pos = (num & 0xffff0000U) ? 16 : 0;
+    if ((num >> Pos) & 0xff00U) Pos += 8;
+    return (tmsb[num >> Pos] + Pos);
+}
+//
+//---------------------------------------------------------------------------
+//  function : nbits64
+/// @brief Number of bits needed to represent num (0 when num == 0)
+/// @param num : Number to examine
+/// @exception none
+/// @return Number of bits
+//---------------------------------------------------------------------------
+static inline uint32_t nbits64(uint64_t num)noexcept
+{   uint32_t shift = ((num >> 32) != 0) ? 32 : 0;
+    if (((num >> shift) & 0xffff0000ULL) != 0) shift += 16;
+    if (((num >> shift) & 0xff00ULL) != 0) shift += 8;
+    const uint64_t top_byte = num >> shift;
+    return (tmsb[top_byte] + shift);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : construct_object
+/// @brief build a Value_t in place, in the memory pointed to by ptr
+/// @param ptr : address of the memory which receives the new object
+/// @param args : arguments forwarded to the constructor of Value_t
+//-----------------------------------------------------------------------------
+template <class Value_t, class ... Args>
+inline void construct_object (Value_t *ptr, Args &&... args)
+{
+    void * storage = static_cast<void *>(ptr);
+    ::new (storage) Value_t(std::forward< Args > (args)...);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : destroy_object
+/// @brief run the destructor of the object at ptr; no memory is released
+/// @param ptr : pointer to the object to destroy
+//-----------------------------------------------------------------------------
+template<class Value_t>
+inline void destroy_object(Value_t *ptr)
+{
+    (*ptr).~Value_t();
+}
+//
+//-----------------------------------------------------------------------------
+//  function : initialize
+/// @brief construct every element of the uninitialized range [first, last)
+///        by handing one object along it : val is moved into the first
+///        position, each position is then move constructed from the one
+///        before it, and the last element is finally moved back into val
+/// @param first : iterator to the first element to initialize
+/// @param last : iterator to the position after the last element
+/// @param val : object used for the initialization; it is restored at the end
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Value_t = value_iter<Iter_t> >
+inline void initialize (Iter_t first, Iter_t last, Value_t & val)
+{
+    // Compile time check : val must have the value type of the iterators
+    static_assert (std::is_same< Value_t, value_iter<Iter_t> >::value,
+                    "Incompatible iterators\n");
+
+    // Nothing to build in an empty range, and val is left untouched
+    if (first == last) return;
+
+    // Seed the first position, then pass the object from position to position
+    construct_object(&(*first), std::move(val));
+    Iter_t source = first;
+    for (Iter_t target = first + 1; target != last; ++target)
+    {   construct_object(&(*target), std::move(*source));
+        ++source;
+    }
+
+    // Give the object back to the caller
+    val = std::move(*(last - 1));
+}
+//
+//-----------------------------------------------------------------------------
+//  function : move_forward
+/// @brief Move assign the elements of [first, last) over the already
+///        initialized objects which begin at it_dest, from front to back
+/// @param it_dest : iterator to the final place of the objects
+/// @param first : iterator to the first element to move
+/// @param last : iterator to the position after the last element to move
+/// @return Output iterator to the element past the last element
+///         moved (it_dest + (last - first))
+//-----------------------------------------------------------------------------
+template <class Iter1_t, class Iter2_t>
+inline Iter2_t move_forward (Iter2_t it_dest, Iter1_t first, Iter1_t last)
+{
+    // Compile time check : both iterators must refer to the same value type
+    typedef value_iter<Iter1_t> source_value_t;
+    typedef value_iter<Iter2_t> target_value_t;
+    static_assert (std::is_same< source_value_t, target_value_t >::value,
+                    "Incompatible iterators\n");
+
+    // The source is advanced before the destination, one element at a time,
+    // until the whole input range has been consumed
+    while (first != last)
+    {
+        *it_dest = std::move(*first);
+        ++first;
+        ++it_dest;
+    }
+    return it_dest;
+}
+//
+//-----------------------------------------------------------------------------
+//  function : move_backward
+/// @brief Move initialized objects in reverse order, from back to front
+/// @param it_dest : iterator to the position after the final place of the
+///                  last object
+/// @param first : iterator to the first element to move
+/// @param last : iterator to the position after the last element to move
+/// @return iterator to the place of the first element moved
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t>
+inline Iter2_t move_backward(Iter2_t it_dest, Iter1_t  first, Iter1_t last)
+{
+    // Compile time check : both iterators must refer to the same value type
+    typedef value_iter<Iter1_t> source_value_t;
+    typedef value_iter<Iter2_t> target_value_t;
+    static_assert (std::is_same< source_value_t, target_value_t >::value,
+                    "Incompatible iterators\n");
+
+    while (first != last)
+    {
+        --last;
+        --it_dest;
+        *it_dest = std::move (*last);
+    }
+    return it_dest;
+}
+
+//
+//-----------------------------------------------------------------------------
+//  function : move_construct
+/// @brief Move construct the elements of [first, last) in the uninitialized
+///        memory which begins at ptr
+/// @param ptr : pointer to the memory where to create the objects
+/// @param first : iterator to the first element to move
+/// @param last : iterator to the position after the last element to move
+/// @return pointer to the position after the last object created
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Value_t = value_iter<Iter_t> >
+inline Value_t * move_construct(Value_t *ptr, Iter_t first, Iter_t last)
+{
+    // Compile time check : the memory must have the value type of the
+    // iterators. iterator_traits is qualified with std:: so the check does
+    // not depend on a using declaration made in another header
+    typedef typename std::iterator_traits<Iter_t>::value_type source_value_t;
+    static_assert (std::is_same< Value_t, source_value_t >::value,
+                    "Incompatible iterators\n");
+
+    // Placement new : the destination holds no object before this call
+    for (; first != last; ++first)
+    {
+        ::new (static_cast<void *>(ptr)) Value_t(std::move(*first));
+        ++ptr;
+    }
+    return ptr;
+}
+//
+//-----------------------------------------------------------------------------
+//  function : destroy
+/// @brief run the destructor of every element in the range [first, last)
+/// @param first : iterator to the first element to destroy
+/// @param last : iterator to the position after the last element to destroy
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+inline void destroy(Iter_t first, const Iter_t last)
+{
+    for (; first != last; ++first)
+        destroy_object(&(*first));
+}
+//
+//-----------------------------------------------------------------------------
+//  function : reverse
+/// @brief reverse the order of the elements between first and last
+/// @param first : iterator to the first element of the range to reverse
+/// @param last : end iterator of the range, as expected by std::reverse
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+inline void reverse(Iter_t first, Iter_t last)
+{
+    std::reverse ( first, last);
+};
+//
+//****************************************************************************
+};//    End namespace util
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/util/atomic.hpp b/boost/sort/common/util/atomic.hpp
new file mode 100644
index 0000000000..15906fe52a
--- /dev/null
+++ b/boost/sort/common/util/atomic.hpp
@@ -0,0 +1,98 @@
+//----------------------------------------------------------------------------
+/// @file atomic.hpp
+/// @brief Basic layer to simplify the use of atomic functions
+/// @author Copyright(c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_ATOMIC_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_ATOMIC_HPP
+
+#include <atomic>
+#include <cassert>
+#include <type_traits>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+//-----------------------------------------------------------------------------
+//  function : atomic_read
+/// @brief read the value of an atomic variable with acquire memory ordering
+/// @param at_var : atomic variable to read
+/// @return value obtained
+//-----------------------------------------------------------------------------
+template<typename T>
+inline T atomic_read(std::atomic<T> &at_var)
+{
+    return at_var.load(std::memory_order_acquire);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : atomic_add
+/// @brief Add a number to an atomic variable with acquire-release ordering
+/// @param at_var : variable to add
+/// @param num : value to add to at_var (converted to T)
+/// @return value held by at_var immediately before the addition
+//-----------------------------------------------------------------------------
+template<typename T, typename T2>
+inline T atomic_add(std::atomic<T> &at_var, T2 num)
+{
+    static_assert (std::is_integral< T2 >::value, "Bad parameter");
+    const T increment = static_cast<T>(num);
+    return at_var.fetch_add(increment, std::memory_order_acq_rel);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : atomic_sub
+/// @brief Subtract a number from an atomic variable (acquire-release order)
+/// @param at_var : Variable to subtract from
+/// @param num : value to subtract from at_var (converted to T)
+/// @return value held by at_var immediately before the subtraction
+//-----------------------------------------------------------------------------
+template<typename T, typename T2>
+inline T atomic_sub(std::atomic<T> &at_var, T2 num)
+{
+    static_assert (std::is_integral< T2 >::value, "Bad parameter");
+    const T decrement = static_cast<T>(num);
+    return at_var.fetch_sub(decrement, std::memory_order_acq_rel);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : atomic_write
+/// @brief Store a value in an atomic variable with release memory ordering
+/// @param at_var : variable to write
+/// @param num : value to write in at_var (converted to T)
+//-----------------------------------------------------------------------------
+template<typename T, typename T2>
+inline void atomic_write(std::atomic<T> &at_var, T2 num)
+{
+    static_assert (std::is_integral< T2 >::value, "Bad parameter");
+    const T value = static_cast<T>(num);
+    at_var.store(value, std::memory_order_release);
+}
+// Guard which subtracts 1 from the referenced atomic counter when destroyed
+template<typename T>
+struct counter_guard
+{
+    typedef std::atomic<T> atomic_t;
+    atomic_t &count;    // counter decremented by the destructor
+    counter_guard(atomic_t & counter): count(counter) { }
+    ~counter_guard() { count.fetch_sub(T(1), std::memory_order_acq_rel); }
+};
+//
+//****************************************************************************
+};// End namespace util
+};// End namespace common
+};// End namespace sort
+};// End namespace boost
+//****************************************************************************
+#endif
diff --git a/boost/sort/common/util/circular_buffer.hpp b/boost/sort/common/util/circular_buffer.hpp
new file mode 100644
index 0000000000..2fc7e973e1
--- /dev/null
+++ b/boost/sort/common/util/circular_buffer.hpp
@@ -0,0 +1,572 @@
+//----------------------------------------------------------------------------
+/// @file   circular_buffer.hpp
+/// @brief  This file contains the implementation of the circular buffer
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_UTIL_CIRCULAR_BUFFER_HPP
+#define __BOOST_SORT_COMMON_UTIL_CIRCULAR_BUFFER_HPP
+
+#include <memory>
+#include <cassert>
+#include <exception>
+#include <boost/sort/common/util/algorithm.hpp>
+#include <boost/sort/common/util/traits.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+
+//---------------------------------------------------------------------------
+/// @class  circular_buffer
+/// @brief  This class implements a circular buffer
+/// @remarks
+//---------------------------------------------------------------------------
+template <class Value_t, uint32_t Power2 = 11>
+struct circular_buffer
+{
+    //------------------------------------------------------------------------
+    //                          STATIC CHECK
+    //------------------------------------------------------------------------
+    static_assert ( Power2 != 0, "Wrong Power2");
+
+    //------------------------------------------------------------------------
+    //                          DEFINITIONS
+    //------------------------------------------------------------------------
+    typedef Value_t value_t;
+
+    //------------------------------------------------------------------------
+    //                          VARIABLES
+    //------------------------------------------------------------------------
+    const size_t NMAX = (size_t) 1 << Power2; // capacity of the buffer
+    const size_t MASK = (NMAX - 1);           // wraps a position into [0, NMAX)
+    const size_t BLOCK_SIZE = NMAX >> 1;
+    const size_t LOG_BLOCK = Power2 - 1;
+    Value_t * ptr = nullptr;                  // memory for NMAX elements
+
+    //------------------------------------------------------------------------
+    // first_pos is the position of the first element, always in the range
+    // [0, NMAX - 1]; nelem is the number of elements stored
+    //------------------------------------------------------------------------
+    size_t nelem, first_pos;
+    bool initialized;
+
+    //
+    //------------------------------------------------------------------------
+    //  function : circular_buffer
+    /// @brief  constructor : obtain uninitialized memory for NMAX elements
+    //-----------------------------------------------------------------------
+    circular_buffer(void)
+    : ptr(nullptr), nelem(0), first_pos(0), initialized(false)
+    {
+        ptr = std::get_temporary_buffer < Value_t > (NMAX).first;
+        if (ptr == nullptr) throw std::bad_alloc();
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : ~circular_buffer
+    /// @brief destructor : destroy the objects, if any, and free the memory
+    //-----------------------------------------------------------------------
+    ~circular_buffer()
+    {
+        if (initialized)
+        {   for (size_t i = 0; i < NMAX; ++i) (ptr + i)->~Value_t();
+            initialized = false;
+        };
+        std::return_temporary_buffer(ptr);
+    }
+    ;
+    //
+    //------------------------------------------------------------------------
+    //  function : initialize
+    /// @brief : construct the NMAX objects of the buffer in the uninitialized
+    //           memory obtained from the temporary buffer
+    /// @param val : value used to initialize the memory
+    //-----------------------------------------------------------------------
+    void initialize(Value_t & val)
+    {
+        assert (initialized == false);
+        ::new (static_cast<void*>(ptr)) Value_t(std::move(val));
+        for (size_t i = 1; i < NMAX; ++i)
+            ::new (static_cast<void*>(ptr + i)) Value_t(std::move(ptr[i - 1]));
+        val = std::move(ptr[NMAX - 1]);
+        initialized = true;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : destroy_all
+    /// @brief : destroy all the objects, so the destructor must not repeat it
+    //-----------------------------------------------------------------------
+    void destroy_all(void) { destroy(ptr, ptr + NMAX); initialized = false; };
+    //
+    //------------------------------------------------------------------------
+    //  function : get_buffer
+    /// @brief return the internal memory of the circular buffer
+    /// @return pointer to the internal memory of the buffer
+    //-----------------------------------------------------------------------
+    Value_t * get_buffer(void) { return ptr; };
+    //
+    //------------------------------------------------------------------------
+    //  function : empty
+    /// @brief return if the buffer is empty
+    /// @return true : empty
+    //-----------------------------------------------------------------------
+    bool empty(void) const {return (nelem == 0); };
+    //
+    //------------------------------------------------------------------------
+    //  function : full
+    /// @brief return if the buffer is full
+    /// @return true : full
+    //-----------------------------------------------------------------------
+    bool full(void) const { return (nelem == NMAX); };
+    //
+    //------------------------------------------------------------------------
+    //  function : size
+    /// @brief return the number of elements stored in the buffer
+    /// @return number of elements stored
+    //-----------------------------------------------------------------------
+    size_t size(void) const { return nelem;};
+    //
+    //------------------------------------------------------------------------
+    //  function : capacity
+    /// @brief : return the maximum capacity of the buffer
+    /// @return number of elements
+    //-----------------------------------------------------------------------
+    size_t capacity(void) const { return NMAX;};
+    //
+    //------------------------------------------------------------------------
+    //  function : free_size
+    /// @brief return the free positions in the buffer
+    /// @return number of elements
+    //-----------------------------------------------------------------------
+    size_t free_size(void) const  { return (NMAX - nelem); };
+    //
+    //------------------------------------------------------------------------
+    //  function : clear
+    /// @brief clear the buffer; only the counters are reset
+    //-----------------------------------------------------------------------
+    void clear(void)  { nelem = first_pos = 0; };
+    //
+    //------------------------------------------------------------------------
+    //  function : front
+    /// @brief return the first element of the buffer
+    /// @return reference to the first value
+    //-----------------------------------------------------------------------
+    Value_t & front(void)
+    {
+#ifdef __BS_DEBUG
+        assert (nelem > 0);
+#endif
+        return (ptr[first_pos]);
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : front
+    /// @brief return the first element of the buffer
+    /// @return const reference to the first value
+    //-----------------------------------------------------------------------
+    const Value_t & front(void) const
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        return (ptr[first_pos]);
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : back
+    /// @brief reference to the last value of the buffer
+    /// @return reference to the last value
+    //-----------------------------------------------------------------------
+    Value_t & back(void)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        return (ptr[(first_pos + nelem - 1) & MASK]);
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : back
+    /// @brief reference to the last value of the buffer
+    /// @return const reference to the last value
+    //-----------------------------------------------------------------------
+    const Value_t & back(void) const
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        return (ptr[(first_pos + nelem - 1) & MASK]);
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : operator []
+    /// @brief positional access to the elements
+    /// @param pos : position requested, counted from the first element
+    /// @return reference to the element
+    //-----------------------------------------------------------------------
+    Value_t & operator[](uint32_t pos)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        return ptr[(first_pos + pos) & MASK];
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : operator []
+    /// @brief positional access to the elements
+    /// @param pos : position requested, counted from the first element
+    /// @return const reference to the element
+    //-----------------------------------------------------------------------
+    const Value_t & operator[](uint32_t pos) const
+    {
+
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        return ptr[(first_pos + pos) & MASK];
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : push_front
+    /// @brief insert an element in the first position of the buffer
+    /// @param val : const value to insert
+    //-----------------------------------------------------------------------
+    void push_front(const Value_t & val)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem != NMAX);
+#endif
+        ++nelem;
+        first_pos = ((first_pos + MASK) & MASK);
+        ptr[first_pos] = val;
+
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : push_front
+    /// @brief insert an element in the first position of the buffer
+    /// @param val : rvalue to insert; it is moved, not copied
+    //-----------------------------------------------------------------------
+    void push_front(Value_t && val)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem != NMAX);
+#endif
+        ++nelem;
+        first_pos = ((first_pos + MASK) & MASK);
+        ptr[first_pos] = std::move(val);
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : push_back
+    /// @brief insert an element in the last position of the buffer
+    /// @param val : value to insert
+    //-----------------------------------------------------------------------
+    void push_back(const Value_t & val)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem != NMAX);
+#endif
+        ptr[(first_pos + (nelem++)) & MASK] = val;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : push_back
+    /// @brief insert an element in the last position of the buffer
+    /// @param val : rvalue to insert; it is moved, not copied
+    //-----------------------------------------------------------------------
+    void push_back(Value_t && val)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem != NMAX);
+#endif
+        ptr[(first_pos + (nelem++)) & MASK] = std::move(val);
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : pop_front
+    /// @brief remove the first element of the buffer
+    //-----------------------------------------------------------------------
+    void pop_front(void)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        --nelem;
+        (++first_pos) &= MASK;
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : pop_back
+    /// @brief remove the last element of the buffer
+    //-----------------------------------------------------------------------
+    void pop_back(void)
+    {
+#ifdef __BS_DEBUG
+        assert ( nelem > 0 );
+#endif
+        --nelem;
+    };
+
+    template<class iter_t>
+    void pop_copy_front(iter_t it_dest, size_t num);
+
+    template<class iter_t>
+    void pop_move_front(iter_t it_dest, size_t num);
+
+    template<class iter_t>
+    void pop_copy_back(iter_t it_dest, size_t num);
+
+    template<class iter_t>
+    void pop_move_back(iter_t it_dest, size_t num);
+
+    template<class iter_t>
+    void push_copy_front(iter_t it_src, size_t num);
+
+    template<class iter_t>
+    void push_move_front(iter_t it_src, size_t num);
+
+    template<class iter_t>
+    void push_copy_back(iter_t it_src, size_t num);
+
+    template<class iter_t>
+    void push_move_back(iter_t it_src, size_t num);
+
+//---------------------------------------------------------------------------
+};//               End of class circular_buffer
+//---------------------------------------------------------------------------
+//
+//
+//############################################################################
+//                                                                          ##
+//             N O N    I N L I N E    F U N C T I O N S                    ##
+//                                                                          ##
+//############################################################################
+//
+//------------------------------------------------------------------------
+//  function : pop_copy_front
+/// @brief copy the first num elements of the buffer to it_dest, in order,
+///        and remove them from the buffer
+/// @param it_dest : iterator to the first position where copy the elements
+/// @param num : number of elements to copy
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::pop_copy_front(iter_t it_dest, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( num <= nelem);
+#endif
+    // The counters are updated before the elements are read
+    const size_t start = first_pos;
+    nelem -= num;
+    first_pos = (start + num) & MASK;
+    for (size_t offset = 0; offset < num; ++offset)
+    {   *(it_dest++) = ptr[(start + offset) & MASK];
+    }
+}
+//
+//------------------------------------------------------------------------
+//  function : pop_move_front
+/// @brief move the first num elements of the buffer to the place pointed
+//         by it_dest, in order, and remove them from the buffer
+/// @param it_dest : iterator to the first position where move the elements
+/// @param num : number of elements to move
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+:: pop_move_front(iter_t it_dest, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( num <= nelem);
+#endif
+    // The counters are updated before the elements are moved. The objects
+    // moved from stay in the internal memory of the buffer
+    const size_t start = first_pos;
+    nelem -= num;
+    first_pos = (start + num) & MASK;
+    for (size_t offset = 0; offset < num; ++offset)
+    {   *(it_dest++) = std::move(ptr[(start + offset) & MASK]);
+    }
+}
+//
+//------------------------------------------------------------------------
+//  function : pop_copy_back
+/// @brief copy the last num elements of the buffer to it_dest, keeping
+///        their order, and remove them from the buffer
+/// @param it_dest : iterator where begin to copy the elements
+/// @param num : number of elements to copy
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::pop_copy_back(iter_t it_dest, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( num <= nelem);
+#endif
+    nelem -= num;
+    // once nelem is reduced, first_pos + nelem is the first element to pop
+    size_t const start = (first_pos + nelem) & MASK;
+    for (size_t i = 0; i != num; ++i, ++it_dest)
+        *it_dest = ptr[(start + i) & MASK];
+};
+//
+//------------------------------------------------------------------------
+//  function : pop_move_back
+/// @brief move the last num elements of the buffer to it_dest, keeping
+///        their order, and remove them from the buffer
+/// @param it_dest : iterator where begin to move the elements
+/// @param num : number of elements to move
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::pop_move_back(iter_t it_dest, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( num <= nelem);
+#endif
+    nelem -= num;
+    // once nelem is reduced, first_pos + nelem is the first element to pop
+    size_t const start = (first_pos + nelem) & MASK;
+    for (size_t i = 0; i != num; ++i, ++it_dest)
+        *it_dest = std::move(ptr[(start + i) & MASK]);
+};
+//
+//------------------------------------------------------------------------
+//  function : push_copy_front
+/// @brief copy num elements, keeping their order, in the front of the buffer
+/// @param it_src : iterator from where begin to copy the elements
+/// @param num : number of elements to copy
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::push_copy_front(iter_t it_src, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( free_size() >= num);
+#endif
+    nelem += num;
+    // first_pos goes num positions back; NMAX is added before subtracting
+    first_pos = (first_pos + NMAX - num) & MASK;
+    size_t const start = first_pos;
+    for (size_t i = 0; i != num; ++i, ++it_src)
+    {
+        ptr[(start + i) & MASK] = *it_src;
+    }
+};
+//
+//------------------------------------------------------------------------
+//  function : push_move_front
+/// @brief move num elements, keeping their order, in the front of the buffer
+/// @param it_src : iterator from where begin to move the elements
+/// @param num : number of elements to move
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::push_move_front(iter_t it_src, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( free_size() >= num);
+#endif
+    nelem += num;
+    // step first_pos back by num, so the current front is not overwritten
+    first_pos = (first_pos + NMAX - num) & MASK;
+    size_t pos = first_pos;
+    for (size_t i = 0; i < num; ++i)
+        ptr[(pos++) & MASK] = std::move(*(it_src++));
+};
+//
+//------------------------------------------------------------------------
+//  function : push_copy_back
+/// @brief copy num elements, keeping their order, after the last element
+///        of the buffer
+/// @param it_src : iterator from where begin to copy the elements
+/// @param num : number of elements to copy
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::push_copy_back(iter_t it_src, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( free_size() >= num);
+#endif
+    // start : position after the last element, reduced with MASK when used
+    size_t const start = first_pos + nelem;
+    nelem += num;
+    for (size_t i = 0; i != num; ++i, ++it_src)
+        ptr[(start + i) & MASK] = *it_src;
+};
+//
+//------------------------------------------------------------------------
+//  function : push_move_back
+/// @brief move num elements, keeping their order, after the last element
+///        of the buffer
+/// @param it_src : iterator from where begin to move the elements
+/// @param num : number of elements to move
+//-----------------------------------------------------------------------
+template <class Value_t, uint32_t Power2>
+template<class iter_t>
+void circular_buffer<Value_t, Power2>
+::push_move_back(iter_t it_src, size_t num)
+{
+    static_assert ( std::is_same <value_iter<iter_t>, Value_t>::value,
+                    "Incompatible iterator");
+    if (num == 0) return;
+#ifdef __BS_DEBUG
+    assert ( free_size() >= num);
+#endif
+    // start : position after the last element, reduced with MASK when used
+    size_t const start = first_pos + nelem;
+    nelem += num;
+    for (size_t i = 0; i != num; ++i, ++it_src)
+        ptr[(start + i) & MASK] = std::move(*it_src);
+};
+
+//****************************************************************************
+};// End namespace util
+};// End namespace common
+};// End namespace sort
+};// End namespace boost
+//****************************************************************************
+#endif
diff --git a/boost/sort/common/util/insert.hpp b/boost/sort/common/util/insert.hpp
new file mode 100644
index 0000000000..219fa8a351
--- /dev/null
+++ b/boost/sort/common/util/insert.hpp
@@ -0,0 +1,142 @@
+//----------------------------------------------------------------------------
+/// @file insert.hpp
+/// @brief low level functions to insert sorted elements in a sorted range
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_UTIL_INSERT_HPP
+#define __BOOST_SORT_COMMON_UTIL_INSERT_HPP
+
+//#include <boost/sort/spinsort/util/indirect.hpp>
+#include <boost/sort/common/util/insert.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/util/algorithm.hpp>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+#include <cstddef>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+namespace here = boost::sort::common::util;
+//
+//############################################################################
+//
+//          D E F I N I T I O N S    O F    F U N C T I O N S
+//    
+// template < class Iter1_t, class Iter2_t, typename Compare>
+// void insert_sorted (Iter1_t first, Iter1_t mid, Iter1_t last,
+//                     Compare comp, Iter2_t  it_aux)
+//
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : insert_sorted
+/// @brief : Insert the elements of [mid, last) into the range [first, mid),
+///          using it_aux as temporary storage. Both ranges must be sorted
+/// @param first: iterator to the first element of the range
+/// @param mid : end of the sorted data and first of the elements to insert
+/// @param last : iterator to the next element of the last in the range
+/// @param comp : comparison object
+/// @param it_aux : auxiliary buffer with space to store temporarily the
+///                 elements to insert (last - mid elements)
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, typename Compare>
+static void insert_sorted(Iter1_t first, Iter1_t mid, Iter1_t last,
+                          Compare comp, Iter2_t it_aux)
+{
+    //------------------------------------------------------------------------
+    //                 metaprogram
+    //------------------------------------------------------------------------
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    //--------------------------------------------------------------------
+    //                   program
+    //--------------------------------------------------------------------
+    // nothing to do when any of the two ranges is empty
+    if (first == mid or mid == last) return;
+
+    //------------------------------------------------------------------------
+    // The elements to insert [mid, last) are moved to it_aux, and their
+    // positions are used to shift the sorted elements to the right
+    //-----------------------------------------------------------------------
+    move_forward(it_aux, mid, last);
+
+    // The pending elements are placed from the last to the first.
+    // shift_end : end of the sorted elements which are not shifted yet
+    Iter1_t shift_end = mid;
+    for (size_t pending = last - mid; pending != 0; --pending)
+    {
+        // position in the sorted part of the element to insert
+        Iter1_t ins = std::upper_bound(first, shift_end,
+                                       it_aux[pending - 1], comp);
+        // [ins, shift_end) is moved to finish at shift_end + pending
+        Iter1_t dst = here::move_backward(shift_end + pending, ins, shift_end);
+        *(dst - 1) = std::move(it_aux[pending - 1]);
+        shift_end = ins;
+    };
+};
+
+//-----------------------------------------------------------------------------
+//  function : insert_sorted_backward
+/// @brief : Insert the elements of [first, mid) into the range [mid, last),
+///          using it_aux as temporary storage. Both ranges must be sorted
+/// @param first : iterator to the first of the elements to insert
+/// @param mid : end of the elements to insert and first of the sorted data
+/// @param last : iterator to the next element of the last in the range
+/// @param comp : comparison object
+/// @param it_aux : auxiliary buffer with space for (mid - first) elements
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, typename Compare>
+static void insert_sorted_backward(Iter1_t first, Iter1_t mid, Iter1_t last,
+                                   Compare comp, Iter2_t it_aux)
+{
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    // nothing to do when any of the two ranges is empty
+    if (first == mid or mid == last) return;
+
+    // the elements to insert are moved to it_aux, freeing their positions
+    move_forward(it_aux, first, mid);
+
+    size_t const ndata = mid - first;
+    // shift_begin : first element of [mid, last) which is not shifted yet
+    Iter1_t shift_begin = mid;
+    for (size_t done = 0; done < ndata; ++done)
+    {
+        // position in the sorted part of the element to insert
+        Iter1_t ins = std::lower_bound(shift_begin, last, it_aux[done], comp);
+        // [shift_begin, ins) is moved to the left (ndata - done) positions
+        Iter1_t dst = move_forward(shift_begin - (ndata - done), shift_begin, ins);
+        *dst = std::move(it_aux[done]);
+        shift_begin = ins;
+    };
+};
+//
+//****************************************************************************
+};//    End namespace util
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/util/merge.hpp b/boost/sort/common/util/merge.hpp
new file mode 100644
index 0000000000..5fc90c0fd4
--- /dev/null
+++ b/boost/sort/common/util/merge.hpp
@@ -0,0 +1,494 @@
+//----------------------------------------------------------------------------
+/// @file merge.hpp
+/// @brief low level merge functions
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_UTIL_MERGE_HPP
+#define __BOOST_SORT_COMMON_UTIL_MERGE_HPP
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <memory>
+
+#include <boost/sort/common/util/algorithm.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/util/circular_buffer.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+namespace here = boost::sort::common::util;
+//----------------------------------------------------------------------------
+//
+//           F U N C T I O N S    I N    T H E     F I L E
+//----------------------------------------------------------------------------
+//
+// template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare >
+// Iter3_t merge (Iter1_t buf1, const Iter1_t end_buf1, Iter2_t buf2,
+//                const Iter2_t end_buf2, Iter3_t buf_out, Compare comp)
+//
+// template < class Iter1_t, class Iter2_t, class Value_t, class Compare >
+// Value_t *merge_construct(Iter1_t first1, const Iter1_t last1, Iter2_t first2,
+//                          const Iter2_t last2, Value_t *it_out, Compare comp)
+//
+// template < class Iter1_t, class Iter2_t, class Compare >
+// Iter2_t merge_half (Iter1_t buf1, const Iter1_t end_buf1, Iter2_t buf2,
+//                     const Iter2_t end_buf2, Iter2_t buf_out, Compare comp)
+//
+// template < class Iter1_t, class Iter2_t, class Compare >
+// Iter2_t merge_half_backward (Iter1_t buf1,  Iter1_t end_buf1,
+//                              Iter2_t buf2, Iter2_t end_buf2,
+//                              Iter1_t end_buf_out, Compare comp)
+//
+// template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare >
+// bool merge_uncontiguous (Iter1_t src1, const Iter1_t end_src1,
+//                          Iter2_t src2, const Iter2_t end_src2,
+//                          Iter3_t aux, Compare comp)
+//
+// template < class Iter1_t, class Iter2_t, class Compare >
+// bool merge_contiguous (Iter1_t src1, Iter1_t src2, Iter1_t end_src2,
+//                        Iter2_t buf, Compare comp)
+//
+// template < class Iter1_t, class Iter2_t, class Circular, class Compare >
+// bool merge_circular (Iter1_t buf1, Iter1_t end_buf1, Iter2_t buf2,
+//                      Iter2_t end_buf2, Circular &circ, Compare comp,
+//                      Iter1_t &it1_out, Iter2_t &it2_out)
+//
+//----------------------------------------------------------------------------
+//
+//-----------------------------------------------------------------------------
+//  function : merge
+/// @brief Merge the two buffers pointed by buf1 and buf2, moving the
+///        elements to the buffer pointed by buf_out
+/// @param buf1 : iterator to the first element in the first buffer
+/// @param end_buf1 : final iterator of first buffer
+/// @param buf2 : iterator to the first element of the second buffer
+/// @param end_buf2 : final iterator of the second buffer
+/// @param buf_out : buffer where move the elements merged
+/// @param comp : comparison object
+/// @return iterator returned by the last call to move_forward over buf_out
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Iter3_t, class Compare>
+static Iter3_t merge(Iter1_t buf1, const Iter1_t end_buf1, Iter2_t buf2,
+                     const Iter2_t end_buf2, Iter3_t buf_out, Compare comp)
+{
+    //-------------------------------------------------------------------------
+    //                       Metaprogramming
+    //-------------------------------------------------------------------------
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+    static_assert (std::is_same< value_iter<Iter3_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                       Code
+    //-------------------------------------------------------------------------
+    const size_t MIN_CHECK = 1024;
+    const size_t total = size_t((end_buf1 - buf1) + (end_buf2 - buf2));
+
+    if (total >= MIN_CHECK)
+    {
+        // with an empty buffer, the result is the other buffer
+        if (buf1 == end_buf1) return move_forward(buf_out, buf2, end_buf2);
+        if (buf2 == end_buf2) return move_forward(buf_out, buf1, end_buf1);
+        // the first of buf2 is not less than the last of buf1
+        if (not comp(*buf2, *(end_buf1 - 1)))
+        {
+            buf_out = move_forward(buf_out, buf1, end_buf1);
+            return move_forward(buf_out, buf2, end_buf2);
+        };
+        // the last of buf2 is less than the first of buf1
+        if (comp(*(end_buf2 - 1), *buf1))
+        {
+            buf_out = move_forward(buf_out, buf2, end_buf2);
+            return move_forward(buf_out, buf1, end_buf1);
+        };
+    };
+    // element by element; with equivalent elements, buf1 goes first
+    for (; buf1 != end_buf1 and buf2 != end_buf2; ++buf_out)
+    {
+        if (comp(*buf2, *buf1)) *buf_out = std::move(*(buf2++));
+        else                    *buf_out = std::move(*(buf1++));
+    };
+    // one buffer is finished, the rest of the other is moved to the output
+    if (buf1 == end_buf1) return move_forward(buf_out, buf2, end_buf2);
+    return move_forward(buf_out, buf1, end_buf1);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge_construct
+/// @brief Merge the two buffers pointed by first1 and first2, constructing
+///        the elements in the uninitialized buffer pointed by it_out
+/// @param first1 : iterator to the first element in the first buffer
+/// @param last1 : last iterator of the first buffer
+/// @param first2 : iterator to the first element to the second buffer
+/// @param last2 : final iterator of the second buffer
+/// @param it_out : uninitialized buffer where move the elements merged
+/// @param comp : comparison object
+/// @return pointer returned by the last call to move_construct over it_out
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Value_t, class Compare>
+static Value_t *merge_construct(Iter1_t first1, const Iter1_t last1,
+                                Iter2_t first2, const Iter2_t last2,
+                                Value_t *it_out, Compare comp)
+{
+    //-------------------------------------------------------------------------
+    //                       Metaprogramming
+    //-------------------------------------------------------------------------
+    static_assert (std::is_same< Value_t, value_iter<Iter1_t> >::value,
+                    "Incompatible iterators\n");
+    static_assert (std::is_same< Value_t, value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                       Code
+    //-------------------------------------------------------------------------
+    const size_t MIN_CHECK = 1024;
+
+    if (size_t((last1 - first1) + (last2 - first2)) >= MIN_CHECK)
+    {
+        // with an empty buffer, the result is the other buffer
+        if (first1 == last1) return move_construct(it_out, first2, last2);
+        if (first2 == last2) return move_construct(it_out, first1, last1);
+        // the first of the second buffer is not less than the last of the first
+        if (not comp(*first2, *(last1 - 1)))
+        {
+            it_out = move_construct(it_out, first1, last1);
+            return move_construct(it_out, first2, last2);
+        };
+        // the last of the second buffer is less than the first of the first
+        if (comp(*(last2 - 1), *first1))
+        {
+            it_out = move_construct(it_out, first2, last2);
+            return move_construct(it_out, first1, last1);
+        };
+    };
+    // element by element; with equivalent elements, the first buffer goes first
+    for (; first1 != last1 and first2 != last2; ++it_out)
+    {
+        if (comp(*first2, *first1))
+            construct_object(it_out, std::move(*(first2++)));
+        else
+            construct_object(it_out, std::move(*(first1++)));
+    };
+    // one buffer is finished, the rest of the other is constructed in it_out
+    if (first1 == last1) return move_construct(it_out, first2, last2);
+    return move_construct(it_out, first1, last1);
+};
+//
+//---------------------------------------------------------------------------
+//  function : merge_half
+/// @brief : Merge two buffers. The first buffer is in a separate memory.
+///          The second buffer has, before buf2, an empty space of the same
+///          size as the first buffer (end_buf1 - buf1), where buf_out points
+///
+/// @param buf1 : iterator to the first element of the first buffer
+/// @param end_buf1 : iterator after the last element of the first buffer
+/// @param buf2 : iterator to the first element of the second buffer
+/// @param end_buf2 : iterator after the last element of the second buffer
+/// @param buf_out : iterator to the first position of the output
+/// @param comp : comparison object for the elements of both buffers
+/// @return end_buf2, or the iterator returned by the last move_forward
+///         (presumably the end of the merged output)
+//---------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static Iter2_t merge_half(Iter1_t buf1, const Iter1_t end_buf1, Iter2_t buf2,
+                          const Iter2_t end_buf2, Iter2_t buf_out, Compare comp)
+{
+    //-------------------------------------------------------------------------
+    //                         Metaprogramming
+    //-------------------------------------------------------------------------
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                         Code
+    //-------------------------------------------------------------------------
+#ifdef __BS_DEBUG
+    assert ( (buf2 - buf_out) == ( end_buf1 - buf1));
+#endif
+    const size_t MIN_CHECK = 1024;
+    if (size_t((end_buf1 - buf1) + (end_buf2 - buf2)) >= MIN_CHECK)
+    {
+        if (buf1 == end_buf1) return end_buf2;
+        if (buf2 == end_buf2) return move_forward(buf_out, buf1, end_buf1);
+        // the first of buf2 is not less than the last of buf1: buf2 stays
+        if (not comp(*buf2, *(end_buf1 - 1)))
+        {
+            move_forward(buf_out, buf1, end_buf1);
+            return end_buf2;
+        };
+        // the last of buf2 is less than the first of buf1
+        if (comp(*(end_buf2 - 1), *buf1))
+        {
+            buf_out = move_forward(buf_out, buf2, end_buf2);
+            return move_forward(buf_out, buf1, end_buf1);
+        };
+    };
+    // element by element; with equivalent elements, buf1 goes first
+    for (; buf1 != end_buf1 and buf2 != end_buf2; ++buf_out)
+    {
+        if (comp(*buf2, *buf1)) *buf_out = std::move(*(buf2++));
+        else                    *buf_out = std::move(*(buf1++));
+    };
+    if (buf2 != end_buf2) return end_buf2;
+    return move_forward(buf_out, buf1, end_buf1);
+};
+
+//
+//---------------------------------------------------------------------------
+//  function : merge_half_backward
+/// @brief : Merge two buffers from the back to the front. The output ends
+///          at end_buf_out, and the distance from end_buf1 to end_buf_out
+///          must be the size of the second buffer (asserted with __BS_DEBUG)
+/// @param buf1 : iterator to the first element of the first buffer
+/// @param end_buf1 : iterator after the last element of the first buffer
+/// @param buf2 : iterator to the first element of the second buffer
+/// @param end_buf2 : iterator after the last element of the second buffer
+/// @param end_buf_out : iterator after the last position of the output
+/// @param comp : comparison object for the elements of both buffers
+/// @return buf1, or the iterator returned by the last move_backward
+/// @remarks NOTE(review): the declared return type is Iter2_t, but buf1 is
+///          an Iter1_t. Confirm with the callers before changing it
+//---------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static Iter2_t merge_half_backward(Iter1_t buf1, Iter1_t end_buf1, Iter2_t buf2,
+                                   Iter2_t end_buf2, Iter1_t end_buf_out,
+                                   Compare comp)
+{
+    //-------------------------------------------------------------------------
+    //                         Metaprogramming
+    //-------------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value1_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same< value1_t, value2_t >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                         Code
+    //-------------------------------------------------------------------------
+#ifdef __BS_DEBUG
+    assert ((end_buf_out - end_buf1) == (end_buf2 - buf2) );
+#endif
+    const size_t MIN_CHECK = 1024;
+    // the shortcuts are tried only with MIN_CHECK or more elements
+    if (size_t((end_buf1 - buf1) + (end_buf2 - buf2)) >= MIN_CHECK)
+    {
+        if (buf2 == end_buf2) return buf1;
+        if (buf1 == end_buf1)
+            return here::move_backward(end_buf_out, buf2, end_buf2);
+        // the first of buf2 is not less than the last of buf1: buf1 stays
+        if (not comp(*buf2, *(end_buf1 - 1)))
+        {
+            here::move_backward(end_buf_out, buf2, end_buf2);
+            return buf1;
+        };
+        // the last of buf2 is less than the first of buf1: buf1 goes last
+        if (comp(*(end_buf2 - 1), *buf1))
+        {
+            Iter1_t mid = here::move_backward(end_buf_out, buf1, end_buf1);
+            return here::move_backward(mid, buf2, end_buf2);
+        };
+    };
+    while ((buf1 != end_buf1) and (buf2 != end_buf2))
+    {
+        *(--end_buf_out) =
+                        (not comp(*(end_buf2 - 1), *(end_buf1 - 1))) ?
+                                        std::move(*(--end_buf2)):
+                                        std::move(*(--end_buf1));
+    };
+    return (buf1 == end_buf1) ?
+                    here::move_backward(end_buf_out, buf2, end_buf2) : buf1;
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function : merge_uncontiguous
+/// @brief : merge two uncontiguous buffers, placing the results in the same
+///          buffers (first the positions of src1, after the ones of src2).
+///          Use an auxiliary buffer pointed by aux
+///
+/// @param src1 : iterator to the first element of the first buffer
+/// @param end_src1 : last iterator  of the first buffer
+/// @param src2 : iterator to the first element of the second buffer
+/// @param end_src2 : last iterator  of the second buffer
+/// @param aux  : auxiliary buffer, used for up to (end_src1 - src1) elements
+/// @param comp : comparison object
+/// @return true : no changes done,  false : changes in the buffers
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Iter3_t, class Compare>
+static bool merge_uncontiguous(Iter1_t src1, const Iter1_t end_src1,
+                               Iter2_t src2, const Iter2_t end_src2,
+                               Iter3_t aux, Compare comp)
+{
+    //-------------------------------------------------------------------------
+    //                    Metaprogramming
+    //-------------------------------------------------------------------------
+    typedef value_iter<Iter1_t> type1;
+    typedef value_iter<Iter2_t> type2;
+    typedef value_iter<Iter3_t> type3;
+    static_assert (std::is_same< type1, type2 >::value,
+                    "Incompatible iterators\n");
+    static_assert (std::is_same< type3, type2 >::value,
+                    "Incompatible iterators\n");
+
+    // Nothing is done when a buffer is empty, or when the first element of
+    // the second buffer is not less than the last element of the first
+    // buffer, because no element has to change its position
+    if (src1 == end_src1 or src2 == end_src2
+                    or not comp(*src2, *(end_src1 - 1))) return true;
+    // skip the elements of src1 which don't go after the first of src2
+    while (src1 != end_src1 and not comp(*src2, *src1))
+        ++src1;
+    // the rest of src1 is moved to aux, and its positions receive the output
+    Iter3_t const end_aux = aux + (end_src1 - src1);
+    Iter2_t src2_first = src2;
+    move_forward(aux, src1, end_src1);
+    // merge aux and src2 over src1; with equivalent elements aux goes first
+    while ((src1 != end_src1) and (src2 != end_src2))
+    {
+        *(src1++) = std::move((not comp(*src2, *aux)) ? *(aux++) : *(src2++));
+    }
+    // the pending elements go to the second buffer, beginning at src2_first
+    if (src2 == end_src2)
+    {
+        while (src1 != end_src1)
+            *(src1++) = std::move(*(aux++));
+        move_forward(src2_first, aux, end_aux);
+    }
+    else
+    {
+        merge_half(aux, end_aux, src2, end_src2, src2_first, comp);
+    };
+    return false;
+};
+
+//
+//-----------------------------------------------------------------------------
+//  function : merge_contiguous
+/// @brief : merge two contiguous buffers, using an auxiliary buffer pointed
+///          by buf. The results are in src1 and src2
+///
+/// @param src1: iterator to the first position of the first buffer
+/// @param src2: final iterator of the first buffer and first iterator
+///              of the second buffer
+/// @param end_src2 : final iterator of the second buffer
+/// @param buf  : iterator to the buffer used as auxiliary memory
+/// @param comp : comparison object
+/// @return true : no changes done,  false : changes in the buffers
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static bool merge_contiguous(Iter1_t src1, Iter1_t src2, Iter1_t end_src2,
+                             Iter2_t buf, Compare comp)
+{
+    //-------------------------------------------------------------------------
+    //                      Metaprogramming
+    //-------------------------------------------------------------------------
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                         Code
+    //-------------------------------------------------------------------------
+    // an empty buffer, or the first of src2 is not less than the previous
+    if (src1 == src2) return true;
+    if (src2 == end_src2) return true;
+    if (not comp(*src2, *(src2 - 1))) return true;
+
+    // skip the elements of src1 which don't go after the first of src2
+    Iter1_t const end_src1 = src2;
+    while (src1 != end_src1 and not comp(*src2, *src1)) { ++src1; };
+    if (src1 == end_src1) return false;
+    // the rest of the first buffer is moved to buf and merged over src1
+    size_t const pending = end_src1 - src1;
+    move_forward(buf, src1, end_src1);
+    merge_half(buf, buf + pending, src2, end_src2, src1, comp);
+    return false;
+};
+//
+//-----------------------------------------------------------------------------
+//  function : merge_circular
+/// @brief : merge two buffers at the back of a circular buffer, until one
+///          of them is finished. This function doesn't check the parameters
+/// @param buf1: iterator to the first position of the first buffer
+/// @param end_buf1: iterator after the last element of the first buffer
+/// @param buf2: iterator to the first element of the second buffer
+/// @param end_buf2: iterator after the last element of the second buffer
+/// @param circ : circular buffer
+/// @param comp : comparison object
+/// @param it1_out, it2_out : [out] positions reached in each buffer
+/// @return true : finished buf1,  false : finished buf2
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Circular, class Compare>
+static bool merge_circular(Iter1_t buf1, Iter1_t end_buf1, Iter2_t buf2,
+                           Iter2_t end_buf2, Circular &circ, Compare comp,
+                           Iter1_t &it1_out, Iter2_t &it2_out)
+{
+    //-------------------------------------------------------------------------
+    //                      Metaprogramming
+    //-------------------------------------------------------------------------
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 value_iter<Iter2_t> >::value,
+                    "Incompatible iterators\n");
+    static_assert (std::is_same< value_iter<Iter1_t>,
+                                 typename Circular::value_t >::value,
+                    "Incompatible iterators\n");
+
+    //-------------------------------------------------------------------------
+    //                      Code
+    //-------------------------------------------------------------------------
+#ifdef __BS_DEBUG
+    assert ( circ.free_size() >= size_t ((end_buf1-buf1) + (end_buf2-buf2)));
+#endif
+    // the first of buf2 is not less than the last of buf1: all buf1 is moved
+    if (not comp(*buf2, *(end_buf1 - 1)))
+    {
+        circ.push_move_back(buf1, (end_buf1 - buf1));
+        it1_out = end_buf1;
+        it2_out = buf2;
+        return true;
+    };
+    // the last of buf2 is less than the first of buf1: all buf2 is moved
+    if (comp(*(end_buf2 - 1), *buf1))
+    {
+        circ.push_move_back(buf2, (end_buf2 - buf2));
+        it1_out = buf1;
+        it2_out = end_buf2;
+        return false;
+    };
+    // element by element; with equivalent elements, buf1 goes first
+    while (buf1 != end_buf1 and buf2 != end_buf2)
+    {
+        if (comp(*buf2, *buf1)) circ.push_back(std::move(*(buf2++)));
+        else                    circ.push_back(std::move(*(buf1++)));
+    };
+    it2_out = buf2;
+    it1_out = buf1;
+    return (buf1 == end_buf1);
+};
+//
+//****************************************************************************
+};//    End namespace util
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/util/search.hpp b/boost/sort/common/util/search.hpp
new file mode 100644
index 0000000000..fbe056e2f8
--- /dev/null
+++ b/boost/sort/common/util/search.hpp
@@ -0,0 +1,529 @@
+//----------------------------------------------------------------------------
+/// @file search.hpp
+/// @brief Binary search functions (find, bounds, insertion points) for ranges
+/// @author Copyright (c) 2017 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See copy at http://www.boost.org/LICENSE_1_0.txt  )
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_SEARCH_HPP
+#define __BOOST_SORT_COMMON_SEARCH_HPP
+
+#include <boost/sort/common/util/traits.hpp>
+#include <cassert>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+
+template<class T>
+struct filter_pass // default Filter : every element is used as its own key
+{
+    typedef T key; // type of the key handed to the comparison object
+    const key & operator()(const T & val) const // returns val unchanged
+    {
+        return val;
+    };
+};
+
+//
+//###########################################################################
+//                                                                         ##
+//    ################################################################     ##
+//    #                                                              #     ##
+//    #           I N T E R N A L      F U N C T I O N S             #     ##
+//    #                                                              #     ##
+//    ################################################################     ##
+//                                                                         ##
+//                       I M P O R T A N T                                 ##
+//                                                                         ##
+// These functions are not directly callable by the user, are for internal ##
+// use only.                                                               ##
+// These functions don't check the parameters                              ##
+//                                                                         ##
+//###########################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : internal_find_first
+/// @brief Binary search of val in the range [first, last), assumed sorted
+///        by comp. Always returns a valid iterator in the range [first, last-1]
+///        If val exists, returns the iterator to its first occurrence.
+///        If it doesn't exist, returns the first element greater than val.
+///        If val is greater than *(last-1), returns (last-1)
+///        The range must not be empty : (last - 1) is computed unchecked
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to find
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the element found (never last)
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+          class Compare = std::less<typename Filter::key> >
+inline Iter_t internal_find_first(Iter_t first, Iter_t last,
+                                  const typename Filter::key &val,
+                                  const Compare & comp = Compare(), 
+                                  Filter flt = Filter())
+{
+    Iter_t LI = first, LS = last - 1, it_out = first; // LI, LS : inclusive limits
+    while (LI != LS)
+    {
+        it_out = LI + ((LS - LI) >> 1); // middle point, rounded down
+        if (comp(flt(*it_out), val))
+            LI = it_out + 1; // key lower than val : discard [LI, it_out]
+        else LS = it_out; // key not lower than val : it_out is still a candidate
+    };
+    return LS;
+};
+//
+//-----------------------------------------------------------------------------
+//  function : internal_find_last
+/// @brief Binary search of val in the range [first, last), assumed sorted
+///        by comp. Always returns a valid iterator in the range [first, last-1]
+///        If val exists, returns the iterator to its last occurrence.
+///        If it doesn't exist, returns the last element lower than val.
+///        If val is lower than (*first), returns first.
+///        The range must not be empty : (last - 1) is computed unchecked
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to find
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the element found (never last)
+
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+                class Compare = std::less<typename Filter::key> >
+inline Iter_t internal_find_last(Iter_t first, Iter_t last,
+                                 const typename Filter::key &val,
+                                 const Compare & comp = Compare(), Filter flt =
+                                                 Filter())
+{
+    Iter_t LI = first, LS = last - 1, it_out = first; // LI, LS : inclusive limits
+    while (LI != LS)
+    {
+        it_out = LI + ((LS - LI + 1) >> 1); // middle point, rounded up
+        if (comp(val, flt(*it_out))) LS = it_out - 1; // key greater than val
+        else                         LI = it_out; // it_out is still a candidate
+    };
+    return LS;
+};
+
+//
+//###########################################################################
+//                                                                         ##
+//    ################################################################     ##
+//    #                                                              #     ##
+//    #              P U B L I C       F U N C T I O N S             #     ##
+//    #                                                              #     ##
+//    ################################################################     ##
+//                                                                         ##
+//###########################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : find_first
+/// @brief Search val in the range [first, last), assumed sorted by comp.
+///        If it exists, returns its first occurrence. If not, returns last
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to find
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the element found, or last if it is not found
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+                class Compare = std::less<typename Filter::key> >
+inline Iter_t find_first(Iter_t first, Iter_t last,
+                         const typename Filter::key &val, 
+                         const Compare & comp = Compare(),
+                         Filter flt = Filter())
+{
+    assert((last - first) >= 0);
+    if (first == last) return last; // empty range : nothing to find
+    Iter_t LS = internal_find_first(first, last, val, comp, flt);
+    return (comp(flt(*LS), val) or comp(val, flt(*LS))) ? last : LS; // != val
+};
+//
+//-----------------------------------------------------------------------------
+//  function : find_last
+/// @brief Search val in the range [first, last), assumed sorted by comp.
+///        If it exists, returns its last occurrence. If not, returns last
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to find
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the element found, or last if it is not found
+
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+          class Compare = std::less<typename Filter::key> >
+inline Iter_t find_last(Iter_t first, Iter_t last,
+                        const typename Filter::key &val, 
+                        const Compare & comp = Compare(),
+                        Filter flt = Filter())
+{
+    assert((last - first) >= 0);
+    if (last == first) return last; // empty range : nothing to find
+    Iter_t LS = internal_find_last(first, last, val, comp, flt);
+    return (comp(flt(*LS), val) or comp(val, flt(*LS))) ? last : LS; // != val
+};
+
+//----------------------------------------------------------------------------
+//  function : lower_bound
+/// @brief Returns an iterator pointing to the first element in the range
+///        [first, last) that is not less than (i.e. greater or equal to) val.
+/// @param [in] first, last : iterators which delimit the range [first, last)
+/// @param [in] val : key to find
+/// @param [in] comp, flt : object to compare keys, object to obtain the key
+/// @return iterator to the element found, or last if all are less than val
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+                class Compare = std::less<typename Filter::key> >
+inline Iter_t lower_bound(Iter_t first, Iter_t last,
+                          const typename Filter::key &val,
+                          const Compare & comp = Compare(), 
+                          Filter flt = Filter())
+{
+    assert((last - first) >= 0);
+    if (last == first) return last; // empty range
+    Iter_t itaux = internal_find_first(first, last, val, comp, flt);
+    return (itaux == (last - 1) and comp(flt(*itaux), val)) ? last : itaux;
+};
+//----------------------------------------------------------------------------
+//  function :upper_bound
+/// @brief Returns the first element greater than val. If it doesn't exist,
+///        returns last
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to find
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the element found, or last
+/// @remarks The range is assumed to be sorted according to comp
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+                class Compare = std::less<typename Filter::key> >
+inline Iter_t upper_bound(Iter_t first, Iter_t last,
+                          const typename Filter::key &val,
+                          const Compare & comp = Compare(), 
+                          Filter flt = Filter())
+{
+    assert((last - first) >= 0);
+    if (last == first) return last; // empty range
+    Iter_t itaux = internal_find_last(first, last, val, comp, flt);
+    return (itaux == first and comp(val, flt(*itaux))) ? itaux : itaux + 1;
+}
+;
+//----------------------------------------------------------------------------
+//  function :equal_range
+/// @brief Returns the pair (lower_bound, upper_bound) of val in the range.
+///        If val doesn't exist, the two iterators of the pair are equal
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to find
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return pair of iterators
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+         class Compare = std::less<typename Filter::key> >
+inline std::pair<Iter_t, Iter_t> equal_range(Iter_t first, Iter_t last,
+                                             const typename Filter::key &val,
+                                             const Compare & comp = Compare(),
+                                             Filter flt = Filter())
+{
+    return std::make_pair(lower_bound(first, last, val, comp, flt),
+                    upper_bound(first, last, val, comp, flt));
+};
+//
+//-----------------------------------------------------------------------------
+//  function : insert_first
+/// @brief Position to insert val before all the elements equivalent to it.
+///        It is the result of lower_bound with the same parameters
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to insert
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the first element not less than val, or last
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+                class Compare = std::less<typename Filter::key> >
+inline Iter_t insert_first(Iter_t first, Iter_t last,
+                           const typename Filter::key &val,
+                           const Compare & comp = Compare(), Filter flt =
+                                           Filter())
+{
+    return lower_bound(first, last, val, comp, flt);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : insert_last
+/// @brief Position to insert val after all the elements equivalent to it.
+///        It is the result of upper_bound with the same parameters
+/// @param [in] first : iterator to the first element of the range
+/// @param [in] last : iterator past the last element of the range
+/// @param [in] val : key to insert
+/// @param [in] comp : object used to compare two keys
+/// @param [in] flt : object which obtains the key from an element
+/// @return iterator to the first element greater than val, or last
+
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Filter = filter_pass<value_iter<Iter_t> >,
+                class Compare = std::less<typename Filter::key> >
+inline Iter_t insert_last(Iter_t first, Iter_t last,
+                          const typename Filter::key &val,
+                          const Compare & comp = Compare(), Filter flt =
+                                          Filter())
+{
+    return upper_bound(first, last, val, comp, flt);
+};
+
+/*
+ NOTE : disabled previous version of these functions, without the Filter parameter
+ //
+ //###########################################################################
+ //                                                                         ##
+ //    ################################################################     ##
+ //    #                                                              #     ##
+ //    #           I N T E R N A L      F U N C T I O N S             #     ##
+ //    #                                                              #     ##
+ //    ################################################################     ##
+ //                                                                         ##
+ //                       I M P O R T A N T                                 ##
+ //                                                                         ##
+ // These functions are not directly callable by the user, are for internal ##
+ // use only.                                                               ##
+ // These functions don't check the parameters                              ##
+ //                                                                         ##
+ //###########################################################################
+ //
+ //-----------------------------------------------------------------------------
+ //  function : internal_find_first
+ /// @brief find if a value exist in the range [first, last).
+ ///        Always return as valid iterator in the range [first, last-1]
+ ///        If exist return the iterator to the first occurrence. If don't exist
+ ///        return the first greater than val.
+ ///        If val is greater than the *(last-1), return (last-1)
+ ///        If val is lower than  (*first), return  first
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found,
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t>  >
+ inline Iter_t internal_find_first ( Iter_t first, Iter_t last,
+ const value_iter<Iter_t> &val,
+ const Compare & comp= Compare()  )
+ {
+ Iter_t LI = first , LS = last - 1, it_out = first;
+ while ( LI != LS)
+ {   it_out = LI + ( (LS - LI) >> 1);
+ if ( comp ( *it_out, val)) LI = it_out + 1 ; else LS = it_out ;
+ };
+ return LS ;
+ };
+ //
+ //-----------------------------------------------------------------------------
+ //  function : internal_find_last
+ /// @brief find if a value exist in the range [first, last).
+ ///        Always return as valid iterator in the range [first, last-1]
+ ///        If exist return the iterator to the last occurrence.
+ ///        If don't exist return the first lower than val.
+ ///        If val is greater than *(last-1) return (last-1).
+ ///        If is lower than the first, return first
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found, if not found return last
+
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t internal_find_last ( Iter_t first, Iter_t last ,
+ const value_iter<Iter_t> &val,
+ const Compare &comp= Compare() )
+ {
+ Iter_t LI = first , LS = last - 1, it_out = first ;
+ while ( LI != LS)
+ {   it_out = LI + ( (LS - LI + 1) >> 1);
+ if ( comp (val, *it_out)) LS = it_out - 1 ; else LI = it_out ;
+ };
+ return LS ;
+ };
+
+ //
+ //###########################################################################
+ //                                                                         ##
+ //    ################################################################     ##
+ //    #                                                              #     ##
+ //    #              P U B L I C       F U N C T I O N S             #     ##
+ //    #                                                              #     ##
+ //    ################################################################     ##
+ //                                                                         ##
+ //###########################################################################
+ //
+ //-----------------------------------------------------------------------------
+ //  function : find_first
+ /// @brief find if a value exist in the range [first, last). If exist return the
+ ///        iterator to the first occurrence. If don't exist return last
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found, and if not last
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t find_first ( Iter_t first, Iter_t last,
+ const value_iter<Iter_t> &val,
+ Compare comp = Compare() )
+ {
+ assert ( (last - first) >= 0 );
+ if ( first == last) return last ;
+ Iter_t LS = internal_find_first ( first, last, val, comp);
+ return (comp (*LS, val) or comp (val, *LS))?last:LS;
+ };
+ //
+ //-----------------------------------------------------------------------------
+ //  function : find_last
+ /// @brief find if a value exist in the range [first, last). If exist return the
+ ///        iterator to the last occurrence. If don't exist return last
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found, if not found return last
+
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t find_last ( Iter_t first, Iter_t last ,
+ const value_iter<Iter_t> &val,
+ Compare comp = Compare())
+ {
+ assert ( (last - first ) >= 0 );
+ if ( last == first ) return last ;
+ Iter_t LS = internal_find_last (first, last, val, comp);
+ return (comp (*LS, val) or comp (val, *LS))?last:LS ;
+ };
+
+ //----------------------------------------------------------------------------
+ //  function : lower_bound
+ /// @brief Returns an iterator pointing to the first element in the range
+ ///        [first, last) that is not less than (i.e. greater or equal to) val.
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t lower_bound ( Iter_t first, Iter_t last ,
+ const value_iter<Iter_t> &val,
+ Compare &comp = Compare() )
+ {
+ assert ( (last - first ) >= 0 );
+ if ( last == first ) return last ;
+ Iter_t  itaux = internal_find_first( first, last, val,comp);
+ return (itaux == (last - 1) and comp (*itaux, val))?last: itaux;
+ };
+ //----------------------------------------------------------------------------
+ //  function :upper_bound
+ /// @brief return the first element greather than val.If don't exist
+ ///        return last
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found
+ /// @remarks
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t upper_bound ( Iter_t first, Iter_t last ,
+ const value_iter<Iter_t> &val,
+ Compare &comp = Compare() )
+ {
+ assert ( (last - first ) >= 0 );
+ if ( last == first ) return last ;
+ Iter_t itaux = internal_find_last( first, last, val,comp);
+ return ( itaux == first and comp (val,*itaux))? itaux: itaux + 1;
+ };
+ //----------------------------------------------------------------------------
+ //  function :equal_range
+ /// @brief return a pair of lower_bound and upper_bound with the value val.If
+ ///        don't exist return last in the two elements of the pair
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return pair of iterators
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline std::pair<Iter_t, Iter_t> equal_range ( Iter_t first, Iter_t last ,
+ const value_iter<Iter_t> &val,
+ Compare &comp = Compare() )
+ {
+ return std::make_pair(lower_bound(first, last, val,comp),
+ upper_bound(first, last, val,comp));
+ };
+ //
+ //-----------------------------------------------------------------------------
+ //  function : insert_first
+ /// @brief find if a value exist in the range [first, last). If exist return the
+ ///        iterator to the first occurrence. If don't exist return last
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found, and if not last
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t insert_first ( Iter_t first, Iter_t last,
+ const value_iter<Iter_t> &val,
+ Compare comp = Compare() )
+ {
+ return lower_bound (first, last, val, comp);
+ };
+ //
+ //-----------------------------------------------------------------------------
+ //  function : insert_last
+ /// @brief find if a value exist in the range [first, last). If exist return the
+ ///        iterator to the last occurrence. If don't exist return last
+ //
+ /// @param [in] first : iterator to the first element of the range
+ /// @param [in] last : iterator to the last element of the range
+ /// @param [in] val : value to find
+ /// @param [in] comp : object for to compare two value_t objects
+ /// @return iterator to the element found, if not found return last
+
+ //-----------------------------------------------------------------------------
+ template < class Iter_t, class Compare = compare_iter<Iter_t> >
+ inline Iter_t insert_last ( Iter_t first, Iter_t last ,
+ const value_iter<Iter_t> &val,
+ Compare comp = Compare())
+ {
+ return upper_bound (first, last, val, comp);
+ };
+
+ */
+//
+//****************************************************************************
+};//    End namespace util
+};//    End namespace common
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/common/util/traits.hpp b/boost/sort/common/util/traits.hpp
new file mode 100644
index 0000000000..68e5cf0359
--- /dev/null
+++ b/boost/sort/common/util/traits.hpp
@@ -0,0 +1,123 @@
+//----------------------------------------------------------------------------
+/// @file traits.hpp
+/// @brief this file contains the metaprogramming aliases value_iter,
+///         compare_iter, the enable_if_* SFINAE aliases and constructor
+/// @author Copyright(c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_COMMON_UTIL_TRAITS_HPP
+#define __BOOST_SORT_COMMON_UTIL_TRAITS_HPP
+
+#include <functional>
+#include <iterator>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace boost
+{
+namespace sort
+{
+namespace common
+{
+namespace util
+{
+//----------------------------------------------------------------------------
+//                  USING SENTENCES
+//----------------------------------------------------------------------------
+using std::iterator_traits;
+
+//
+//---------------------------------------------------------------------------
+/// @class value_iter
+/// @brief Alias of the value_type which iterator_traits defines for iter_t
+/// @remarks The main utility of this is to simplify the default template
+///          parameters which depend on the type of the elements
+//---------------------------------------------------------------------------
+template<class iter_t>
+using value_iter = typename iterator_traits< iter_t >::value_type;
+//
+//---------------------------------------------------------------------------
+/// @class compare_iter
+/// @brief Alias of std::less instantiated with the type of the object
+///        pointed by the iterator received as template parameter
+///        ( std::less < value_iter < iter_t > > )
+/// @remarks The main utility of this is to simplify the default template
+///          parameter of comparison
+//---------------------------------------------------------------------------
+template<class iter_t>
+using compare_iter =  std::less< value_iter< iter_t > >;
+
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_not_integral
+/// @brief SFINAE alias, valid (void) only when T is NOT an integral type.
+///        Used to detect if the third parameter in the invocation of the
+///        parallel sorting algorithms is a comparison object
+/// @remarks When T is an integral type the substitution fails
+//---------------------------------------------------------------------------
+template<class T>
+using enable_if_not_integral =
+      typename std::enable_if< !std::is_integral< T >::value >::type;
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_integral
+/// @brief SFINAE alias, valid (void) only when T is an integral type.
+///        Used to detect if the third parameter in the invocation of the
+///        parallel sorting algorithms is the number of threads to use
+/// @remarks When T is not an integral type the substitution fails
+//---------------------------------------------------------------------------
+template<class T>
+using enable_if_integral =
+      typename std::enable_if< std::is_integral< T >::value >::type;
+
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_string
+/// @brief SFINAE alias, valid (void) only when T is exactly std::string.
+///        Used to apply specialized parameters in the invocation
+///        of the block_indirect_sort algorithm
+/// @remarks When T is any other type the substitution fails
+//---------------------------------------------------------------------------
+template<class T>
+using enable_if_string =
+      typename std::enable_if< std::is_same< T, std::string >::value >::type;
+
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_not_string
+/// @brief SFINAE alias, valid (void) only when T is NOT std::string.
+///        Used to apply the general parameters in the invocation
+///        of the block_indirect_sort algorithm
+/// @remarks When T is exactly std::string the substitution fails
+//---------------------------------------------------------------------------
+template<class T>
+using enable_if_not_string =
+      typename std::enable_if<! std::is_same< T, std::string >::value >::type;
+
+//
+//---------------------------------------------------------------------------
+/// @class constructor
+/// @brief Functor which wraps the constructor of the class T, in order to be
+///        invoked from a bind or a lambda
+/// @remarks operator() builds a temporary T and doesn't return anything
+//---------------------------------------------------------------------------
+template<class T>
+struct constructor
+{
+    template<class ... Args>
+    void operator()(Args && ... args)
+    {
+        T(std::forward<Args> (args) ...); // temporary, destroyed at once
+    };
+};
+//
+//****************************************************************************
+};// End namespace util
+};// End namespace common
+};// End namespace sort
+};// End namespace boost
+//****************************************************************************
+#endif
diff --git a/boost/sort/flat_stable_sort/flat_stable_sort.hpp b/boost/sort/flat_stable_sort/flat_stable_sort.hpp
new file mode 100644
index 0000000000..ee48e7b9a0
--- /dev/null
+++ b/boost/sort/flat_stable_sort/flat_stable_sort.hpp
@@ -0,0 +1,312 @@
+//----------------------------------------------------------------------------
+/// @file flat_stable_sort.hpp
+/// @brief Flat stable sort algorithm
+///
+/// @author Copyright (c) 2017 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_FLAT_STABLE_SORT_HPP
+#define __BOOST_SORT_FLAT_STABLE_SORT_HPP
+
+#include <boost/sort/insert_sort/insert_sort.hpp>
+#include <boost/sort/common/util/insert.hpp>
+#include <boost/sort/common/merge_block.hpp>
+#include <boost/sort/common/sort_basic.hpp>
+#include <boost/sort/common/range.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/indirect.hpp>
+
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace flat_internal
+{
+namespace bsc = boost::sort::common;
+namespace bscu = boost::sort::common::util;
+//---------------------------------------------------------------------------
+/// @struct flat_stable_sort
+/// @brief  Stable sort algorithm with 1 thread, built over bsc::merge_block.
+///         All the work is done by the constructors of the class
+//----------------------------------------------------------------------------
+template <class Iter_t, typename Compare = bscu::compare_iter<Iter_t>,
+           uint32_t Power2 = 10>
+class flat_stable_sort: public bsc::merge_block<Iter_t, Compare, Power2>
+{
+    //------------------------------------------------------------------------
+    //               BASE CLASS
+    //------------------------------------------------------------------------
+    using merge_block_t = bsc::merge_block<Iter_t, Compare, Power2>;
+
+    //-------------------------------------------------------------------------
+    //               TYPES TAKEN FROM THE BASE CLASS
+    //-------------------------------------------------------------------------
+    using value_t = typename merge_block_t::value_t;
+    using range_pos = typename merge_block_t::range_pos;
+    using range_it = typename merge_block_t::range_it;
+    using range_buf = typename merge_block_t::range_buf;
+    using it_index = typename merge_block_t::it_index;
+    using circular_t = typename merge_block_t::circular_t;
+
+    //------------------------------------------------------------------------
+    //     CONSTANTS, VARIABLES AND FUNCTIONS OF THE BASE CLASS USED HERE
+    //------------------------------------------------------------------------
+    using merge_block_t::BLOCK_SIZE;
+    using merge_block_t::LOG_BLOCK;
+
+    using merge_block_t::index;
+    using merge_block_t::cmp;
+    using merge_block_t::ptr_circ;
+
+    using merge_block_t::get_range;
+    using merge_block_t::get_group_range;
+    using merge_block_t::merge_range_pos;
+    using merge_block_t::move_range_pos_backward;
+    using merge_block_t::rearrange_with_index;
+
+public:
+    //------------------------------------------------------------------------
+    //  Sort [first, last) : divide over all the index + rearrange_with_index
+    //-------------------------------------------------------------------------
+    flat_stable_sort(Iter_t first, Iter_t last, Compare comp,
+                     circular_t *ptr_circ)
+    : merge_block_t(first, last, comp, ptr_circ)
+    {
+        divide(index.begin(), index.end());
+        rearrange_with_index();
+    }
+    // Same sort, passing a null pointer as circular buffer to the base class
+    flat_stable_sort(Iter_t first, Iter_t last, Compare comp = Compare())
+    : flat_stable_sort(first, last, comp, nullptr) { }
+    // Sort the blocks of the range of the index [itx_first, itx_last)
+    void divide(it_index itx_first, it_index itx_last);
+    // Sort a group of 1 to 4 blocks
+    void sort_small(it_index itx_first, it_index itx_last);
+    // Look for sorted data at the beginning of the blocks
+    bool is_sorted_forward(it_index itx_first, it_index itx_last);
+    // Look for sorted data at the end of the blocks
+    bool is_sorted_backward(it_index itx_first, it_index itx_last);
+};
+//----------------------------------------------------------------------------
+//  End of class flat_stable_sort
+//----------------------------------------------------------------------------
+//
+//------------------------------------------------------------------------
+//  function : divide
+/// @brief : Sort the blocks of the range of the index [itx_first, itx_last)
+///          Up to 4 blocks are passed to sort_small. Bigger ranges are
+///          split in two halves, sorted apart and joined by merge_range_pos
+/// @param itx_first, itx_last : limits of the range of the index to sort
+//------------------------------------------------------------------------
+template <class Iter_t, typename Compare, uint32_t Power2>
+void flat_stable_sort <Iter_t, Compare, Power2>
+::divide(it_index itx_first, it_index itx_last)
+{
+    size_t nblock = size_t(itx_last - itx_first);
+    // Without blocks there is nothing to sort, and sort_small would
+    // dereference itx_first, which is not valid in an empty range
+    if (nblock == 0) return;
+    if (nblock < 5) return sort_small(itx_first, itx_last);
+    // With more than 7 blocks, try first the detection of sorted data
+    if (nblock > 7 and (is_sorted_forward(itx_first, itx_last) or
+                        is_sorted_backward(itx_first, itx_last))) return;
+    size_t nblock1 = (nblock + 1) >> 1;
+    divide(itx_first, itx_first + nblock1);
+    divide(itx_first + nblock1, itx_last);
+    merge_range_pos(itx_first, itx_first + nblock1, itx_last);
+};
+//
+//------------------------------------------------------------------------
+//  function : sort_small
+/// @brief : Sort a group of 1 to 4 consecutive blocks. One or two blocks
+///          are sorted at once with range_sort_data. Three or four are
+///          divided in two parts, sorted apart and joined by merge_half
+/// @param itx_first : iterator to the first block in the index
+/// @param itx_last : iterator after the last block in the index
+//------------------------------------------------------------------------
+template <class Iter_t, typename Compare, uint32_t Power2>
+void flat_stable_sort <Iter_t, Compare, Power2>
+::sort_small(it_index itx_first, it_index itx_last)
+{
+    const size_t nblock = size_t(itx_last - itx_first);
+    assert(nblock > 0 and nblock < 5);
+    value_t * const pbuf = ptr_circ->get_buffer();
+    range_it rng_all = get_group_range(*itx_first, nblock);
+    if (nblock >= 3)
+    {
+        // The two parts are sorted one after other, because the two
+        // auxiliary ranges begin in the same position of the buffer.
+        // merge_half receives the buffer of the first part and the second
+        const size_t nblock1 = (nblock + 1) >> 1;
+        range_it rng_left = get_group_range(*itx_first, nblock1);
+        range_it rng_right(rng_left.last, rng_all.last);
+        range_buf buf_left(pbuf, pbuf + rng_left.size());
+        range_buf buf_right(pbuf, pbuf + rng_right.size());
+
+        range_sort_data(rng_right, buf_right, cmp);
+        range_sort_buffer(rng_left, buf_left, cmp);
+        merge_half(rng_all, buf_left, rng_right, cmp);
+        return;
+    };
+    // One or two blocks : a single sort with a buffer of the same size
+    range_buf buf_all(pbuf, pbuf + rng_all.size());
+    range_sort_data(rng_all, buf_all, cmp);
+};
+//
+//------------------------------------------------------------------------
+//  function : is_sorted_forward
+/// @brief : detect sorted data at the beginning of the blocks and, when
+///          they are found, sort the remaining data and join both parts
+/// @param itx_first, itx_last : limits of the blocks in the index
+/// @return : true : the blocks were processed  false : no sorted data found
+//------------------------------------------------------------------------
+template <class Iter_t, typename Compare, uint32_t Power2>
+bool flat_stable_sort <Iter_t, Compare, Power2>
+::is_sorted_forward(it_index itx_first, it_index itx_last)
+{
+    size_t nblock = size_t(itx_last - itx_first);
+    range_it rng = get_group_range(*itx_first, nblock);
+    size_t nelem = rng.size();
+    size_t min_process = std::max(BLOCK_SIZE, (nelem >> 3));
+    // NOTE(review): min_process looks like the minimal length accepted
+    size_t nsorted1 = bsc::number_stable_sorted_forward (rng.first, rng.last,
+                                                         min_process, cmp);
+    if (nsorted1 == nelem) return true;
+    if (nsorted1 == 0) return false;
+    // The first nsorted1 elements are sorted, the last nsorted2 are not
+    size_t nsorted2 = nelem - nsorted1;
+    Iter_t itaux = rng.first + nsorted1;
+    if (nsorted2 <= (BLOCK_SIZE << 1))
+    {
+        flat_stable_sort(itaux, rng.last, cmp, ptr_circ);
+        bscu::insert_sorted(rng.first, itaux, rng.last, cmp,
+                            ptr_circ->get_buffer());
+    }
+    else
+    {   // Round the sorted part down to a whole number of blocks
+        size_t mask = ~(BLOCK_SIZE - 1);
+        size_t nsorted1_adjust = nsorted1 & mask;
+        flat_stable_sort(rng.first + nsorted1_adjust, rng.last, cmp,
+                         ptr_circ);
+        size_t nblock1 = nsorted1_adjust >> Power2;
+        merge_range_pos(itx_first, itx_first + nblock1, itx_last);
+    };
+    return true;
+};
+//
+//------------------------------------------------------------------------
+//  function : is_sorted_backward
+/// @brief : detect sorted data at the end of the blocks and, when they
+///          are found, sort the data before them and join both parts
+/// @param itx_first, itx_last : limits of the blocks in the index
+/// @return : true : the blocks were processed  false : no sorted data found
+//------------------------------------------------------------------------
+template <class Iter_t, typename Compare, uint32_t Power2>
+bool flat_stable_sort <Iter_t, Compare, Power2>
+::is_sorted_backward(it_index itx_first, it_index itx_last)
+{
+    size_t nblock = size_t(itx_last - itx_first);
+    range_it rng = get_group_range(*itx_first, nblock);
+    // NOTE(review): min_process looks like the minimal length accepted
+    size_t nelem = rng.size();
+    size_t min_process = std::max(BLOCK_SIZE, (nelem >> 3));
+    // nsorted2 : number of elements reported as sorted at the end
+    size_t nsorted2 = bsc::number_stable_sorted_backward(rng.first, rng.last,
+                                                         min_process, cmp);
+    if (nsorted2 == nelem) return true;
+    if (nsorted2 == 0 ) return false;
+    Iter_t itaux = rng.last - nsorted2;
+    size_t nsorted1 = nelem - nsorted2;
+    // The first nsorted1 elements are the part which must be sorted
+    if (nsorted1 <= (BLOCK_SIZE << 1))
+    {
+        flat_stable_sort(rng.first, itaux, cmp, ptr_circ);
+        bscu::insert_sorted_backward(rng.first, itaux, rng.last, cmp,
+                                     ptr_circ->get_buffer());
+    }
+    else
+    {   // Round nsorted1 up to blocks (assumes BLOCK_SIZE == 1 << Power2)
+        size_t nblock1 = (nsorted1 + BLOCK_SIZE - 1) >> Power2;
+        size_t nsorted1_adjust = (nblock1 << Power2);
+        flat_stable_sort(rng.first, rng.first + nsorted1_adjust, cmp,
+                         ptr_circ);
+        merge_range_pos(itx_first, itx_first + nblock1, itx_last);
+    };
+    return true;
+};
+//****************************************************************************
+};// End namespace flat_internal
+//****************************************************************************
+//
+namespace bscu = boost::sort::common::util;
+namespace flat = boost::sort::flat_internal;
+//
+///---------------------------------------------------------------------------
+//  function flat_stable_sort
+/// @brief Overload selected when the elements are std::string. The sort is
+///        done by flat_internal::flat_stable_sort with Power2 = 6
+/// @param first, last : range to sort     @param cmp : comparison object
+//----------------------------------------------------------------------------
+template <class Iter_t, class Compare = bscu::compare_iter<Iter_t>,
+           bscu::enable_if_string<value_iter<Iter_t> > * = nullptr>
+inline void flat_stable_sort (Iter_t first, Iter_t last,
+                                 Compare cmp = Compare())
+{
+    flat::flat_stable_sort<Iter_t, Compare, 6> sorter (first, last, cmp);
+}
+// Selects the Power2 parameter of flat_stable_sort from the size in bytes
+// of the element : data = sz[BitsSize], with BitsSize taken from bscu::tmsb
+template<size_t Size>
+struct block_size_fss
+{
+    static constexpr const uint32_t BitsSize =
+        (Size > 128) ? 7 : (Size != 0) ? bscu::tmsb[Size - 1] : 0;
+    static constexpr const uint32_t sz[10] = { 10, 10, 10, 9, 8, 7, 6, 6 };
+    static constexpr const uint32_t data = sz[BitsSize];
+};
+
+//
+///---------------------------------------------------------------------------
+//  function flat_stable_sort
+/// @brief Overload selected when the elements are not std::string. The
+///        Power2 parameter of flat_internal::flat_stable_sort is chosen by
+///        block_size_fss from the sizeof of the element
+//----------------------------------------------------------------------------
+template <class Iter_t, class Compare = bscu::compare_iter<Iter_t>,
+           bscu::enable_if_not_string<value_iter<Iter_t> >* = nullptr>
+inline void flat_stable_sort (Iter_t first, Iter_t last,
+                                 Compare cmp = Compare())
+{
+    typedef block_size_fss<sizeof(value_iter<Iter_t> )> block_size_t;
+    flat::flat_stable_sort<Iter_t, Compare, block_size_t::data>
+        sorter (first, last, cmp);
+}
+// Passes flat_stable_sort, for a vector of Iter_t, to common::indirect_sort
+template<class Iter_t, class Compare = compare_iter<Iter_t> >
+inline void indirect_flat_stable_sort (Iter_t first, Iter_t last,
+                                           Compare comp = Compare())
+{
+    using itx_iter = typename std::vector<Iter_t>::iterator;
+    using itx_comp = common::less_ptr_no_null<Iter_t, Compare>;
+    common::indirect_sort (flat_stable_sort<itx_iter, itx_comp>,
+                           first, last, comp);
+}
+
+//****************************************************************************
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/heap_sort/heap_sort.hpp b/boost/sort/heap_sort/heap_sort.hpp
new file mode 100644
index 0000000000..9e89d00b8c
--- /dev/null
+++ b/boost/sort/heap_sort/heap_sort.hpp
@@ -0,0 +1,215 @@
+//----------------------------------------------------------------------------
+/// @file heap_sort.hpp
+/// @brief Heap Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_INTROSORT_DETAIL_HEAP_SORT_HPP
+#define __BOOST_SORT_INTROSORT_DETAIL_HEAP_SORT_HPP
+
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <stdexcept>
+#include <utility> // for std::swap
+#include <boost/sort/common/util/traits.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace heap_detail
+{
+namespace bscu = boost::sort::common::util;
+//
+//---------------------------------------------------------------------------
+//  struct : heap_sort
+/// @brief : Heap sort algorithm. The range is sorted by the constructor
+/// @remarks This algorithm is O(NLogN)
+//---------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+struct heap_sort
+{
+    typedef bscu::value_iter<Iter_t> value_t;
+
+    //
+    //------------------------------------------------------------------------
+    //  function : sort3
+    /// @brief Sort three values in decreasing order and signal the changes
+    /// @param val_0 : first value to compare. At the end holds the greatest
+    /// @param val_1 : second value to compare
+    /// @param val_2 : third value to compare. At the end holds the smallest
+    /// @param [out] bool_0 : if true indicates val_0 had been changed
+    /// @param [out] bool_1 : if true indicates val_1 had been changed
+    /// @param [out] bool_2 : if true indicates val_2 had been changed
+    /// @return if true , some value had changed
+    /// @remarks It uses operator< and operator> of value_t, not Compare
+    //------------------------------------------------------------------------
+    bool sort3 (value_t &val_0, value_t &val_1, value_t &val_2, bool &bool_0,
+                bool &bool_1, bool &bool_2)
+    {
+        bool_0 = bool_1 = bool_2 = false;
+        int value = 0;
+        if (val_0 < val_1) value += 4;
+        if (val_1 < val_2) value += 2;
+        if (val_0 < val_2) value += 1;
+        // value identifies the combination of the three comparisons
+        switch (value)
+        {
+            case 0: break; // val_0 >= val_1 >= val_2 : nothing to do
+            // val_0 >= val_2 > val_1
+            case 2:
+                std::swap (val_1, val_2);
+                bool_1 = bool_2 = true;
+                break;
+            // val_2 > val_0 >= val_1
+            case 3:
+                if (not(val_0 > val_1)) {
+                    std::swap (val_0, val_2);
+                    bool_0 = bool_2 = true;
+                }
+                else
+                {
+                    auto aux = std::move (val_2);
+                    val_2 = std::move (val_1);
+                    val_1 = std::move (val_0);
+                    val_0 = std::move (aux);
+                    bool_0 = bool_1 = bool_2 = true;
+                };
+                break;
+            // val_1 > val_0 >= val_2
+            case 4:
+                std::swap (val_0, val_1);
+                bool_0 = bool_1 = true;
+                break;
+            // val_1 >= val_2 > val_0
+            case 5:
+                if (val_1 > val_2) {
+                    auto aux = std::move (val_0);
+                    val_0 = std::move (val_1);
+                    val_1 = std::move (val_2);
+                    val_2 = std::move (aux);
+                    bool_0 = bool_1 = bool_2 = true;
+                }
+                else
+                {
+                    std::swap (val_0, val_2);
+                    bool_0 = bool_2 = true;
+                };
+                break;
+            // val_2 > val_1 > val_0
+            case 7:
+                std::swap (val_0, val_2);
+                bool_0 = bool_2 = true;
+                break;
+            // 1 and 6 : not expected from consistent comparisons
+            default: abort ( );
+        };
+        return (bool_0 or bool_1 or bool_2);
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  function : make_heap
+    /// @brief Make the heap, with the greatest element (by comp) on top
+    /// @param first : iterator to the first element of the range
+    /// @param nelem : number of elements of the range
+    /// @param comp : object for to compare two elements
+    /// @remarks This algorithm is O(NLogN)
+    //------------------------------------------------------------------------
+    void make_heap (Iter_t first, size_t nelem, Compare comp)
+    {
+        size_t pos_father, pos_son;
+        Iter_t iter_father = first, iter_son = first;
+        bool sw = false;
+        // The element i climbs while its father is lower than it
+        for (size_t i = 1; i < nelem; ++i)
+        {
+            pos_father = i;
+            iter_father = first + i;
+            sw = false;
+            do
+            {
+                iter_son = iter_father;
+                pos_son = pos_father;
+                pos_father = (pos_son - 1) >> 1;
+                iter_father = first + pos_father;
+                if ((sw = comp (*iter_father, *iter_son)))
+                    std::swap (*iter_father, *iter_son);
+            } while (sw and pos_father != 0);
+        };
+    };
+    //
+    //------------------------------------------------------------------------
+    //  function : heap_sort
+    /// @brief : Heap sort algorithm
+    /// @param first: iterator to the first element of the range
+    /// @param last : iterator to the next element of the last in the range
+    /// @param comp : object for to do the comparison between the elements
+    /// @remarks This algorithm is O(NLogN)
+    //------------------------------------------------------------------------
+    heap_sort (Iter_t first, Iter_t last, Compare comp)
+    {
+        assert ((last - first) >= 0);
+        size_t nelem = last - first;
+        if (nelem < 2) return;
+
+        //--------------------------------------------------------------------
+        // Creating the initial heap
+        //--------------------------------------------------------------------
+        make_heap (first, nelem, comp);
+
+        //--------------------------------------------------------------------
+        //  Sort the heap
+        //--------------------------------------------------------------------
+        size_t pos_father, pos_son;
+        Iter_t iter_father = first, iter_son = first;
+        // Pass i : the top goes to the position nelem - i, the new top sinks
+        bool sw = false;
+        for (size_t i = 1; i < nelem; ++i)
+        {
+            std::swap (*first, *(first + (nelem - i)));
+            pos_father = 0;
+            pos_son = 1;
+            iter_father = first;
+            sw = true;
+            while (sw and pos_son < (nelem - i))
+            {
+                // if the father has two sons, the bigger one is selected
+                iter_son = first + pos_son;
+                if ((pos_son + 1) < (nelem - i) and
+                    comp (*iter_son, *(iter_son + 1)))
+                {
+                    ++pos_son;
+                    ++iter_son;
+                };
+                if ((sw = comp (*iter_father, *iter_son)))
+                    std::swap (*iter_father, *iter_son);
+                pos_father = pos_son;
+                iter_father = iter_son;
+                pos_son = (pos_father << 1) + 1;
+            };
+        };
+    };
+}; // End class heap_sort
+}; // end namespace heap_detail
+
+namespace bscu = boost::sort::common::util;
+// Sort [first, last) with comp : the work is done by heap_detail::heap_sort
+template < class Iter_t, typename Compare = bscu::compare_iter < Iter_t > >
+void heap_sort (Iter_t first, Iter_t last, Compare comp = Compare())
+{
+    heap_detail::heap_sort<Iter_t, Compare> sorter (first, last, comp);
+}
+//
+//****************************************************************************
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/insert_sort/insert_sort.hpp b/boost/sort/insert_sort/insert_sort.hpp
new file mode 100644
index 0000000000..d40302ad10
--- /dev/null
+++ b/boost/sort/insert_sort/insert_sort.hpp
@@ -0,0 +1,119 @@
+//----------------------------------------------------------------------------
+/// @file insert_sort.hpp
+/// @brief Insertion Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_INTROSORT_DETAIL_INSERT_SORT_HPP
+#define __BOOST_SORT_INTROSORT_DETAIL_INSERT_SORT_HPP
+
+#include <functional>
+#include <iterator>
+#include <algorithm>
+#include <utility> // std::swap
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/util/insert.hpp>
+
+namespace boost
+{
+namespace sort
+{
+using common::util::compare_iter;
+using common::util::value_iter;
+//
+//-----------------------------------------------------------------------------
+//  function : insert_sort
+/// @brief : Insertion sort algorithm. Every element is taken out, and the
+///          previous elements greater than it move one position forward
+/// @param first, last : range to sort [first, last)
+/// @param comp : object for to do the comparison between the elements
+/// @remarks This algorithm is O(N^2)
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare = compare_iter < Iter_t > >
+static void insert_sort (Iter_t first, Iter_t last,
+                         Compare comp = Compare())
+{
+    typedef value_iter< Iter_t > value_t;
+
+    if ((last - first) < 2) return;
+
+    Iter_t it_next = first;
+    while (++it_next != last)
+    {
+        // it_hole is the free position where val will be finally stored
+        value_t val = std::move (*it_next);
+        Iter_t it_hole = it_next;
+        while (it_hole != first)
+        {
+            Iter_t it_prev = it_hole - 1;
+            if (not comp (val, *it_prev)) break;
+            *it_hole = std::move (*it_prev);
+            it_hole = it_prev;
+        };
+        *it_hole = std::move (val);
+    };
+};
+
+/*
+//
+//-----------------------------------------------------------------------------
+//  function : insert_partial_sort
+/// @brief : Insertion sort of elements sorted
+/// @param first: iterator to the first element of the range
+/// @param mid : last pointer of the sorted data, and first pointer to the
+///               elements to insert
+/// @param last : iterator to the next element of the last in the range
+/// @param comp : object for to do the comparison between the elements
+/// @remarks This algorithm is O(N^2)
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare = compare_iter < Iter_t > >
+void insert_partial_sort (Iter_t first, Iter_t mid, Iter_t last,
+                          Compare comp = Compare())
+{
+    //--------------------------------------------------------------------
+    //                   DEFINITIONS
+    //--------------------------------------------------------------------
+    typedef value_iter< Iter_t > value_t;
+
+    if ( mid == last ) return ;
+    insert_sort ( mid, last, comp);
+    if (first == mid) return ;
+
+    // creation of the vector of elements to insert and their position in the
+    // sorted part
+    std::vector<Iter_t> viter ;
+    std::vector<value_t> vdata ;
+
+    for ( Iter_t alpha = mid ; alpha != last ; ++alpha)
+        vdata.push_back ( std::move ( *alpha));
+
+    Iter_t linf = first , lsup = mid ;
+    for ( uint32_t i= 0 ; i < vdata.size() ; ++i)
+    {   Iter_t it1 = std::upper_bound ( linf, lsup , vdata[i], comp);
+        viter.push_back ( it1 );
+        linf = it1 ;
+    };
+
+    // moving the elements
+    viter.push_back ( mid) ;
+    for ( uint32_t i = viter.size() -1 ; i!= 0 ; --i)
+    {   Iter_t src = viter[i], limit = viter[i-1];
+        Iter_t dest = src + ( i);
+        while ( src != limit) * (--dest) = std::move ( *(--src));
+        *(viter[i-1] + (i -1)) = std::move (vdata[i-1]);
+    };
+}
+*/
+//
+//****************************************************************************
+}; //    End namespace sort
+}; //    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/parallel_stable_sort/parallel_stable_sort.hpp b/boost/sort/parallel_stable_sort/parallel_stable_sort.hpp
new file mode 100644
index 0000000000..9df7dffd2a
--- /dev/null
+++ b/boost/sort/parallel_stable_sort/parallel_stable_sort.hpp
@@ -0,0 +1,270 @@
+//----------------------------------------------------------------------------
+/// @file parallel_stable_sort.hpp
+/// @brief This file contains the class parallel_stable_sort
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP
+
+#include <boost/sort/sample_sort/sample_sort.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <functional>
+#include <future>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+namespace boost
+{
+namespace sort
+{
+namespace stable_detail
+{
+
+//---------------------------------------------------------------------------
+//                    USING SENTENCES
+//---------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+namespace bss = boost::sort::spin_detail;
+using bsc::range;
+using bsc::merge_half;
+using boost::sort::sample_detail::sample_sort;
+//
+///---------------------------------------------------------------------------
+/// @struct parallel_stable_sort
+/// @brief Parallel stable sort. The constructors do the sort, and the
+///        destructor releases the auxiliary buffer when it is still owned
+//----------------------------------------------------------------------------
+template <class Iter_t, class Compare = compare_iter <Iter_t> >
+struct parallel_stable_sort
+{
+    //-------------------------------------------------------------------------
+    //                      DEFINITIONS
+    //-------------------------------------------------------------------------
+    typedef value_iter<Iter_t> value_t;
+
+    //-------------------------------------------------------------------------
+    //                     VARIABLES
+    //-------------------------------------------------------------------------
+    // Number of elements to sort
+    size_t nelem;
+    // Pointer to the auxiliary memory. It is nullptr when nothing is owned
+    value_t *ptr;
+    // Minimal number of elements for to be sorted in parallel mode
+    const size_t nelem_min = 1 << 16;
+
+    //------------------------------------------------------------------------
+    //  CONSTRUCTORS : the comparison and the number of threads are optional
+    //------------------------------------------------------------------------
+    parallel_stable_sort (Iter_t first, Iter_t last)
+    : parallel_stable_sort (first, last, Compare(),
+                            std::thread::hardware_concurrency()) { };
+
+    parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp)
+    : parallel_stable_sort (first, last, cmp,
+                            std::thread::hardware_concurrency()) { };
+
+    parallel_stable_sort (Iter_t first, Iter_t last, uint32_t num_thread)
+    : parallel_stable_sort (first, last, Compare(), num_thread) { };
+
+    parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp,
+                          uint32_t num_thread);
+
+    //
+    //-----------------------------------------------------------------------------
+    //  function : destroy_all
+    /// @brief Release the temporary buffer used in the sorting process. The
+    ///        pointer is reset, so a second call, or the destructor, is safe
+    //-----------------------------------------------------------------------------
+    void destroy_all()
+    {   if (ptr != nullptr) std::return_temporary_buffer(ptr);
+        ptr = nullptr;
+    };
+    //
+    //-----------------------------------------------------------------------------
+    //  function :~parallel_stable_sort
+    /// @brief destructor of the class. It releases, through destroy_all, the
+    ///        temporary buffer when it had not been released before
+    //-----------------------------------------------------------------------------
+    ~parallel_stable_sort() { destroy_all(); }
+};
+// end struct parallel_stable_sort
+
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            N O N     I N L I N E      F U N C T I O N S                  ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : parallel_stable_sort
+/// @brief constructor of the class. It sorts the range [first, last)
+/// @exception std::bad_alloc : the auxiliary buffer can't be obtained
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+///                    iterators
+/// @param nthread : Number of threads to use in the process. When this value
+///                  is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Compare>
+parallel_stable_sort <Iter_t, Compare>
+::parallel_stable_sort (Iter_t first, Iter_t last, Compare comp,
+                        uint32_t nthread) : nelem(0), ptr(nullptr)
+{
+    range<Iter_t> range_initial(first, last);
+    assert(range_initial.valid());
+
+    nelem = range_initial.size();
+    size_t nptr = (nelem + 1) >> 1;
+
+    if (nelem < nelem_min or nthread < 2)
+    {
+        bss::spinsort<Iter_t, Compare>
+            (range_initial.first, range_initial.last, comp);
+        return;
+    };
+
+    //------------------- check if sort --------------------------------------
+    bool sw = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+         it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
+    if (sw) return;
+
+    //------------------- check if reverse sort ---------------------------
+    // Strictly decreasing : no equal elements, the reversal is stable
+    sw = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+         it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
+    if (sw)
+    {
+        size_t nelem2 = nelem >> 1;
+        Iter_t it1 = first, it2 = last - 1;
+        for (size_t i = 0; i < nelem2; ++i)
+            std::swap(*(it1++), *(it2--));
+        return;
+    };
+
+    // get_temporary_buffer can provide less elements than the requested,
+    // and reports the number obtained in the second field of the pair
+    auto buffer = std::get_temporary_buffer<value_t>(nptr);
+    ptr = buffer.first;
+    if (ptr == nullptr or size_t(buffer.second) < nptr)
+    {
+        destroy_all();
+        throw std::bad_alloc();
+    };
+
+    //---------------------------------------------------------------------
+    //     Parallel Process
+    //---------------------------------------------------------------------
+    range<Iter_t> range_first(range_initial.first, range_initial.first + nptr);
+    range<Iter_t> range_second(range_initial.first + nptr, range_initial.last);
+    range<value_t *> range_buffer(ptr, ptr + nptr);
+
+    // The destructor is not invoked when the constructor throws, so the
+    // buffer is released here, whatever the exception is, before to rethrow
+    try
+    {
+        sample_sort<Iter_t, Compare>
+            (range_initial.first, range_initial.first + nptr,
+             comp, nthread, range_buffer);
+        sample_sort<Iter_t, Compare>
+            (range_initial.first + nptr,
+             range_initial.last, comp, nthread, range_buffer);
+        range_buffer = move_forward(range_buffer, range_first);
+        range_initial = merge_half(range_initial, range_buffer, range_second,
+                                   comp);
+    } catch (...)
+    {
+        destroy_all();
+        throw;
+    };
+}; // end of constructor
+
+//
+//****************************************************************************
+};//    End namespace stable_detail
+//****************************************************************************
+//
+
+//---------------------------------------------------------------------------
+//                    USING SENTENCES
+//---------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+namespace bscu = bsc::util;
+namespace bss = boost::sort::spin_detail;
+using bsc::range;
+using bsc::merge_half;
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//            P A R A L L E L _ S T A B L E _ S O R T                       ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : parallel_stable_sort
+/// @brief : parallel stable sort algorithm.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator past the last element of the range to sort
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+void parallel_stable_sort(Iter_t first, Iter_t last)
+{
+    // No comparison object supplied: use the comparison type provided by
+    // bscu::compare_iter for this iterator.
+    using default_compare = bscu::compare_iter<Iter_t>;
+
+    // The work is done by constructing the detail object.
+    stable_detail::parallel_stable_sort<Iter_t, default_compare>
+        sorter(first, last);
+    (void) sorter;
+}
+//
+//-----------------------------------------------------------------------------
+//  function : parallel_stable_sort
+/// @brief parallel stable sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator past the last element of the range to sort
+/// @param nthread : Number of threads to use in the process. When this value
+///                  is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+void parallel_stable_sort(Iter_t first, Iter_t last, uint32_t nthread)
+{
+    // No comparison object supplied: use the comparison type provided by
+    // bscu::compare_iter for this iterator.
+    using default_compare = bscu::compare_iter<Iter_t>;
+
+    // The work is done by constructing the detail object; nthread is
+    // forwarded unchanged.
+    stable_detail::parallel_stable_sort<Iter_t, default_compare>
+        sorter(first, last, nthread);
+    (void) sorter;
+}
+//
+//-----------------------------------------------------------------------------
+//  function : parallel_stable_sort
+/// @brief : parallel stable sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator past the last element of the range to sort
+/// @param comp : comparison object used to compare two elements pointed to
+///               by Iter_t iterators
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Compare,
+          bscu::enable_if_not_integral<Compare> * = nullptr>
+void parallel_stable_sort(Iter_t first, Iter_t last, Compare comp)
+{
+    // NOTE(review): enable_if_not_integral presumably removes this overload
+    // when the third argument is an integral value, so that a thread count
+    // selects the nthread overload instead -- confirm against bscu.
+    //
+    // The work is done by constructing the detail object.
+    stable_detail::parallel_stable_sort<Iter_t, Compare>
+        sorter(first, last, comp);
+    (void) sorter;
+}
+//
+//****************************************************************************
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/pdqsort/pdqsort.hpp b/boost/sort/pdqsort/pdqsort.hpp
new file mode 100644
index 0000000000..dc81d87057
--- /dev/null
+++ b/boost/sort/pdqsort/pdqsort.hpp
@@ -0,0 +1,632 @@
+// Pattern-defeating quicksort
+
+//              Copyright Orson Peters 2017.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort/ for library home page.
+
+
+#ifndef BOOST_SORT_PDQSORT_HPP
+#define BOOST_SORT_PDQSORT_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <iterator>
+#include <utility>
+#include <boost/type_traits.hpp>
+
+#if __cplusplus >= 201103L
+    #include <cstdint>
+    #define BOOST_PDQSORT_PREFER_MOVE(x) std::move(x)
+#else
+    #define BOOST_PDQSORT_PREFER_MOVE(x) (x)
+#endif
+
+namespace boost {
+namespace sort {
+
+namespace pdqsort_detail {
+    // Tuning constants shared by the functions of pdqsort_detail.
+    enum {
+        // Partitions below this size are sorted using insertion sort.
+        insertion_sort_threshold = 24,
+
+        // Partitions above this size use Tukey's ninther to select the pivot.
+        ninther_threshold = 128,
+
+        // When we detect an already sorted partition, attempt an insertion sort that allows this
+        // amount of element moves before giving up.
+        partial_insertion_sort_limit = 8,
+
+        // Number of elements classified per block by partition_right_branchless.
+        // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
+        block_size = 64,
+
+        // Cacheline size, assumes power of two. align_cacheline rounds pointers up to a multiple
+        // of this value, and the offset buffers are over-allocated by it.
+        cacheline_size = 64
+    };
+
+    // Compile-time flag: true only for std::less<T> and std::greater<T>, false for every other
+    // comparison type. pdqsort() combines it with boost::is_arithmetic on the element type to
+    // decide whether the branchless partitioning is used.
+    template<class T> struct is_default_compare : boost::false_type { };
+    template<class T> struct is_default_compare<std::less<T> > : boost::true_type { };
+    template<class T> struct is_default_compare<std::greater<T> > : boost::true_type { };
+
+    // Returns floor(log2(n)), assumes n > 0.
+    template<class T>
+    inline int log2(T n) {
+        // Count how many times n can be halved before it reaches zero;
+        // the first halving is not counted, which yields floor(log2(n)).
+        int shifts = 0;
+        for (n >>= 1; n; n >>= 1) ++shifts;
+        return shifts;
+    }
+
+    // Sorts [begin, end) using insertion sort with the given comparison function.
+    template<class Iter, class Compare>
+    inline void insertion_sort(Iter begin, Iter end, Compare comp) {
+        typedef typename std::iterator_traits<Iter>::value_type value_t;
+        if (begin == end) return;
+
+        for (Iter cur = begin + 1; cur != end; ++cur) {
+            Iter hole = cur;
+            Iter prev = cur - 1;
+
+            // Element already ordered with respect to its predecessor: no moves needed.
+            if (!comp(*hole, *prev)) continue;
+
+            // Lift the element out and shift predecessors one slot to the right until either
+            // the front of the range or a predecessor that is not greater is reached.
+            value_t tmp = BOOST_PDQSORT_PREFER_MOVE(*hole);
+            for (;;) {
+                *hole = BOOST_PDQSORT_PREFER_MOVE(*prev);
+                --hole;
+                if (hole == begin) break;
+                --prev;
+                if (!comp(tmp, *prev)) break;
+            }
+            *hole = BOOST_PDQSORT_PREFER_MOVE(tmp);
+        }
+    }
+
+    // Sorts [begin, end) using insertion sort with the given comparison function. Assumes
+    // *(begin - 1) is an element smaller than or equal to any element in [begin, end).
+    template<class Iter, class Compare>
+    inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
+        typedef typename std::iterator_traits<Iter>::value_type value_t;
+        if (begin == end) return;
+
+        for (Iter cur = begin + 1; cur != end; ++cur) {
+            Iter hole = cur;
+            Iter prev = cur - 1;
+
+            // Element already ordered with respect to its predecessor: no moves needed.
+            if (!comp(*hole, *prev)) continue;
+
+            // No bounds check while shifting: the caller guarantees that *(begin - 1) is not
+            // greater than any element of the range, so the scan stops there at the latest.
+            value_t tmp = BOOST_PDQSORT_PREFER_MOVE(*hole);
+            do {
+                *hole = BOOST_PDQSORT_PREFER_MOVE(*prev);
+                --hole;
+                --prev;
+            } while (comp(tmp, *prev));
+            *hole = BOOST_PDQSORT_PREFER_MOVE(tmp);
+        }
+    }
+
+    // Attempts to use insertion sort on [begin, end). Will return false if more than
+    // partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
+    // successfully sort and return true.
+    template<class Iter, class Compare>
+    inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
+        typedef typename std::iterator_traits<Iter>::value_type value_t;
+        if (begin == end) return true;
+
+        // Total number of positions elements have been shifted so far.
+        int moved = 0;
+        for (Iter cur = begin + 1; cur != end; ++cur) {
+            // Earlier insertions exceeded the budget: abort and report failure.
+            if (moved > partial_insertion_sort_limit) return false;
+
+            Iter hole = cur;
+            Iter prev = cur - 1;
+
+            // Element already ordered with respect to its predecessor: no moves needed.
+            if (!comp(*hole, *prev)) continue;
+
+            value_t tmp = BOOST_PDQSORT_PREFER_MOVE(*hole);
+            for (;;) {
+                *hole = BOOST_PDQSORT_PREFER_MOVE(*prev);
+                --hole;
+                if (hole == begin) break;
+                --prev;
+                if (!comp(tmp, *prev)) break;
+            }
+            *hole = BOOST_PDQSORT_PREFER_MOVE(tmp);
+
+            // Charge the distance this element travelled against the budget.
+            moved += cur - hole;
+        }
+
+        return true;
+    }
+
+    template<class Iter, class Compare>
+    inline void sort2(Iter a, Iter b, Compare comp) {
+        // Compare-exchange: leave the pair alone unless *b must precede *a.
+        if (!comp(*b, *a)) return;
+        std::iter_swap(a, b);
+    }
+
+    // Sorts the elements *a, *b and *c using comparison function comp.
+    template<class Iter, class Compare>
+    inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
+        // Three compare-exchanges; afterwards *a, *b, *c are in order and *b holds the median.
+        sort2(a, b, comp);  // order the first pair
+        sort2(b, c, comp);  // the element that belongs last is now in *c
+        sort2(a, b, comp);  // re-order the first pair
+    }
+
+    template<class T>
+    inline T* align_cacheline(T* p) {
+        // Integer type used to do arithmetic on the address.
+#if defined(UINTPTR_MAX) && __cplusplus >= 201103L
+        typedef std::uintptr_t addr_t;
+#else
+        typedef std::size_t addr_t;
+#endif
+        // Round the address up to the next multiple of cacheline_size (a power of two):
+        // add cacheline_size - 1, then clear the low bits.
+        const addr_t low_bits_cleared = ~(static_cast<addr_t>(cacheline_size) - 1);
+        const addr_t rounded =
+            (reinterpret_cast<addr_t>(p) + cacheline_size - 1) & low_bits_cleared;
+        return reinterpret_cast<T*>(rounded);
+    }
+
+    // Exchanges num pairs of elements: the i-th pair is first + offsets_l[i] and
+    // last - offsets_r[i]. With use_swaps each pair is swapped individually; otherwise the
+    // elements are rotated through a single temporary.
+    template<class Iter>
+    inline void swap_offsets(Iter first, Iter last,
+                             unsigned char* offsets_l, unsigned char* offsets_r,
+                             int num, bool use_swaps) {
+        typedef typename std::iterator_traits<Iter>::value_type T;
+        if (use_swaps) {
+            // This case is needed for the descending distribution, where we need
+            // to have proper swapping for pdqsort to remain O(n).
+            for (int i = 0; i < num; ++i) {
+                std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
+            }
+        } else if (num > 0) {
+            // Cyclic move: save the first left element, then alternately fill the hole on the
+            // left from the right and the hole on the right from the next left element. Every
+            // right-hand element ends up in its paired left slot; left elements shift by one pair.
+            Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
+            T tmp(BOOST_PDQSORT_PREFER_MOVE(*l)); *l = BOOST_PDQSORT_PREFER_MOVE(*r);
+            for (int i = 1; i < num; ++i) {
+                l = first + offsets_l[i]; *r = BOOST_PDQSORT_PREFER_MOVE(*l);
+                r = last - offsets_r[i]; *l = BOOST_PDQSORT_PREFER_MOVE(*r);
+            }
+            // The saved element closes the cycle in the last right-hand slot.
+            *r = BOOST_PDQSORT_PREFER_MOVE(tmp);
+        }
+    }
+
+    // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
+    // to the pivot are put in the right-hand partition. Returns the position of the pivot after
+    // partitioning and whether the passed sequence already was correctly partitioned. Assumes the
+    // pivot is a median of at least 3 elements and that [begin, end) is at least
+    // insertion_sort_threshold long. Uses branchless partitioning.
+    template<class Iter, class Compare>
+    inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
+        typedef typename std::iterator_traits<Iter>::value_type T;
+
+        // Move pivot into local for speed.
+        T pivot(BOOST_PDQSORT_PREFER_MOVE(*begin));
+        Iter first = begin;
+        Iter last = end;
+
+        // Find the first element greater than or equal to the pivot (the median of 3 guarantees
+        // this exists).
+        while (comp(*++first, pivot));
+
+        // Find the first element strictly smaller than the pivot. We have to guard this search if
+        // there was no element before *first.
+        if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
+        else                    while (                !comp(*--last, pivot));
+
+        // If the first pair of elements that should be swapped to partition are the same element,
+        // the passed in sequence already was correctly partitioned.
+        bool already_partitioned = first >= last;
+        if (!already_partitioned) {
+            std::iter_swap(first, last);
+            ++first;
+        }
+
+        // The following branchless partitioning is derived from "BlockQuicksort: How Branch
+        // Mispredictions don't affect Quicksort" by Stefan Edelkamp and Armin Weiss.
+        //
+        // offsets_l / offsets_r record, one byte each, the positions of misplaced elements:
+        // offsets_l[k] is a distance forward from first, offsets_r[k] a distance back from last.
+        // The storage is over-allocated by cacheline_size so the used part can be aligned.
+        unsigned char offsets_l_storage[block_size + cacheline_size];
+        unsigned char offsets_r_storage[block_size + cacheline_size];
+        unsigned char* offsets_l = align_cacheline(offsets_l_storage);
+        unsigned char* offsets_r = align_cacheline(offsets_r_storage);
+        // num_* : offsets still waiting to be swapped; start_* : index of the first such offset.
+        int num_l, num_r, start_l, start_r;
+        num_l = num_r = start_l = start_r = 0;
+        
+        // Main loop: runs while more than two full blocks of unclassified elements remain.
+        while (last - first > 2 * block_size) {
+            // Fill up offset blocks with elements that are on the wrong side.
+            if (num_l == 0) {
+                start_l = 0;
+                Iter it = first;
+                // Unrolled 8x. The index is always stored; num_l only advances (keeping the
+                // stored index) when the element is not less than the pivot.
+                for (unsigned char i = 0; i < block_size;) {
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                    offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+                }
+            }
+            if (num_r == 0) {
+                start_r = 0;
+                Iter it = last;
+                // Mirror image for the right block; offsets are 1-based distances from last and
+                // are kept when the element is less than the pivot.
+                for (unsigned char i = 0; i < block_size;) {
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                    offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+                }
+            }
+
+            // Swap elements and update block sizes and first/last boundaries.
+            // Individual swaps are requested only when both buffers hold the same count.
+            int num = (std::min)(num_l, num_r);
+            swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r,
+                         num, num_l == num_r);
+            num_l -= num; num_r -= num;
+            start_l += num; start_r += num;
+            // A block is stepped over only once all of its recorded offsets were consumed.
+            if (num_l == 0) first += block_size;
+            if (num_r == 0) last -= block_size;
+        }
+
+        // Remainder: at most one side still has pending offsets from a full block; the elements
+        // not yet classified are assigned to the other side (or split if neither is pending).
+        int l_size = 0, r_size = 0;
+        int unknown_left = (last - first) - ((num_r || num_l) ? block_size : 0);
+        if (num_r) {
+            // Handle leftover block by assigning the unknown elements to the other block.
+            l_size = unknown_left;
+            r_size = block_size;
+        } else if (num_l) {
+            l_size = block_size;
+            r_size = unknown_left;
+        } else {
+            // No leftover block, split the unknown elements in two blocks.
+            l_size = unknown_left/2;
+            r_size = unknown_left - l_size;
+        }
+
+        // Fill offset buffers if needed.
+        if (unknown_left && !num_l) {
+            start_l = 0;
+            Iter it = first;
+            for (unsigned char i = 0; i < l_size;) {
+                offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
+            }
+        }
+        if (unknown_left && !num_r) {
+            start_r = 0;
+            Iter it = last;
+            for (unsigned char i = 0; i < r_size;) {
+                offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
+            }
+        }
+
+        int num = (std::min)(num_l, num_r);
+        swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r, num, num_l == num_r);
+        num_l -= num; num_r -= num;
+        start_l += num; start_r += num;
+        if (num_l == 0) first += l_size;
+        if (num_r == 0) last -= r_size;
+        
+        // We have now fully identified [first, last)'s proper position. Swap the last elements.
+        // Only one side can still have pending offsets; they are processed from the highest
+        // offset down, moving each misplaced element to the boundary.
+        if (num_l) {
+            offsets_l += start_l;
+            while (num_l--) std::iter_swap(first + offsets_l[num_l], --last);
+            first = last;
+        }
+        if (num_r) {
+            offsets_r += start_r;
+            while (num_r--) std::iter_swap(last - offsets_r[num_r], first), ++first;
+            last = first;
+        }
+
+        // Put the pivot in the right place.
+        Iter pivot_pos = first - 1;
+        *begin = BOOST_PDQSORT_PREFER_MOVE(*pivot_pos);
+        *pivot_pos = BOOST_PDQSORT_PREFER_MOVE(pivot);
+
+        return std::make_pair(pivot_pos, already_partitioned);
+    }
+
+    // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
+    // to the pivot are put in the right-hand partition. Returns the position of the pivot after
+    // partitioning and whether the passed sequence already was correctly partitioned. Assumes the
+    // pivot is a median of at least 3 elements and that [begin, end) is at least
+    // insertion_sort_threshold long.
+    template<class Iter, class Compare>
+    inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
+        typedef typename std::iterator_traits<Iter>::value_type value_t;
+
+        // Work on a local pivot; its slot (*begin) is refilled at the end.
+        value_t pivot(BOOST_PDQSORT_PREFER_MOVE(*begin));
+
+        Iter left = begin;
+        Iter right = end;
+
+        // Advance to the first element that is not less than the pivot. The median-of-3 pivot
+        // selection guarantees such an element exists, so no bounds check is needed.
+        do { ++left; } while (comp(*left, pivot));
+
+        // Retreat to the last element that is less than the pivot. If the scan above stopped
+        // immediately, nothing smaller is known to lie on the left, so this search is bounded.
+        if (left - 1 == begin) {
+            while (left < right) {
+                --right;
+                if (comp(*right, pivot)) break;
+            }
+        } else {
+            do { --right; } while (!comp(*right, pivot));
+        }
+
+        // The cursors met or crossed before any swap: the input was already partitioned.
+        bool already_partitioned = left >= right;
+
+        // Exchange misplaced pairs until the cursors cross. Elements swapped earlier act as
+        // sentinels for the unchecked scans, which is why the first scan is special-cased above.
+        while (left < right) {
+            std::iter_swap(left, right);
+            do { ++left; } while (comp(*left, pivot));
+            do { --right; } while (!comp(*right, pivot));
+        }
+
+        // The pivot belongs just before the first element of the right-hand partition.
+        Iter pivot_pos = left - 1;
+        *begin = BOOST_PDQSORT_PREFER_MOVE(*pivot_pos);
+        *pivot_pos = BOOST_PDQSORT_PREFER_MOVE(pivot);
+
+        return std::make_pair(pivot_pos, already_partitioned);
+    }
+
+    // Similar function to the one above, except elements equal to the pivot are put to the left of
+    // the pivot and it doesn't check or return if the passed sequence already was partitioned.
+    // Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
+    // performance, no block quicksort is applied here for simplicity.
+    template<class Iter, class Compare>
+    inline Iter partition_left(Iter begin, Iter end, Compare comp) {
+        typedef typename std::iterator_traits<Iter>::value_type value_t;
+
+        // Work on a local pivot; its slot (*begin) is refilled at the end.
+        value_t pivot(BOOST_PDQSORT_PREFER_MOVE(*begin));
+        Iter left = begin;
+        Iter right = end;
+
+        // Retreat to the last element that is not greater than the pivot.
+        do { --right; } while (comp(pivot, *right));
+
+        // Advance to the first element greater than the pivot. If the scan above stopped
+        // immediately, nothing greater is known to lie on the right, so this search is bounded.
+        if (right + 1 == end) {
+            while (left < right) {
+                ++left;
+                if (comp(pivot, *left)) break;
+            }
+        } else {
+            do { ++left; } while (!comp(pivot, *left));
+        }
+
+        // Exchange misplaced pairs until the cursors cross; swapped elements guard the scans.
+        while (left < right) {
+            std::iter_swap(left, right);
+            do { --right; } while (comp(pivot, *right));
+            do { ++left; } while (!comp(pivot, *left));
+        }
+
+        // The pivot goes to the end of the left-hand (not greater) partition.
+        Iter pivot_pos = right;
+        *begin = BOOST_PDQSORT_PREFER_MOVE(*pivot_pos);
+        *pivot_pos = BOOST_PDQSORT_PREFER_MOVE(pivot);
+
+        return pivot_pos;
+    }
+
+
+    // Main sorting loop for [begin, end).
+    //   Branchless  : selects partition_right_branchless instead of partition_right.
+    //   bad_allowed : number of highly unbalanced partitions tolerated; when the counter reaches
+    //                 zero the current range is finished with heapsort.
+    //   leftmost    : true while the range starts at the beginning of the whole input, i.e. no
+    //                 element *(begin - 1) may be read.
+    template<class Iter, class Compare, bool Branchless>
+    inline void pdqsort_loop(Iter begin, Iter end, Compare comp, int bad_allowed, bool leftmost = true) {
+        typedef typename std::iterator_traits<Iter>::difference_type diff_t;
+
+        // Use a while loop for tail recursion elimination.
+        while (true) {
+            diff_t size = end - begin;
+
+            // Insertion sort is faster for small arrays.
+            // The unguarded variant relies on *(begin - 1), so it is used only when not leftmost.
+            if (size < insertion_sort_threshold) {
+                if (leftmost) insertion_sort(begin, end, comp);
+                else unguarded_insertion_sort(begin, end, comp);
+                return;
+            }
+
+            // Choose pivot as median of 3 or pseudomedian of 9.
+            // Either way the chosen pivot ends up in *begin.
+            diff_t s2 = size / 2;
+            if (size > ninther_threshold) {
+                sort3(begin, begin + s2, end - 1, comp);
+                sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
+                sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
+                sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
+                std::iter_swap(begin, begin + s2);
+            } else sort3(begin + s2, begin, end - 1, comp);
+
+            // If *(begin - 1) is the end of the right partition of a previous partition operation
+            // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
+            // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
+            // the left partition, greater elements in the right partition. We do not have to
+            // recurse on the left partition, since it's sorted (all equal).
+            if (!leftmost && !comp(*(begin - 1), *begin)) {
+                begin = partition_left(begin, end, comp) + 1;
+                continue;
+            }
+
+            // Partition and get results.
+            std::pair<Iter, bool> part_result =
+                Branchless ? partition_right_branchless(begin, end, comp)
+                           : partition_right(begin, end, comp);
+            Iter pivot_pos = part_result.first;
+            bool already_partitioned = part_result.second;
+
+            // Check for a highly unbalanced partition.
+            // "Highly unbalanced" means one side holds fewer than size / 8 elements.
+            diff_t l_size = pivot_pos - begin;
+            diff_t r_size = end - (pivot_pos + 1);
+            bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
+
+            // If we got a highly unbalanced partition we shuffle elements to break many patterns.
+            if (highly_unbalanced) {
+                // If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
+                if (--bad_allowed == 0) {
+                    std::make_heap(begin, end, comp);
+                    std::sort_heap(begin, end, comp);
+                    return;
+                }
+
+                // Swap a few elements near both ends of the left partition with elements a
+                // quarter of the way in (more of them for larger partitions).
+                if (l_size >= insertion_sort_threshold) {
+                    std::iter_swap(begin,             begin + l_size / 4);
+                    std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
+
+                    if (l_size > ninther_threshold) {
+                        std::iter_swap(begin + 1,         begin + (l_size / 4 + 1));
+                        std::iter_swap(begin + 2,         begin + (l_size / 4 + 2));
+                        std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
+                        std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
+                    }
+                }
+                
+                // Same shuffle for the right partition.
+                if (r_size >= insertion_sort_threshold) {
+                    std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
+                    std::iter_swap(end - 1,                   end - r_size / 4);
+                    
+                    if (r_size > ninther_threshold) {
+                        std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
+                        std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
+                        std::iter_swap(end - 2,             end - (1 + r_size / 4));
+                        std::iter_swap(end - 3,             end - (2 + r_size / 4));
+                    }
+                }
+            } else {
+                // If we were decently balanced and we tried to sort an already partitioned
+                // sequence try to use insertion sort.
+                // Both halves must finish within the move budget; otherwise fall through and
+                // continue with the regular recursion.
+                if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
+                                        && partial_insertion_sort(pivot_pos + 1, end, comp)) return;
+            }
+                
+            // Sort the left partition first using recursion and do tail recursion elimination for
+            // the right-hand partition.
+            pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost);
+            begin = pivot_pos + 1;
+            // From here on *(begin - 1) is the pivot just placed, so the range is no longer leftmost.
+            leftmost = false;
+        }
+    }
+}
+
+
+/*! \brief Generic sort algorithm using random access iterators and a user-defined comparison operator.
+
+    \details @c pdqsort is a fast generic sorting algorithm that is similar in concept to introsort
+but runs faster on certain patterns. @c pdqsort is in-place, unstable, deterministic, has a worst
+case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>. Even without patterns, the
+quicksort has been very efficiently implemented, and @c pdqsort runs 1-5% faster than GCC 6.2's
+@c std::sort. If the type being sorted is @c std::is_arithmetic and Compare is @c std::less or
+@c std::greater this function will automatically use @c pdqsort_branchless for far greater speedups.
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
+   (or moves), functors, or any operations on iterators throw.
+   \warning Invalid arguments cause undefined behaviour.
+   \warning Throwing an exception may cause data loss.
+*/
+template<class Iter, class Compare>
+inline void pdqsort(Iter first, Iter last, Compare comp) {
+    typedef typename std::iterator_traits<Iter>::value_type value_t;
+    typedef typename boost::decay<Compare>::type compare_t;
+
+    // Decided at compile time: the branchless partitioning is used only for arithmetic element
+    // types compared with std::less or std::greater.
+    const bool branchless = pdqsort_detail::is_default_compare<compare_t>::value &&
+                            boost::is_arithmetic<value_t>::value;
+
+    // Nothing to do for an empty range (log2 below assumes a non-zero length).
+    if (first == last) return;
+
+    // The number of tolerated bad partitions is floor(log2(length)).
+    pdqsort_detail::pdqsort_loop<Iter, Compare, branchless>(
+        first, last, comp, pdqsort_detail::log2(last - first));
+}
+
+
+/*! \brief Generic sort algorithm using random access iterators and a user-defined comparison operator.
+
+    \details @c pdqsort_branchless is a fast generic sorting algorithm that is similar in concept to
+introsort but runs faster on certain patterns. @c pdqsort_branchless is in-place, unstable,
+deterministic, has a worst case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>.
+Even without patterns, the quicksort has been very efficiently implemented with block based
+partitioning, and @c pdqsort_branchless runs 80-90% faster than GCC 6.2's @c std::sort when sorting
+small data such as integers. However, this speedup is gained by totally bypassing the branch
+predictor; if your comparison operator or iterator contains branches, you will most likely see
+little gain or a small loss in performance.
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
+   (or moves), functors, or any operations on iterators throw.
+   \warning Invalid arguments cause undefined behaviour.
+   \warning Throwing an exception may cause data loss.
+*/
+template<class Iter, class Compare>
+inline void pdqsort_branchless(Iter first, Iter last, Compare comp) {
+    // An empty range needs no work (and log2 assumes a non-zero length).
+    if (first != last) {
+        // Branchless partitioning is forced on; the number of tolerated bad partitions is
+        // floor(log2(length)).
+        pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
+            first, last, comp, pdqsort_detail::log2(last - first));
+    }
+}
+
+
+/*! \brief Generic sort algorithm using random access iterators.
+
+    \details @c pdqsort is a fast generic sorting algorithm that is similar in concept to introsort
+but runs faster on certain patterns. @c pdqsort is in-place, unstable, deterministic, has a worst
+case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>. Even without patterns, the
+quicksort partitioning has been very efficiently implemented, and @c pdqsort runs 80-90% faster than
+GCC 6.2's @c std::sort. If the type being sorted is @c std::is_arithmetic this function will
+automatically use @c pdqsort_branchless.
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
+   (or moves), functors, or any operations on iterators throw.
+   \warning Invalid arguments cause undefined behaviour.
+   \warning Throwing an exception may cause data loss.
+*/
+template<class Iter>
+inline void pdqsort(Iter first, Iter last) {
+    // No comparison supplied: forward to the three-argument overload with std::less on the
+    // iterator's value type.
+    pdqsort(first, last, std::less<typename std::iterator_traits<Iter>::value_type>());
+}
+
+
+/*! \brief Generic sort algorithm using random access iterators.
+
+    \details @c pdqsort_branchless is a fast generic sorting algorithm that is similar in concept to
+introsort but runs faster on certain patterns. @c pdqsort_branchless is in-place, unstable,
+deterministic, has a worst case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>.
+Even without patterns, the quicksort has been very efficiently implemented with block based
+partitioning, and @c pdqsort_branchless runs 80-90% faster than GCC 6.2's @c std::sort when sorting
+small data such as integers. However, this speedup is gained by totally bypassing the branch
+predictor; if your comparison operator or iterator contains branches, you will most likely see
+little gain or a small loss in performance.
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
+   (or moves), functors, or any operations on iterators throw.
+   \warning Invalid arguments cause undefined behaviour.
+   \warning Throwing an exception may cause data loss.
+*/
+template<class Iter>
+inline void pdqsort_branchless(Iter first, Iter last) {
+    // No comparison supplied: forward to the three-argument overload with std::less on the
+    // iterator's value type.
+    pdqsort_branchless(first, last,
+                       std::less<typename std::iterator_traits<Iter>::value_type>());
+}
+
+}
+}
+
+#undef BOOST_PDQSORT_PREFER_MOVE
+
+#endif
diff --git a/boost/sort/sample_sort/sample_sort.hpp b/boost/sort/sample_sort/sample_sort.hpp
new file mode 100644
index 0000000000..ded1781cfa
--- /dev/null
+++ b/boost/sort/sample_sort/sample_sort.hpp
@@ -0,0 +1,560 @@
+//----------------------------------------------------------------------------
+/// @file sample_sort.hpp
+/// @brief contains the class sample_sort
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
+
+#include <functional>
+#include <future>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include <algorithm>
+#include <boost/sort/spinsort/spinsort.hpp>
+#include <boost/sort/common/indirect.hpp>
+#include <boost/sort/common/util/atomic.hpp>
+#include <boost/sort/common/merge_four.hpp>
+#include <boost/sort/common/merge_vector.hpp>
+#include <boost/sort/common/range.hpp>
+
+namespace boost
+{
+namespace sort
+{
+namespace sample_detail
+{
+//---------------------------------------------------------------------------
+//                    USING SENTENCES
+//---------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+namespace bss = boost::sort::spin_detail;
+namespace bscu = boost::sort::common::util;
+using bsc::range;
+using bscu::atomic_add;
+using bsc::merge_vector4;
+using bsc::uninit_merge_level4;
+using bsc::less_ptr_no_null;
+
+//
+///---------------------------------------------------------------------------
+/// @struct sample_sort
+/// @brief Structure which implements the parallel sample sort. The sorting
+///        is done in the constructor
+/// @tparam Iter_t : iterator to the elements to sort
+/// @tparam Compare : object for to compare two elements
+//----------------------------------------------------------------------------
+template<class Iter_t, class Compare>
+struct sample_sort
+{
+    //------------------------------------------------------------------------
+    //                     DEFINITIONS
+    //------------------------------------------------------------------------
+    typedef value_iter<Iter_t> value_t;
+    typedef range<Iter_t> range_it;
+    typedef range<value_t *> range_buf;
+    typedef sample_sort<Iter_t, Compare> this_t;
+
+    //------------------------------------------------------------------------
+    //                VARIABLES AND CONSTANTS
+    //------------------------------------------------------------------------
+    // ranges with this number of elements, or less, are sorted with 1 thread
+    static const uint32_t thread_min = (1 << 16);
+
+    // Number of threads to use in the algorithm
+    // Number of intervals for to do the internal division of the data
+    uint32_t nthread, ninterval;
+
+    // construct : the auxiliary memory contains constructed objects
+    // owner : the auxiliary memory had been obtained inside the algorithm,
+    //         and had not been received as a parameter
+    bool construct = false, owner = false;
+
+    // Comparison object for to compare two elements
+    Compare comp;
+
+    // Range with all the elements to sort
+    range_it global_range;
+
+    // range with the auxiliary memory
+    range_buf global_buf;
+
+    // futures of the tasks launched in the merge phases, one per thread
+    std::vector<std::future<void>> vfuture;
+
+    // vector of vectors which contains the ranges to merge obtained in the
+    // subdivision
+    std::vector<std::vector<range_it>> vv_range_it;
+
+    // each vector of ranges of the vv_range_it, need their corresponding buffer
+    // for to do the merge
+    std::vector<std::vector<range_buf>> vv_range_buf;
+
+    // Initial vector of ranges
+    std::vector<range_it> vrange_it_ini;
+
+    // Initial vector of buffers
+    std::vector<range_buf> vrange_buf_ini;
+
+    // atomic counter from which each thread obtains the number of the
+    // interval to process in execute_first and execute
+    std::atomic<uint32_t> njob;
+
+    // Indicates that an error happened in the algorithm, for to undo all
+    bool error;
+
+    //------------------------------------------------------------------------
+    //                       FUNCTIONS OF THE STRUCT
+    //------------------------------------------------------------------------
+    void initial_configuration(void);
+
+    sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
+                 value_t *paux, size_t naux);
+
+    sample_sort(Iter_t first, Iter_t last)
+    : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(),
+                   nullptr, 0) { };
+
+    sample_sort(Iter_t first, Iter_t last, Compare cmp)
+    : sample_sort(first, last, cmp, std::thread::hardware_concurrency(),
+                  nullptr, 0) { };
+
+    sample_sort(Iter_t first, Iter_t last, uint32_t num_thread)
+    : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { };
+
+    sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
+    : sample_sort(first, last, cmp, num_thread, nullptr, 0) { };
+
+    sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
+                range_buf range_buf_initial)
+    : sample_sort(first, last, cmp, num_thread,
+                  range_buf_initial.first, range_buf_initial.size()) { };
+
+    void destroy_all(void);
+    //
+    //-----------------------------------------------------------------------------
+    //  function :~sample_sort
+    /// @brief destructor of the class. The utility is to destroy the temporary
+    ///        buffer used in the sorting process
+    //-----------------------------------------------------------------------------
+    ~sample_sort(void) { destroy_all(); };
+    //
+    //-----------------------------------------------------------------------
+    //  function : execute first
+    /// @brief function executed by each thread in the first merge
+    //-----------------------------------------------------------------------
+    void execute_first(void)
+    {
+        uint32_t job = 0;
+        while ((job = atomic_add(njob, 1)) < ninterval)
+        {
+            uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job],
+                            vv_range_buf[job], comp);
+        };
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  function : execute
+    /// @brief function executed by each thread in the final merge
+    //-----------------------------------------------------------------------
+    void execute(void)
+    {
+        uint32_t job = 0;
+        while ((job = atomic_add(njob, 1)) < ninterval)
+        {
+            merge_vector4(vrange_buf_ini[job], vrange_it_ini[job],
+                            vv_range_buf[job], vv_range_it[job], comp);
+        };
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  function : first merge
+    /// @brief Implement the merge of the initially sparse ranges
+    //-----------------------------------------------------------------------
+    void first_merge(void)
+    { //---------------------------------- begin --------------------------
+        njob = 0;
+
+        for (uint32_t i = 0; i < nthread; ++i)
+        {
+            vfuture[i] = std::async(std::launch::async, &this_t::execute_first,
+                            this);
+        };
+        for (uint32_t i = 0; i < nthread; ++i)
+            vfuture[i].get();
+    };
+    //
+    //-----------------------------------------------------------------------
+    //  function : final merge
+    /// @brief Implement the final merge of the ranges
+    //-----------------------------------------------------------------------
+    void final_merge(void)
+    { //---------------------------------- begin --------------------------
+        njob = 0;
+
+        for (uint32_t i = 0; i < nthread; ++i)
+        {
+            vfuture[i] = std::async(std::launch::async, &this_t::execute, this);
+        };
+        for (uint32_t i = 0; i < nthread; ++i)
+            vfuture[i].get();
+    };
+    //----------------------------------------------------------------------------
+};
+//                    End class sample_sort
+//----------------------------------------------------------------------------
+//
+//############################################################################
+//                                                                          ##
+//              N O N    I N L I N E      F U N C T I O N S                 ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : sample_sort
+/// @brief constructor of the class. Sorts the range [first, last)
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param cmp : object for to compare two elements pointed by Iter_t iterators
+/// @param num_thread : Number of threads to use in the process. When this value
+///                     is lower than 2, the sorting is done with 1 thread
+/// @param paux : pointer to the auxiliary memory. If nullptr, the memory is
+///               created inside the class
+/// @param naux : number of elements of the memory pointed by paux
+/// @throws std::bad_alloc if the auxiliary memory can't be obtained. Any other
+///         exception raised while sorting is propagated after the cleanup
+//-----------------------------------------------------------------------------
+template<class Iter_t, typename Compare>
+sample_sort<Iter_t, Compare>
+::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
+               value_t *paux, size_t naux)
+: nthread(num_thread), owner(false), comp(cmp), global_range(first, last),
+  global_buf(nullptr, nullptr), error(false)
+{
+    assert((last - first) >= 0);
+    size_t nelem = size_t(last - first);
+    construct = false;
+    njob = 0;
+    vfuture.resize(nthread);
+
+    // Adjust when have many threads and only a few elements
+    while (nelem > thread_min and (nthread * nthread) > (nelem >> 3))
+    {
+        nthread /= 2;
+    };
+    ninterval = (nthread << 3);
+
+    if (nthread < 2 or nelem <= (thread_min))
+    {
+        bss::spinsort<Iter_t, Compare>(first, last, comp);
+        return;
+    };
+
+    //------------------- check if sort --------------------------------------
+    bool sw = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+                    it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
+    if (sw) return;
+
+    //------------------- check if reverse sort ---------------------------
+    sw = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+                    it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
+    if (sw)
+    {
+        size_t nelem2 = nelem >> 1;
+        Iter_t it1 = first, it2 = last - 1;
+        for (size_t i = 0; i < nelem2; ++i)
+            std::swap(*(it1++), *(it2--));
+        return;
+    };
+
+    if (paux != nullptr)
+    {
+        assert(naux != 0);
+        global_buf.first = paux;
+        global_buf.last = paux + naux;
+        owner = false;
+    }
+    else
+    {
+        value_t *ptr = std::get_temporary_buffer<value_t>(nelem).first;
+        if (ptr == nullptr) throw std::bad_alloc();
+        owner = true;
+        global_buf = range_buf(ptr, ptr + nelem);
+    };
+    //------------------------------------------------------------------------
+    //                    PROCESS
+    //------------------------------------------------------------------------
+    // The destructor is not invoked when this constructor exits with an
+    // exception, so the auxiliary memory must be released here on any failure
+    try
+    {
+        initial_configuration();
+        first_merge();
+        construct = true;
+        final_merge();
+    }
+    catch (...)
+    {
+        error = true;
+        // let the running tasks finish before releasing the memory they use
+        for (auto &task : vfuture) if (task.valid()) task.wait();
+        destroy_all();
+        throw;
+    };
+}
+;
+//
+//-----------------------------------------------------------------------------
+//  function : destroy_all
+/// @brief destroy the objects built in the auxiliary memory and, when that
+///        memory was obtained by the class, release it. Can be called again
+//-----------------------------------------------------------------------------
+template<class Iter_t, typename Compare>
+void sample_sort<Iter_t, Compare>::destroy_all(void)
+{
+    // the objects exist only while the flag construct is set
+    if (construct) destroy(global_buf);
+    construct = false;
+    if (global_buf.first == nullptr or not owner) return;
+    std::return_temporary_buffer(global_buf.first);
+    // forget the released memory, so another call can't release it twice
+    global_buf = range_buf(nullptr, nullptr);
+}
+//
+//-----------------------------------------------------------------------------
+//  function : initial_configuration
+/// @brief Sort in parallel one part of the data per thread, and divide the
+///        sorted parts in the ranges to merge in each interval
+//-----------------------------------------------------------------------------
+template<class Iter_t, typename Compare>
+void sample_sort<Iter_t, Compare>::initial_configuration(void)
+{
+    std::vector<range_it> vmem_thread;
+    std::vector<range_buf> vbuf_thread;
+    size_t nelem = global_range.size();
+
+    //------------------------------------------------------------------------
+    size_t cupo = nelem / nthread;
+    Iter_t it_first = global_range.first;
+    value_t *buf_first = global_buf.first;
+    vmem_thread.reserve(nthread + 1);
+    vbuf_thread.reserve(nthread + 1);
+
+    for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first +=
+                    cupo)
+    {
+        vmem_thread.emplace_back(it_first, it_first + cupo);
+        vbuf_thread.emplace_back(buf_first, buf_first + cupo);
+    };
+
+    vmem_thread.emplace_back(it_first, global_range.last);
+    vbuf_thread.emplace_back(buf_first, global_buf.last);
+
+    //------------------------------------------------------------------------
+    // Sorting of the ranges
+    //------------------------------------------------------------------------
+    std::vector<std::future<void>> vfuture(nthread);
+    // the tasks end before the vectors of ranges, so they are not copied
+    for (uint32_t i = 0; i < nthread; ++i)
+    {
+        auto func = [this, &vmem_thread, &vbuf_thread, i]()
+        {
+            bss::spinsort<Iter_t, Compare> (vmem_thread[i].first,
+                            vmem_thread[i].last, comp,
+                            vbuf_thread[i]);
+        };
+        vfuture[i] = std::async(std::launch::async, func);
+    };
+
+    for (uint32_t i = 0; i < nthread; ++i)
+        vfuture[i].get();
+
+    //------------------------------------------------------------------------
+    // Obtain the vector of milestones
+    //------------------------------------------------------------------------
+    std::vector<Iter_t> vsample;
+    vsample.reserve(nthread * (ninterval - 1));
+
+    for (uint32_t i = 0; i < nthread; ++i)
+    {
+        size_t distance = vmem_thread[i].size() / ninterval;
+        for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
+        {
+            vsample.push_back(vmem_thread[i].first + pos);
+        };
+    };
+    typedef less_ptr_no_null<Iter_t, Compare> compare_ptr;
+    typedef typename std::vector<Iter_t>::iterator it_to_it;
+
+    bss::spinsort<it_to_it, compare_ptr>(vsample.begin(), vsample.end(),
+                    compare_ptr(comp));
+
+    //------------------------------------------------------------------------
+    // Create the final milestone vector
+    //------------------------------------------------------------------------
+    std::vector<Iter_t> vmilestone;
+    vmilestone.reserve(ninterval);
+
+    for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread)
+    {
+        vmilestone.push_back(vsample[pos]);
+    };
+
+    //------------------------------------------------------------------------
+    // Creation of the first vector of ranges
+    //------------------------------------------------------------------------
+    std::vector<std::vector<range<Iter_t>>>vv_range_first (nthread);
+
+    for (uint32_t i = 0; i < nthread; ++i)
+    {
+        Iter_t itaux = vmem_thread[i].first;
+        vv_range_first[i].reserve(ninterval);
+        for (uint32_t k = 0; k < (ninterval - 1); ++k)
+        {
+            Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last,
+                            *vmilestone[k], comp);
+
+            vv_range_first[i].emplace_back(itaux, it2);
+            itaux = it2;
+        };
+        vv_range_first[i].emplace_back(itaux, vmem_thread[i].last);
+    };
+
+    //------------------------------------------------------------------------
+    // Copy in buffer and  creation of the final matrix of ranges
+    //------------------------------------------------------------------------
+    vv_range_it.resize(ninterval);
+    vv_range_buf.resize(ninterval);
+    vrange_it_ini.reserve(ninterval);
+    vrange_buf_ini.reserve(ninterval);
+
+    for (uint32_t i = 0; i < ninterval; ++i)
+    {
+        vv_range_it[i].reserve(nthread);
+        vv_range_buf[i].reserve(nthread);
+    };
+
+    Iter_t it = global_range.first;
+    value_t *it_buf = global_buf.first;
+
+    for (uint32_t k = 0; k < ninterval; ++k)
+    {
+        size_t nelem_interval = 0;
+
+        for (uint32_t i = 0; i < nthread; ++i)
+        {
+            size_t nelem_range = vv_range_first[i][k].size();
+            if (nelem_range != 0)
+            {
+                vv_range_it[k].push_back(vv_range_first[i][k]);
+            };
+            nelem_interval += nelem_range;
+        };
+
+        vrange_it_ini.emplace_back(it, it + nelem_interval);
+        vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval);
+
+        it += nelem_interval;
+        it_buf += nelem_interval;
+    };
+}
+;
+//
+//****************************************************************************
+}
+;
+//    End namespace sample_detail
+//****************************************************************************
+//
+namespace bscu = boost::sort::common::util;
+//
+//############################################################################
+//                                                                          ##
+//                                                                          ##
+//                       S A M P L E _ S O R T                              ##
+//                                                                          ##
+//                                                                          ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+//  function : sample_sort
+/// @brief Stable parallel sort of a range with the sample sort algorithm. The
+///        elements are compared with compare_iter<Iter_t>
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+void sample_sort(Iter_t first, Iter_t last)
+{
+    sample_detail::sample_sort<Iter_t, compare_iter<Iter_t>>
+                    sorter(first, last);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : sample_sort
+/// @brief Stable parallel sort of a range with the sample sort algorithm. The
+///        elements are compared with compare_iter<Iter_t>
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param nthread : Number of threads to use in the process. When this value
+///                  is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template<class Iter_t>
+void sample_sort(Iter_t first, Iter_t last, uint32_t nthread)
+{
+    sample_detail::sample_sort<Iter_t, compare_iter<Iter_t>>
+                    sorter(first, last, nthread);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : sample_sort
+/// @brief Stable parallel sort of a range with the sample sort algorithm,
+///        using the comparison object provided by the caller
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+///               iterators
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare, bscu::enable_if_not_integral<Compare> * =
+                nullptr>
+void sample_sort(Iter_t first, Iter_t last, Compare comp)
+{
+    sample_detail::sample_sort<Iter_t, Compare> sorter(first, last, comp);
+};
+//
+//-----------------------------------------------------------------------------
+//  function : sample_sort
+/// @brief Stable parallel sort of a range with the sample sort algorithm,
+///        using the comparison object and the number of threads provided
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+///               iterators
+/// @param nthread : Number of threads to use in the process. When this value
+///                  is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template<class Iter_t, class Compare>
+void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
+{
+    sample_detail::sample_sort<Iter_t, Compare> sorter(first, last, comp, nthread);
+};
+//
+//****************************************************************************
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/sort.hpp b/boost/sort/sort.hpp
index bc4fe974a6..625f134b5c 100644
--- a/boost/sort/sort.hpp
+++ b/boost/sort/sort.hpp
@@ -1,19 +1,24 @@
-// The Boost Sort library cumulative header.
-
-//          Copyright Steven J. Ross 2014
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort/ for library home page.
-
-#ifndef BOOST_SORT_HPP
-#define BOOST_SORT_HPP
-
-/*
-Cumulative include for the Boost Sort library
-*/
-
-#include <boost/sort/spreadsort/spreadsort.hpp>
-
-#endif
+// The Boost Sort library cumulative header.
+
+//          Copyright Steven J. Ross 2014
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort/ for library home page.
+
+#ifndef BOOST_SORT_HPP
+#define BOOST_SORT_HPP
+
+/*
+Cumulative include for the Boost Sort library
+*/
+#include <boost/sort/spreadsort/spreadsort.hpp>
+#include <boost/sort/spinsort/spinsort.hpp>
+#include <boost/sort/flat_stable_sort/flat_stable_sort.hpp>
+#include <boost/sort/pdqsort/pdqsort.hpp>
+#include <boost/sort/block_indirect_sort/block_indirect_sort.hpp>
+#include <boost/sort/sample_sort/sample_sort.hpp>
+#include <boost/sort/parallel_stable_sort/parallel_stable_sort.hpp>
+
+#endif
diff --git a/boost/sort/spinsort/spinsort.hpp b/boost/sort/spinsort/spinsort.hpp
new file mode 100644
index 0000000000..0e9f2d5572
--- /dev/null
+++ b/boost/sort/spinsort/spinsort.hpp
@@ -0,0 +1,564 @@
+//----------------------------------------------------------------------------
+/// @file spinsort.hpp
+/// @brief Spin Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+///         Distributed under the Boost Software License, Version 1.0.\n
+///         ( See accompanying file LICENSE_1_0.txt or copy at
+///           http://www.boost.org/LICENSE_1_0.txt  )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_ALGORITHM_SPIN_SORT_HPP
+#define __BOOST_SORT_PARALLEL_ALGORITHM_SPIN_SORT_HPP
+
+//#include <boost/sort/spinsort/util/indirect.hpp>
+#include <boost/sort/insert_sort/insert_sort.hpp>
+#include <boost/sort/common/util/traits.hpp>
+#include <boost/sort/common/util/algorithm.hpp>
+#include <boost/sort/common/range.hpp>
+#include <boost/sort/common/indirect.hpp>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <vector>
+#include <cstddef>
+
+namespace boost
+{
+namespace sort
+{
+namespace spin_detail
+{
+
+//----------------------------------------------------------------------------
+//                USING SENTENCES
+//----------------------------------------------------------------------------
+namespace bsc = boost::sort::common;
+using bsc::range;
+using bsc::util::nbits64;
+using bsc::util::compare_iter;
+using bsc::util::value_iter;
+using boost::sort::insert_sort;
+
+//
+//############################################################################
+//                                                                          ##
+//          D E F I N I T I O N S    O F    F U N C T I O N S               ##
+//                                                                          ##
+//############################################################################
+//
+template <class Iter1_t, class Iter2_t, typename Compare>
+static void insert_partial_sort (Iter1_t first, Iter1_t mid,
+                                 Iter1_t last, Compare comp,
+                                 const range<Iter2_t> &rng_aux);
+
+template<class Iter1_t, class Iter2_t, class Compare>
+static bool check_stable_sort (const range<Iter1_t> &rng_data,
+                               const range<Iter2_t> &rng_aux, Compare comp);
+
+template<class Iter1_t, class Iter2_t, class Compare>
+static void range_sort (const range<Iter1_t> &range1,
+                        const range<Iter2_t> &range2, Compare comp,
+                        uint32_t level);
+
+template<class Iter1_t, class Iter2_t, class Compare>
+static void sort_range_sort (const range<Iter1_t> &rng_data,
+                             const range<Iter2_t> &rng_aux, Compare comp);
+
+//
+//-----------------------------------------------------------------------------
+//  function : insert_partial_sort
+/// @brief : Insert the elements of the sorted range [mid, last) in the sorted
+///          range [first, mid), obtaining [first, last) sorted
+/// @param first: iterator to the first element of the range
+/// @param mid : end of the sorted data, and first of the elements to insert
+/// @param last : iterator to the next element of the last in the range
+/// @param comp : object for to compare two elements
+/// @param rng_aux : auxiliary range, with room for at least (last - mid) elements
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, typename Compare>
+static void insert_partial_sort (Iter1_t first, Iter1_t mid, Iter1_t last,
+                                 Compare comp, const range<Iter2_t> &rng_aux)
+{
+    //------------------------------------------------------------------------
+    //                 metaprogram
+    //------------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same<value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //--------------------------------------------------------------------
+    //                   program
+    //--------------------------------------------------------------------
+    assert(size_t(last - mid) <= rng_aux.size());
+
+    // nothing to do when any of the two sorted ranges is empty
+    if (mid == last) return;
+    if (first == mid) return;
+
+    //------------------------------------------------------------------------
+    // creation of the vector of elements to insert and their position in the
+    // sorted part
+    // the data are inserted in rng_aux
+    //-----------------------------------------------------------------------
+    const size_t ndata = size_t(last - mid);
+    std::vector<Iter1_t> viter;
+    viter.reserve(ndata + 1);
+    Iter2_t beta = rng_aux.first, data = rng_aux.first;
+
+    for (Iter1_t alpha = mid; alpha != last; ++alpha)
+        *(beta++) = std::move(*alpha);
+
+    Iter1_t linf = first, lsup = mid;
+    for (size_t i = 0; i < ndata; ++i)
+    {
+        Iter1_t it1 = std::upper_bound(linf, lsup, *(data + i), comp);
+        viter.push_back(it1);
+        linf = it1;
+    };
+
+    // moving the elements
+    viter.push_back(mid);
+    for (size_t i = viter.size() - 1; i != 0; --i)
+    {
+        Iter1_t src = viter[i], limit = viter[i - 1];
+        Iter1_t dest = src + (i);
+        while (src != limit) *(--dest) = std::move(*(--src));
+        *(viter[i - 1] + (i - 1)) = std::move(*(data + (i - 1)));
+    };
+}
+;
+//-----------------------------------------------------------------------------
+//  function : check_stable_sort
+/// @brief check if the elements of rng_data are sorted or reverse sorted.
+///        If the number of elements not sorted is small, they are sorted
+///        and inserted in the sorted part
+/// @param rng_data : range with the elements to sort
+/// @param rng_aux : auxiliary range used for sorting and inserting the
+///                  elements out of order
+/// @param comp : object for to compare two elements
+/// @return true : rng_data is sorted when the function returns
+///         false : rng_data is not sorted, and had not been modified
+//
+/// @comments : ranges with less than 32 elements are always sorted here, with
+/// the insertion sort algorithm. In the other cases, the elements out of order
+/// are inserted only when they are less than max (32, size / 8)
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static bool check_stable_sort(const range<Iter1_t> &rng_data,
+                              const range<Iter2_t> &rng_aux, Compare comp)
+{
+    //------------------------------------------------------------------------
+    //              metaprogramming
+    //------------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same<value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //------------------------------------------------------------------------
+    //                    program
+    //------------------------------------------------------------------------
+    const size_t ndata = rng_data.size();
+    // the ranges with less than 32 elements are sorted directly with the
+    // insertion sort. From here, the range has at least 32 elements
+    if (ndata < 32)
+    {
+        insert_sort(rng_data.first, rng_data.last, comp);
+        return true;
+    };
+    // the maximum number of elements not ordered, for to be inserted in the
+    // sorted part
+    const size_t min_insert_partial_sort =
+                    ((ndata >> 3) < 33) ? 32 : (ndata >> 3);
+
+    // check if sorted
+    bool sw = true;
+    Iter1_t it2 = rng_data.first + 1;
+    for (Iter1_t it1 = rng_data.first;
+                    it2 != rng_data.last and (sw = not comp(*it2, *it1)); it1 =
+                                    it2++)
+        ;
+    if (sw) return true;
+
+    // insert the elements between it2 and last
+    if (size_t(rng_data.last - it2) < min_insert_partial_sort)
+    {
+        sort_range_sort(range<Iter1_t>(it2, rng_data.last), rng_aux, comp);
+        insert_partial_sort(rng_data.first, it2, rng_data.last, comp, rng_aux);
+        return true;
+    };
+
+    // check if reverse sorted
+    if ((it2 != (rng_data.first + 1))) return false;
+    sw = true;
+    for (Iter1_t it1 = rng_data.first;
+                    it2 != rng_data.last and (sw = comp(*it2, *it1)); it1 =
+                                    it2++)
+        ;
+    if (size_t(rng_data.last - it2) >= min_insert_partial_sort) return false;
+
+    // reverse the elements between first and it2
+    size_t nreverse = it2 - rng_data.first;
+    Iter1_t alpha(rng_data.first), beta(it2 - 1), mid(
+                    rng_data.first + (nreverse >> 1));
+    while (alpha != mid)
+        std::swap(*(alpha++), *(beta--));
+
+    // insert the elements between it2 and last
+    if (it2 != rng_data.last)
+    {
+        sort_range_sort(range<Iter1_t>(it2, rng_data.last), rng_aux, comp);
+        insert_partial_sort(rng_data.first, it2, rng_data.last, comp, rng_aux);
+    };
+    return true;
+}
+;
+//-----------------------------------------------------------------------------
+//  function : range_sort
+/// @brief split range1 in two halves, sort each half (with insert_sort or
+///        with a recursive call) and merge the two halves into range2
+/// @param range1 : range whose two halves are merged
+/// @param range2 : range which receives the merged elements
+/// @param comp : object used to compare two elements
+/// @param level : when it is 1, the halves are sorted with insert_sort ;
+///                if not, they are sorted with recursive calls
+//
+/// @remarks : if the number of levels is odd, the data are in the first
+/// parameter of range_sort, and the results appear in the second parameter.
+/// If the number of levels is even, the data are in the second
+/// parameter of range_sort, and the results are in the same parameter.
+/// The two ranges must have the same size, and level can't be 0 (asserted)
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static void range_sort(const range<Iter1_t> &range1,
+                       const range<Iter2_t> &range2, Compare comp,
+                       uint32_t level)
+{
+    //-----------------------------------------------------------------------
+    // compile time check : the two iterators must have the same value type
+    //-----------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same<value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //-----------------------------------------------------------------------
+    //                  program
+    //-----------------------------------------------------------------------
+    typedef range<Iter1_t> range_it1;
+    typedef range<Iter2_t> range_it2;
+    assert(range1.size() == range2.size() and level != 0);
+
+    //--- big ranges : shortcut when check_stable_sort can sort the data ---
+    if (range1.size() > 1024)
+    {
+        if ((level & 1) == 0)   // even level : the data are in range2
+        {
+            if (check_stable_sort(range2, range1, comp)) return;
+        }
+        else                    // odd level : the data are in range1
+        {
+            if (check_stable_sort(range1, range2, comp))
+            {
+                move_forward(range2, range1); // the result goes to range2
+                return;
+            };
+        };
+    };
+
+    //--- normal process : range1 is split in two, the first half is nelem1
+    size_t nelem1 = (range1.size() + 1) >> 1;
+    range_it1 range_input1(range1.first, range1.first + nelem1),
+                           range_input2(range1.first + nelem1, range1.last);
+
+    if (level < 2)  // last level : each half is sorted with insert_sort
+    {
+        insert_sort(range_input1.first, range_input1.last, comp);
+        insert_sort(range_input2.first, range_input2.last, comp);
+    }
+    else            // the two ranges swap their roles in the recursive calls
+    {
+        range_sort (range_it2(range2.first, range2.first + nelem1),
+                    range_input1, comp, level - 1);
+
+        range_sort (range_it2(range2.first + nelem1, range2.last),
+                    range_input2, comp, level - 1);
+    };
+
+    merge(range2, range_input1, range_input2, comp);
+}
+;
+//-----------------------------------------------------------------------------
+//  function : sort_range_sort
+/// @brief sort the elements of rng_data, using the range_sort function and
+///        a buffer of initialized memory. The result is left in rng_data
+/// @param rng_data : range with the elements to sort
+/// @param rng_aux : range with at least as many elements as rng_data, used
+///                  as auxiliary memory in the sorting
+/// @param comp : object used to compare two elements
+//-----------------------------------------------------------------------------
+template<class Iter1_t, class Iter2_t, class Compare>
+static void sort_range_sort(const range<Iter1_t> &rng_data,
+                            const range<Iter2_t> &rng_aux, Compare comp)
+{
+    //-----------------------------------------------------------------------
+    // compile time check : the two iterators must have the same value type
+    //-----------------------------------------------------------------------
+    typedef value_iter<Iter1_t> value_t;
+    typedef value_iter<Iter2_t> value2_t;
+    static_assert (std::is_same<value_t, value2_t>::value,
+                    "Incompatible iterators\n");
+
+    //------------------------------------------------------------------------
+    //                    program
+    //------------------------------------------------------------------------
+    // the ranges with sort_min or fewer elements are sorted with insert_sort
+    static const uint32_t sort_min = 32;
+    if (rng_data.size() <= sort_min)
+    {
+        insert_sort(rng_data.first, rng_data.last, comp);
+        return;
+    };
+
+#ifdef __BS_DEBUG
+    assert (rng_aux.size () >= rng_data.size ());
+#endif
+
+    range<Iter2_t> rng_buffer(rng_aux.first, rng_aux.first + rng_data.size());
+    uint32_t nlevel =
+                    nbits64(((rng_data.size() + sort_min - 1) / sort_min) - 1);
+    //assert (nlevel != 0); size > sort_min here, so nbits64 receives >= 1
+
+    if ((nlevel & 1) == 0)  // even : range_sort leaves the result in rng_data
+    {
+        range_sort(rng_buffer, rng_data, comp, nlevel);
+    }
+    else                    // odd : range_sort leaves the result in rng_buffer
+    {
+        range_sort(rng_data, rng_buffer, comp, nlevel);
+        move_forward(rng_data, rng_buffer);
+    };
+}
+;
+//
+//############################################################################
+//                                                                          ##
+//                              S T R U C T                                 ##
+//                                                                          ##
+//                           S P I N _ S O R T                              ##
+//                                                                          ##
+//############################################################################
+//---------------------------------------------------------------------------
+/// @class spinsort
+/// @brief  Stable sort algorithm with 1 thread and an auxiliary memory of
+///         (N + 1) / 2 elements. The sort is done by the constructors
+//----------------------------------------------------------------------------
+template<class Iter_t, typename Compare = compare_iter<Iter_t>>
+class spinsort
+{
+    //------------------------------------------------------------------------
+    //               DEFINITIONS AND CONSTANTS
+    //------------------------------------------------------------------------
+    typedef value_iter<Iter_t> value_t;
+    typedef range<Iter_t> range_it;
+    typedef range<value_t *> range_buf;
+    // Threshold of the insertion sort : the constructor sorts directly with
+    // insert_sort the ranges with up to 2 * Sort_min elements
+    static const uint32_t Sort_min = 36;
+
+    //------------------------------------------------------------------------
+    //                      VARIABLES
+    // NOTE(review): ptr can be owning, but the copy operations are implicit
+    // Pointer to the auxiliary memory
+    value_t *ptr;
+
+    // Number of elements in the auxiliary memory
+    size_t nptr;
+
+    // construct : true when the auxiliary memory holds constructed elements,
+    // which must be destroyed by the destructor.
+    // owner : true when the auxiliary memory was obtained inside the object,
+    // false when it was received as a parameter
+    bool construct = false, owner = false;
+
+    //------------------------------------------------------------------------
+    //  PRIVATE FUNCTIONS : constructor which does the sort (defined below)
+    //-------------------------------------------------------------------------
+    spinsort (Iter_t first, Iter_t last, Compare comp, value_t *paux,
+               size_t naux);
+
+public:
+    //------------------------------------------------------------------------
+    //                   PUBLIC FUNCTIONS
+    // sort [first, last) ; the auxiliary memory is obtained inside if needed
+    spinsort(Iter_t first, Iter_t last, Compare comp = Compare())
+    : spinsort(first, last, comp, nullptr, 0) { };
+    // sort [first, last) with range_aux as the auxiliary memory
+    spinsort(Iter_t first, Iter_t last, Compare comp, range_buf range_aux)
+    : spinsort(first, last, comp, range_aux.first, range_aux.size()) { };
+    //
+    //-----------------------------------------------------------------------
+    //  function :~spinsort
+    /// @brief destructor of the class. Destroy the nptr elements of the
+    ///        auxiliary memory if construct is true, and return the memory
+    ///        with std::return_temporary_buffer if owner is true
+    //-----------------------------------------------------------------------
+    ~spinsort(void)
+    {
+        if (construct)
+        {
+            destroy(range<value_t *>(ptr, ptr + nptr));
+            construct = false;
+        };
+        if (owner and ptr != nullptr) std::return_temporary_buffer(ptr);
+    };
+};
+//----------------------------------------------------------------------------
+//        End of class spinsort
+//----------------------------------------------------------------------------
+//
+//-------------------------------------------------------------------------
+//  function : spinsort
+/// @brief constructor of the class. It sorts the range [first, last)
+//
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object used to compare two elements pointed by Iter_t
+///               iterators
+/// @param paux : pointer to the auxiliary memory provided. If nullptr, or if
+///               naux is smaller than the (N + 1) / 2 elements needed, the
+///               memory is created inside the class
+/// @param naux : number of elements pointed by paux
+/// @exception std::bad_alloc : the auxiliary memory could not be obtained
+//------------------------------------------------------------------------
+template <class Iter_t, typename Compare>
+spinsort <Iter_t, Compare>
+::spinsort (Iter_t first, Iter_t last, Compare comp, value_t *paux, size_t naux)
+: ptr(paux), nptr(naux), construct(false), owner(false)
+{
+    range<Iter_t> range_input(first, last);
+    assert(range_input.valid());
+
+    size_t nelem = range_input.size();
+
+    // the auxiliary memory must hold the biggest half of the data
+    nptr = (nelem + 1) >> 1;
+    size_t nelem_1 = nptr;
+    size_t nelem_2 = nelem - nelem_1;
+
+    // a provided buffer which is too small can't be used without writing
+    // past its end : it is discarded and, if needed, an own buffer is
+    // obtained below
+    if (ptr != nullptr and naux < nptr) ptr = nullptr;
+
+    if (nelem <= (Sort_min << 1))
+    {
+        insert_sort(range_input.first, range_input.last, comp);
+        return;
+    };
+
+    //------------------- check if sort ---------------------------------
+    // the data already sorted need no work and no auxiliary memory
+    bool sw = true;
+    for (Iter_t it1 = first, it2 = first + 1; it2 != last
+         and (sw = not comp(*it2, *it1)); it1 = it2++) ;
+    if (sw) return;
+
+    //------------------- check if reverse sort -------------------------
+    // only strictly descending data are reversed in place : as they have
+    // no equivalent elements, the reversal keeps the sort stable
+    sw = true;
+    for (Iter_t it1 = first, it2 = first + 1;
+         it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
+    if (sw)
+    {
+        size_t nelem2 = nelem >> 1;
+        Iter_t it1 = first, it2 = last - 1;
+        for (size_t i = 0; i < nelem2; ++i)
+            std::swap(*(it1++), *(it2--));
+        return;
+    };
+
+    if (ptr == nullptr)
+    {
+        // get_temporary_buffer can return less elements than the requested
+        // (the obtained number is in .second) : a short buffer is returned
+        // and handled as an allocation failure
+        auto tmp = std::get_temporary_buffer<value_t>(nptr);
+        if (tmp.first != nullptr and size_t(tmp.second) < nptr)
+        {
+            std::return_temporary_buffer(tmp.first);
+            tmp.first = nullptr;
+        };
+        if (tmp.first == nullptr) throw std::bad_alloc();
+        ptr = tmp.first;
+        owner = true;
+    };
+    range_buf range_aux(ptr, (ptr + nptr));
+
+    // NOTE(review): ~spinsort is not executed if an exception leaves this
+    // (non delegating) constructor, so an owned buffer would leak if comp
+    // or a move operation throws from here -- confirm if this can happen
+
+    //---------------------------------------------------------------------
+    //  Process : the two halves are sorted with range_sort, and then
+    //  merged with merge_half
+    //---------------------------------------------------------------------
+    uint32_t nlevel = nbits64(((nelem + Sort_min - 1) / Sort_min) - 1) - 1;
+    assert(nlevel != 0);
+
+    if ((nlevel & 1) == 1)
+    {
+        //----------------------------------------------------------------
+        // if the number of levels is odd, the data are in the first
+        // parameter of range_sort, and the results appear in the second
+        // parameter
+        //----------------------------------------------------------------
+        range_it range_1(first, first + nelem_2), range_2(first + nelem_2,
+                        last);
+        range_aux = move_construct(range_aux, range_2);
+        construct = true;
+
+        range_sort(range_aux, range_2, comp, nlevel);
+        range_buf rng_bx(range_aux.first, range_aux.first + nelem_2);
+
+        range_sort(range_1, rng_bx, comp, nlevel);
+        merge_half(range_input, rng_bx, range_2, comp);
+    }
+    else
+    {
+        //----------------------------------------------------------------
+        // If the number of levels is even, the data are in the second
+        // parameter of range_sort, and the results are in the same
+        //  parameter
+        //----------------------------------------------------------------
+        range_it range_1(first, first + nelem_1), range_2(first + nelem_1,
+                        last);
+        range_aux = move_construct(range_aux, range_1);
+        construct = true;
+
+        range_sort(range_1, range_aux, comp, nlevel);
+
+        range_1.last = range_1.first + range_2.size();
+        range_sort(range_1, range_2, comp, nlevel);
+        merge_half(range_input, range_aux, range_2, comp);
+    };
+};
+
+//****************************************************************************
+};//    End namespace spin_detail
+//****************************************************************************
+//
+namespace bsc = boost::sort::common;
+//-----------------------------------------------------------------------------
+//  function : spinsort
+/// @brief single thread stable sort of the range [first, last). The work is
+///        done by the constructor of a temporary spin_detail::spinsort object
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element of the range to sort
+/// @param comp : object used to compare two elements pointed by Iter_t
+///               iterators
+//-----------------------------------------------------------------------------
+template <class Iter_t, class Compare = compare_iter<Iter_t>>
+inline void spinsort (Iter_t first, Iter_t last, Compare comp = Compare())
+{
+    spin_detail::spinsort <Iter_t, Compare> (first, last, comp);
+};
+
+template <class Iter_t, class Compare = compare_iter<Iter_t>>
+inline void indirect_spinsort (Iter_t first, Iter_t last,
+                               Compare comp = Compare())
+{   // delegates to common::indirect_sort ; spinsort is passed as argument
+    typedef typename std::vector<Iter_t>::iterator itx_iter;
+    typedef common::less_ptr_no_null <Iter_t, Compare> itx_comp;
+    common::indirect_sort (spinsort<itx_iter, itx_comp>, first, last, comp);
+};
+
+//****************************************************************************
+};//    End namespace sort
+};//    End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/boost/sort/spreadsort/detail/constants.hpp b/boost/sort/spreadsort/detail/constants.hpp
index a134761e59..9eebc43c69 100644
--- a/boost/sort/spreadsort/detail/constants.hpp
+++ b/boost/sort/spreadsort/detail/constants.hpp
@@ -1,46 +1,46 @@
-//constant definitions for the Boost Sort library
-
-//          Copyright Steven J. Ross 2001 - 2014
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-//  See http://www.boost.org/libs/sort for library home page.
-#ifndef BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS
-#define BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS
-namespace boost {
-namespace sort {
-namespace spreadsort {
-namespace detail {
-//Tuning constants
-//This should be tuned to your processor cache;
-//if you go too large you get cache misses on bins
-//The smaller this number, the less worst-case memory usage.
-//If too small, too many recursions slow down spreadsort
-enum { max_splits = 11,
-//It's better to have a few cache misses and finish sorting
-//than to run another iteration
-max_finishing_splits = max_splits + 1,
-//Sets the minimum number of items per bin.
-int_log_mean_bin_size = 2,
-//Used to force a comparison-based sorting for small bins, if it's faster.
-//Minimum value 1
-int_log_min_split_count = 9,
-//This is the minimum split count to use spreadsort when it will finish in one
-//iteration.  Make this larger the faster std::sort is relative to integer_sort.
-int_log_finishing_count = 31,
-//Sets the minimum number of items per bin for floating point.
-float_log_mean_bin_size = 2,
-//Used to force a comparison-based sorting for small bins, if it's faster.
-//Minimum value 1
-float_log_min_split_count = 8,
-//This is the minimum split count to use spreadsort when it will finish in one
-//iteration.  Make this larger the faster std::sort is relative to float_sort.
-float_log_finishing_count = 4,
-//There is a minimum size below which it is not worth using spreadsort
-min_sort_size = 1000 };
-}
-}
-}
-}
-#endif
+//constant definitions for the Boost Sort library
+
+//          Copyright Steven J. Ross 2001 - 2014
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+//  See http://www.boost.org/libs/sort for library home page.
+#ifndef BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS
+#define BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS
+namespace boost {
+namespace sort {
+namespace spreadsort {
+namespace detail {
+//Tuning constants, declared as the enumerators of an unnamed enum.
+//max_splits should be tuned to your processor cache;
+//if you go too large you get cache misses on bins
+//The smaller this number, the less worst-case memory usage.
+//If too small, too many recursions slow down spreadsort
+enum { max_splits = 11,
+//It's better to have a few cache misses and finish sorting
+//than to run another iteration
+max_finishing_splits = max_splits + 1,
+//Sets the minimum number of items per bin for integers.
+int_log_mean_bin_size = 2,
+//Used to force a comparison-based sorting for small bins, if it's faster.
+//Minimum value 1
+int_log_min_split_count = 9,
+//This is the minimum split count to use spreadsort when it will finish in one
+//iteration.  Make this larger the faster std::sort is relative to integer_sort.
+int_log_finishing_count = 31,
+//Sets the minimum number of items per bin for floating point.
+float_log_mean_bin_size = 2,
+//Used to force a comparison-based sorting for small bins, if it's faster.
+//Minimum value 1
+float_log_min_split_count = 8,
+//This is the minimum split count to use spreadsort when it will finish in one
+//iteration.  Make this larger the faster std::sort is relative to float_sort.
+float_log_finishing_count = 4,
+//There is a minimum size below which it is not worth using spreadsort
+min_sort_size = 1000 };
+} // namespace detail
+} // namespace spreadsort
+} // namespace sort
+} // namespace boost
+#endif
diff --git a/boost/sort/spreadsort/detail/float_sort.hpp b/boost/sort/spreadsort/detail/float_sort.hpp
index 03dcbaf4f6..93aaa2f69e 100644
--- a/boost/sort/spreadsort/detail/float_sort.hpp
+++ b/boost/sort/spreadsort/detail/float_sort.hpp
@@ -1,831 +1,831 @@
-// Details for templated Spreadsort-based float_sort.
-
-//          Copyright Steven J. Ross 2001 - 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-float_mem_cast fix provided by:
-Scott McMurray
-*/
-
-#ifndef BOOST_SORT_SPREADSORT_DETAIL_FLOAT_SORT_HPP
-#define BOOST_SORT_SPREADSORT_DETAIL_FLOAT_SORT_HPP
-#include <algorithm>
-#include <vector>
-#include <limits>
-#include <functional>
-#include <boost/static_assert.hpp>
-#include <boost/serialization/static_warning.hpp>
-#include <boost/utility/enable_if.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/sort/spreadsort/detail/integer_sort.hpp>
-#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
-#include <boost/cstdint.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
-  namespace detail {
-    //Casts a RandomAccessIter to the specified integer type
-    template<class Cast_type, class RandomAccessIter>
-    inline Cast_type
-    cast_float_iter(const RandomAccessIter & floatiter)
-    {
-      typedef typename std::iterator_traits<RandomAccessIter>::value_type
-        Data_type;
-      //Only cast IEEE floating-point numbers, and only to same-sized integers
-      BOOST_STATIC_ASSERT(sizeof(Cast_type) == sizeof(Data_type));
-      BOOST_STATIC_ASSERT(std::numeric_limits<Data_type>::is_iec559);
-      BOOST_STATIC_ASSERT(std::numeric_limits<Cast_type>::is_integer);
-      Cast_type result;
-      std::memcpy(&result, &(*floatiter), sizeof(Data_type));
-      return result;
-    }
-
-    // Return true if the list is sorted.  Otherwise, find the minimum and
-    // maximum.  Values are Right_shifted 0 bits before comparison.
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline bool
-    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
-                  Div_type & max, Div_type & min, Right_shift rshift)
-    {
-      min = max = rshift(*current, 0);
-      RandomAccessIter prev = current;
-      bool sorted = true;
-      while (++current < last) {
-        Div_type value = rshift(*current, 0);
-        sorted &= *current >= *prev;
-        prev = current;
-        if (max < value)
-          max = value;
-        else if (value < min)
-          min = value;
-      }
-      return sorted;
-    }
-
-    // Return true if the list is sorted.  Otherwise, find the minimum and
-    // maximum.  Uses comp to check if the data is already sorted.
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    inline bool
-    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
-                               Div_type & max, Div_type & min, 
-                               Right_shift rshift, Compare comp)
-    {
-      min = max = rshift(*current, 0);
-      RandomAccessIter prev = current;
-      bool sorted = true;
-      while (++current < last) {
-        Div_type value = rshift(*current, 0);
-        sorted &= !comp(*current, *prev);
-        prev = current;
-        if (max < value)
-          max = value;
-        else if (value < min)
-          min = value;
-      }
-      return sorted;
-    }
-
-    //Specialized swap loops for floating-point casting
-    template <class RandomAccessIter, class Div_type>
-    inline void inner_float_swap_loop(RandomAccessIter * bins,
-                        const RandomAccessIter & nextbinstart, unsigned ii
-                        , const unsigned log_divisor, const Div_type div_min)
-    {
-      RandomAccessIter * local_bin = bins + ii;
-      for (RandomAccessIter current = *local_bin; current < nextbinstart;
-          ++current) {
-        for (RandomAccessIter * target_bin =
-            (bins + ((cast_float_iter<Div_type, RandomAccessIter>(current) >>
-                      log_divisor) - div_min));  target_bin != local_bin;
-          target_bin = bins + ((cast_float_iter<Div_type, RandomAccessIter>
-                               (current) >> log_divisor) - div_min)) {
-          typename std::iterator_traits<RandomAccessIter>::value_type tmp;
-          RandomAccessIter b = (*target_bin)++;
-          RandomAccessIter * b_bin = bins + ((cast_float_iter<Div_type,
-                              RandomAccessIter>(b) >> log_divisor) - div_min);
-          //Three-way swap; if the item to be swapped doesn't belong in the
-          //current bin, swap it to where it belongs
-          if (b_bin != local_bin) {
-            RandomAccessIter c = (*b_bin)++;
-            tmp = *c;
-            *c = *b;
-          }
-          else
-            tmp = *b;
-          *b = *current;
-          *current = tmp;
-        }
-      }
-      *local_bin = nextbinstart;
-    }
-
-    template <class RandomAccessIter, class Div_type>
-    inline void float_swap_loop(RandomAccessIter * bins,
-                          RandomAccessIter & nextbinstart, unsigned ii,
-                          const size_t *bin_sizes,
-                          const unsigned log_divisor, const Div_type div_min)
-    {
-      nextbinstart += bin_sizes[ii];
-      inner_float_swap_loop<RandomAccessIter, Div_type>
-        (bins, nextbinstart, ii, log_divisor, div_min);
-    }
-
-    // Return true if the list is sorted.  Otherwise, find the minimum and
-    // maximum.  Values are cast to Cast_type before comparison.
-    template <class RandomAccessIter, class Cast_type>
-    inline bool
-    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
-                  Cast_type & max, Cast_type & min)
-    {
-      min = max = cast_float_iter<Cast_type, RandomAccessIter>(current);
-      RandomAccessIter prev = current;
-      bool sorted = true;
-      while (++current < last) {
-        Cast_type value = cast_float_iter<Cast_type, RandomAccessIter>(current);
-        sorted &= *current >= *prev;
-        prev = current;
-        if (max < value)
-          max = value;
-        else if (value < min)
-          min = value;
-      }
-      return sorted;
-    }
-
-    //Special-case sorting of positive floats with casting
-    template <class RandomAccessIter, class Div_type, class Size_type>
-    inline void
-    positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-              , size_t *bin_sizes)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last, 
-                                                                max, min))
-        return;
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
-            current++) >> log_divisor) - div_min)]++;
-      bins[0] = first;
-      for (unsigned u = 0; u < bin_count - 1; u++)
-        bins[u + 1] = bins[u] + bin_sizes[u];
-
-
-      //Swap into place
-      RandomAccessIter nextbinstart = first;
-      for (unsigned u = 0; u < bin_count - 1; ++u)
-        float_swap_loop<RandomAccessIter, Div_type>
-          (bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
-      bins[bin_count - 1] = last;
-
-      //Return if we've completed bucketsorting
-      if (!log_divisor)
-        return;
-
-      //Recursing
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
-          ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u]);
-        else
-          positive_float_sort_rec<RandomAccessIter, Div_type, Size_type>
-            (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
-      }
-    }
-
-    //Sorting negative floats
-    //Bins are iterated in reverse because max_neg_float = min_neg_int
-    template <class RandomAccessIter, class Div_type, class Size_type>
-    inline void
-    negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-                        std::vector<RandomAccessIter> &bin_cache,
-                        unsigned cache_offset, size_t *bin_sizes)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last, 
-                                                                 max, min))
-        return;
-
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
-            current++) >> log_divisor) - div_min)]++;
-      bins[bin_count - 1] = first;
-      for (int ii = bin_count - 2; ii >= 0; --ii)
-        bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
-
-      //Swap into place
-      RandomAccessIter nextbinstart = first;
-      //The last bin will always have the correct elements in it
-      for (int ii = bin_count - 1; ii > 0; --ii)
-        float_swap_loop<RandomAccessIter, Div_type>
-          (bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
-      //Update the end position because we don't process the last bin
-      bin_cache[cache_offset] = last;
-
-      //Return if we've completed bucketsorting
-      if (!log_divisor)
-        return;
-
-      //Recursing
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
-          lastPos = bin_cache[ii], --ii) {
-        size_t count = bin_cache[ii] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[ii]);
-        else
-          negative_float_sort_rec<RandomAccessIter, Div_type, Size_type>
-            (lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
-      }
-    }
-
-    //Sorting negative floats
-    //Bins are iterated in reverse order because max_neg_float = min_neg_int
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Size_type>
-    inline void
-    negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-              , size_t *bin_sizes, Right_shift rshift)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min, rshift))
-        return;
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
-      bins[bin_count - 1] = first;
-      for (int ii = bin_count - 2; ii >= 0; --ii)
-        bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
-
-      //Swap into place
-      RandomAccessIter nextbinstart = first;
-      //The last bin will always have the correct elements in it
-      for (int ii = bin_count - 1; ii > 0; --ii)
-        swap_loop<RandomAccessIter, Div_type, Right_shift>
-          (bins, nextbinstart, ii, rshift, bin_sizes, log_divisor, div_min);
-      //Update the end position of the unprocessed last bin
-      bin_cache[cache_offset] = last;
-
-      //Return if we've completed bucketsorting
-      if (!log_divisor)
-        return;
-
-      //Recursing
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
-          lastPos = bin_cache[ii], --ii) {
-        size_t count = bin_cache[ii] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[ii]);
-        else
-          negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
-                                  Size_type>
-            (lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, rshift);
-      }
-    }
-
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare, class Size_type>
-    inline void
-    negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-            std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset,
-            size_t *bin_sizes, Right_shift rshift, Compare comp)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp))
-        return;
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
-      bins[bin_count - 1] = first;
-      for (int ii = bin_count - 2; ii >= 0; --ii)
-        bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
-
-      //Swap into place
-      RandomAccessIter nextbinstart = first;
-      //The last bin will always have the correct elements in it
-      for (int ii = bin_count - 1; ii > 0; --ii)
-        swap_loop<RandomAccessIter, Div_type, Right_shift>
-          (bins, nextbinstart, ii, rshift, bin_sizes, log_divisor, div_min);
-      //Update the end position of the unprocessed last bin
-      bin_cache[cache_offset] = last;
-
-      //Return if we've completed bucketsorting
-      if (!log_divisor)
-        return;
-
-      //Recursing
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
-          lastPos = bin_cache[ii], --ii) {
-        size_t count = bin_cache[ii] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[ii], comp);
-        else
-          negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
-                                  Compare, Size_type>(lastPos, bin_cache[ii],
-                                                      bin_cache, cache_end,
-                                                      bin_sizes, rshift, comp);
-      }
-    }
-
-    //Casting special-case for floating-point sorting
-    template <class RandomAccessIter, class Div_type, class Size_type>
-    inline void
-    float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-                std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-                , size_t *bin_sizes)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last, 
-                                                                max, min))
-        return;
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
-            current++) >> log_divisor) - div_min)]++;
-      //The index of the first positive bin
-      //Must be divided small enough to fit into an integer
-      unsigned first_positive = (div_min < 0) ? unsigned(-div_min) : 0;
-      //Resetting if all bins are negative
-      if (cache_offset + first_positive > cache_end)
-        first_positive = cache_end - cache_offset;
-      //Reversing the order of the negative bins
-      //Note that because of the negative/positive ordering direction flip
-      //We can not depend upon bin order and positions matching up
-      //so bin_sizes must be reused to contain the end of the bin
-      if (first_positive > 0) {
-        bins[first_positive - 1] = first;
-        for (int ii = first_positive - 2; ii >= 0; --ii) {
-          bins[ii] = first + bin_sizes[ii + 1];
-          bin_sizes[ii] += bin_sizes[ii + 1];
-        }
-        //Handling positives following negatives
-        if (first_positive < bin_count) {
-          bins[first_positive] = first + bin_sizes[0];
-          bin_sizes[first_positive] += bin_sizes[0];
-        }
-      }
-      else
-        bins[0] = first;
-      for (unsigned u = first_positive; u < bin_count - 1; u++) {
-        bins[u + 1] = first + bin_sizes[u];
-        bin_sizes[u + 1] += bin_sizes[u];
-      }
-
-      //Swap into place
-      RandomAccessIter nextbinstart = first;
-      for (unsigned u = 0; u < bin_count; ++u) {
-        nextbinstart = first + bin_sizes[u];
-        inner_float_swap_loop<RandomAccessIter, Div_type>
-          (bins, nextbinstart, u, log_divisor, div_min);
-      }
-
-      if (!log_divisor)
-        return;
-
-      //Handling negative values first
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (int ii = cache_offset + first_positive - 1; 
-           ii >= static_cast<int>(cache_offset);
-           lastPos = bin_cache[ii--]) {
-        size_t count = bin_cache[ii] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[ii]);
-        //sort negative values using reversed-bin spreadsort
-        else
-          negative_float_sort_rec<RandomAccessIter, Div_type, Size_type>
-            (lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
-      }
-
-      for (unsigned u = cache_offset + first_positive; u < cache_end;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u]);
-        //sort positive values using normal spreadsort
-        else
-          positive_float_sort_rec<RandomAccessIter, Div_type, Size_type>
-            (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
-      }
-    }
-
-    //Functor implementation for recursive sorting
-    template <class RandomAccessIter, class Div_type, class Right_shift
-      , class Size_type>
-    inline void
-    float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-              , size_t *bin_sizes, Right_shift rshift)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min, rshift))
-        return;
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
-      //The index of the first positive bin
-      unsigned first_positive = (div_min < 0) ? unsigned(-div_min) : 0;
-      //Resetting if all bins are negative
-      if (cache_offset + first_positive > cache_end)
-        first_positive = cache_end - cache_offset;
-      //Reversing the order of the negative bins
-      //Note that because of the negative/positive ordering direction flip
-      //We can not depend upon bin order and positions matching up
-      //so bin_sizes must be reused to contain the end of the bin
-      if (first_positive > 0) {
-        bins[first_positive - 1] = first;
-        for (int ii = first_positive - 2; ii >= 0; --ii) {
-          bins[ii] = first + bin_sizes[ii + 1];
-          bin_sizes[ii] += bin_sizes[ii + 1];
-        }
-        //Handling positives following negatives
-        if (static_cast<unsigned>(first_positive) < bin_count) {
-          bins[first_positive] = first + bin_sizes[0];
-          bin_sizes[first_positive] += bin_sizes[0];
-        }
-      }
-      else
-        bins[0] = first;
-      for (unsigned u = first_positive; u < bin_count - 1; u++) {
-        bins[u + 1] = first + bin_sizes[u];
-        bin_sizes[u + 1] += bin_sizes[u];
-      }
-
-      //Swap into place
-      RandomAccessIter next_bin_start = first;
-      for (unsigned u = 0; u < bin_count; ++u) {
-        next_bin_start = first + bin_sizes[u];
-        inner_swap_loop<RandomAccessIter, Div_type, Right_shift>
-          (bins, next_bin_start, u, rshift, log_divisor, div_min);
-      }
-
-      //Return if we've completed bucketsorting
-      if (!log_divisor)
-        return;
-
-      //Handling negative values first
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (int ii = cache_offset + first_positive - 1; 
-           ii >= static_cast<int>(cache_offset);
-           lastPos = bin_cache[ii--]) {
-        size_t count = bin_cache[ii] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[ii]);
-        //sort negative values using reversed-bin spreadsort
-        else
-          negative_float_sort_rec<RandomAccessIter, Div_type,
-            Right_shift, Size_type>(lastPos, bin_cache[ii], bin_cache,
-                                    cache_end, bin_sizes, rshift);
-      }
-
-      for (unsigned u = cache_offset + first_positive; u < cache_end;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u]);
-        //sort positive values using normal spreadsort
-        else
-          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
-                          float_log_mean_bin_size, float_log_min_split_count,
-                          float_log_finishing_count>
-            (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
-      }
-    }
-
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare, class Size_type>
-    inline void
-    float_sort_rec(RandomAccessIter first, RandomAccessIter last,
-            std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset,
-            size_t *bin_sizes, Right_shift rshift, Compare comp)
-    {
-      Div_type max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp))
-        return;
-      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type(max - min)));
-      Div_type div_min = min >> log_divisor;
-      Div_type div_max = max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
-      //The index of the first positive bin
-      unsigned first_positive = 
-        (div_min < 0) ? static_cast<unsigned>(-div_min) : 0;
-      //Resetting if all bins are negative
-      if (cache_offset + first_positive > cache_end)
-        first_positive = cache_end - cache_offset;
-      //Reversing the order of the negative bins
-      //Note that because of the negative/positive ordering direction flip
-      //We can not depend upon bin order and positions matching up
-      //so bin_sizes must be reused to contain the end of the bin
-      if (first_positive > 0) {
-        bins[first_positive - 1] = first;
-        for (int ii = first_positive - 2; ii >= 0; --ii) {
-          bins[ii] = first + bin_sizes[ii + 1];
-          bin_sizes[ii] += bin_sizes[ii + 1];
-        }
-        //Handling positives following negatives
-        if (static_cast<unsigned>(first_positive) < bin_count) {
-          bins[first_positive] = first + bin_sizes[0];
-          bin_sizes[first_positive] += bin_sizes[0];
-        }
-      }
-      else
-        bins[0] = first;
-      for (unsigned u = first_positive; u < bin_count - 1; u++) {
-        bins[u + 1] = first + bin_sizes[u];
-        bin_sizes[u + 1] += bin_sizes[u];
-      }
-
-      //Swap into place
-      RandomAccessIter next_bin_start = first;
-      for (unsigned u = 0; u < bin_count; ++u) {
-        next_bin_start = first + bin_sizes[u];
-        inner_swap_loop<RandomAccessIter, Div_type, Right_shift>
-          (bins, next_bin_start, u, rshift, log_divisor, div_min);
-      }
-
-      //Return if we've completed bucketsorting
-      if (!log_divisor)
-        return;
-
-      //Handling negative values first
-      size_t max_count = get_min_count<float_log_mean_bin_size,
-                                       float_log_min_split_count,
-                                       float_log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (int ii = cache_offset + first_positive - 1; 
-           ii >= static_cast<int>(cache_offset);
-           lastPos = bin_cache[ii--]) {
-        size_t count = bin_cache[ii] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[ii], comp);
-        //sort negative values using reversed-bin spreadsort
-        else
-          negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
-                                  Compare, Size_type>(lastPos, bin_cache[ii],
-                                                      bin_cache, cache_end,
-                                                      bin_sizes, rshift, comp);
-      }
-
-      for (unsigned u = cache_offset + first_positive; u < cache_end;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u], comp);
-        //sort positive values using normal spreadsort
-        else
-          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
-                          Size_type, float_log_mean_bin_size,
-                          float_log_min_split_count, float_log_finishing_count>
-      (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
-      }
-    }
-
-    //Checking whether the value type is a float, and trying a 32-bit integer
-    template <class RandomAccessIter>
-    inline typename boost::enable_if_c< sizeof(boost::uint32_t) ==
-      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
-      && std::numeric_limits<typename
-      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
-      void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      float_sort_rec<RandomAccessIter, boost::int32_t, boost::uint32_t>
-        (first, last, bin_cache, 0, bin_sizes);
-    }
-
-    //Checking whether the value type is a double, and using a 64-bit integer
-    template <class RandomAccessIter>
-    inline typename boost::enable_if_c< sizeof(boost::uint64_t) ==
-      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
-      && std::numeric_limits<typename
-      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
-      void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      float_sort_rec<RandomAccessIter, boost::int64_t, boost::uint64_t>
-        (first, last, bin_cache, 0, bin_sizes);
-    }
-
-    template <class RandomAccessIter>
-    inline typename boost::disable_if_c< (sizeof(boost::uint64_t) ==
-      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
-      || sizeof(boost::uint32_t) ==
-      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type))
-      && std::numeric_limits<typename
-      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
-      void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last)
-    {
-      BOOST_STATIC_WARNING(!(sizeof(boost::uint64_t) ==
-      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
-      || sizeof(boost::uint32_t) ==
-      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type))
-      || !std::numeric_limits<typename
-      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559);
-      std::sort(first, last);
-    }
-
-    //These approaches require the user to do the typecast
-    //with rshift but default comparision
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline typename boost::enable_if_c< sizeof(size_t) >= sizeof(Div_type),
-      void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-               Right_shift rshift)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      float_sort_rec<RandomAccessIter, Div_type, Right_shift, size_t>
-        (first, last, bin_cache, 0, bin_sizes, rshift);
-    }
-
-    //maximum integer size with rshift but default comparision
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline typename boost::enable_if_c< sizeof(size_t) < sizeof(Div_type)
-      && sizeof(boost::uintmax_t) >= sizeof(Div_type), void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-               Right_shift rshift)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      float_sort_rec<RandomAccessIter, Div_type, Right_shift, boost::uintmax_t>
-        (first, last, bin_cache, 0, bin_sizes, rshift);
-    }
-
-    //sizeof(Div_type) doesn't match, so use std::sort
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline typename boost::disable_if_c< sizeof(boost::uintmax_t) >=
-      sizeof(Div_type), void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-               Right_shift rshift)
-    {
-      BOOST_STATIC_WARNING(sizeof(boost::uintmax_t) >= sizeof(Div_type));
-      std::sort(first, last);
-    }
-
-    //specialized comparison
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    inline typename boost::enable_if_c< sizeof(size_t) >= sizeof(Div_type),
-      void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-               Right_shift rshift, Compare comp)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      float_sort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
-        size_t>
-        (first, last, bin_cache, 0, bin_sizes, rshift, comp);
-    }
-
-    //max-sized integer with specialized comparison
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    inline typename boost::enable_if_c< sizeof(size_t) < sizeof(Div_type)
-      && sizeof(boost::uintmax_t) >= sizeof(Div_type), void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-               Right_shift rshift, Compare comp)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      float_sort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
-        boost::uintmax_t>
-        (first, last, bin_cache, 0, bin_sizes, rshift, comp);
-    }
-
-    //sizeof(Div_type) doesn't match, so use std::sort
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    inline typename boost::disable_if_c< sizeof(boost::uintmax_t) >=
-      sizeof(Div_type), void >::type
-    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-               Right_shift rshift, Compare comp)
-    {
-      BOOST_STATIC_WARNING(sizeof(boost::uintmax_t) >= sizeof(Div_type));
-      std::sort(first, last, comp);
-    }
-  }
-}
-}
-}
-
-#endif
+// Details for templated Spreadsort-based float_sort.
+
+//          Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+float_mem_cast fix provided by:
+Scott McMurray
+*/
+
+#ifndef BOOST_SORT_SPREADSORT_DETAIL_FLOAT_SORT_HPP
+#define BOOST_SORT_SPREADSORT_DETAIL_FLOAT_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <limits>
+#include <functional>
+#include <boost/static_assert.hpp>
+#include <boost/serialization/static_warning.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/integer_sort.hpp>
+#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
+#include <boost/cstdint.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+  namespace detail {
+    //Reinterprets the float that floatiter points at as a Cast_type integer
+    template<class Cast_type, class RandomAccessIter>
+    inline Cast_type
+    cast_float_iter(const RandomAccessIter & floatiter)
+    {
+      typedef typename std::iterator_traits<RandomAccessIter>::value_type
+        Float_type;
+      //Compile-time guards: integer target, IEEE source, identical byte width
+      BOOST_STATIC_ASSERT(std::numeric_limits<Cast_type>::is_integer);
+      BOOST_STATIC_ASSERT(std::numeric_limits<Float_type>::is_iec559);
+      BOOST_STATIC_ASSERT(sizeof(Float_type) == sizeof(Cast_type));
+      Cast_type bits;  //receives the raw bytes of *floatiter, not its value
+      std::memcpy(&bits, &(*floatiter), sizeof(Cast_type));  //NOTE(review): needs <cstring>; confirm it is included
+      return bits;
+    }
+
+    // Single pass over the non-empty range [current, last): returns true when
+    // every element is >= its predecessor; max/min always get the rshift(x, 0) extremes.
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline bool
+    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
+                  Div_type & max, Div_type & min, Right_shift rshift)
+    {
+      bool in_order = true;
+      max = rshift(*current, 0);
+      min = max;
+      for (RandomAccessIter before = current++; current < last;
+           before = current++) {
+        const Div_type key = rshift(*current, 0);
+        in_order &= *current >= *before;  //ordering is judged on raw elements
+        if (max < key)
+          max = key;
+        else if (key < min)
+          min = key;
+      }
+      return in_order;
+    }
+
+    // Single pass over the non-empty range [current, last): true when comp never
+    // ranks an element before its predecessor; max/min get rshift(x, 0) extremes.
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline bool
+    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
+                               Div_type & max, Div_type & min,
+                               Right_shift rshift, Compare comp)
+    {
+      bool in_order = true;
+      max = rshift(*current, 0);
+      min = max;
+      for (RandomAccessIter before = current++; current < last;
+           before = current++) {
+        const Div_type key = rshift(*current, 0);
+        in_order = !comp(*current, *before) && in_order;  //comp always runs
+        if (max < key)
+          max = key;
+        else if (key < min)
+          min = key;
+      }
+      return in_order;
+    }
+
+    //Swap loop for cast floats: leaves only bin-ii elements in [bins[ii], nextbinstart)
+    template <class RandomAccessIter, class Div_type>
+    inline void inner_float_swap_loop(RandomAccessIter * bins,
+                        const RandomAccessIter & nextbinstart, unsigned ii
+                        , const unsigned log_divisor, const Div_type div_min)
+    {  //bin index of x is (cast_float_iter(x) >> log_divisor) - div_min
+      RandomAccessIter * local_bin = bins + ii;  //bins[ii]: where the scan starts
+      for (RandomAccessIter current = *local_bin; current < nextbinstart;
+          ++current) {  //one pass per slot up to nextbinstart
+        for (RandomAccessIter * target_bin =
+            (bins + ((cast_float_iter<Div_type, RandomAccessIter>(current) >>
+                      log_divisor) - div_min));  target_bin != local_bin;
+          target_bin = bins + ((cast_float_iter<Div_type, RandomAccessIter>
+                               (current) >> log_divisor) - div_min)) {  //loops until *current maps to bin ii
+          typename std::iterator_traits<RandomAccessIter>::value_type tmp;
+          RandomAccessIter b = (*target_bin)++;  //claim the target bin's next slot
+          RandomAccessIter * b_bin = bins + ((cast_float_iter<Div_type,
+                              RandomAccessIter>(b) >> log_divisor) - div_min);
+          //Three-way swap: when the element sitting at b maps to a third bin it
+          //moves to that bin's next slot c, and c's old element lands in current
+          if (b_bin != local_bin) {
+            RandomAccessIter c = (*b_bin)++;
+            tmp = *c;
+            *c = *b;
+          }
+          else
+            tmp = *b;  //b's element already belongs to bin ii: plain two-way swap
+          *b = *current;
+          *current = tmp;
+        }
+      }
+      *local_bin = nextbinstart;  //bins[ii] now marks the end of the scanned span
+    }
+
+    //Advances nextbinstart by bin_sizes[ii], then swaps bin ii into place up to it
+    template <class RandomAccessIter, class Div_type>
+    inline void float_swap_loop(RandomAccessIter * bins,
+        RandomAccessIter & nextbinstart, unsigned ii, const size_t *bin_sizes,
+        const unsigned log_divisor, const Div_type div_min)
+    {
+      const RandomAccessIter & bin_end = (nextbinstart += bin_sizes[ii]);
+      inner_float_swap_loop<RandomAccessIter, Div_type>
+        (bins, bin_end, ii, log_divisor, div_min);
+    }
+
+    // Single pass over the non-empty range [current, last): returns true when
+    // every element is >= its predecessor; max/min get the extreme integer casts.
+    template <class RandomAccessIter, class Cast_type>
+    inline bool
+    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
+                  Cast_type & max, Cast_type & min)
+    {
+      bool in_order = true;
+      max = cast_float_iter<Cast_type, RandomAccessIter>(current);
+      min = max;
+      for (RandomAccessIter before = current++; current < last;
+           before = current++) {
+        Cast_type key = cast_float_iter<Cast_type, RandomAccessIter>(current);
+        in_order &= *current >= *before;  //ordering is judged on raw elements
+        if (max < key)
+          max = key;
+        else if (key < min)
+          min = key;
+      }
+      return in_order;
+    }
+
+    //Recursive spreadsort step for positive floats, binned by their integer cast
+    template <class RandomAccessIter, class Div_type, class Size_type>
+    inline void
+    positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+              , size_t *bin_sizes)
+    {
+      Div_type max, min;
+      if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last, 
+                                                                max, min))
+        return;  //range was already in order
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor;
+      Div_type div_max = max >> log_divisor;
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end;  //passed to size_bins; later bounds the bin_cache walk
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);  //presumably points into bin_cache - TODO confirm in size_bins
+
+      //Histogram: count the elements whose shifted cast lands in each bin
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
+            current++) >> log_divisor) - div_min)]++;
+      bins[0] = first;  //bin starts are the running sum of the bin sizes
+      for (unsigned u = 0; u < bin_count - 1; u++)
+        bins[u + 1] = bins[u] + bin_sizes[u];
+
+
+      //Swap into place; the loop stops one bin short of the last bin
+      RandomAccessIter nextbinstart = first;
+      for (unsigned u = 0; u < bin_count - 1; ++u)
+        float_swap_loop<RandomAccessIter, Div_type>
+          (bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
+      bins[bin_count - 1] = last;  //end of the last bin, which got no swap pass
+
+      //A zero log_divisor means each bin holds one distinct cast value: done
+      if (!log_divisor)
+        return;
+
+      //Recursing: bin_cache[u] is read as the end of a bin, lastPos as its start
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first;
+      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
+          ++u) {
+        size_t count = bin_cache[u] - lastPos;
+        if (count < 2)
+          continue;  //zero or one element: nothing to order
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u]);  //below the threshold: comparison sort
+        else
+          positive_float_sort_rec<RandomAccessIter, Div_type, Size_type>
+            (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
+      }
+    }
+
+    //Spreadsort pass for negative floats, binned on their Div_type integer cast.
+    //Bins are iterated in reverse because max_neg_float = min_neg_int
+    template <class RandomAccessIter, class Div_type, class Size_type>
+    inline void
+    negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+                        std::vector<RandomAccessIter> &bin_cache,
+                        unsigned cache_offset, size_t *bin_sizes)
+    {
+      Div_type max, min; //extreme integer keys, filled in by the call below
+      if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last, 
+                                                                 max, min))
+        return; //per the helper's name, the range is presumably already sorted
+
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor; //smallest bin key
+      Div_type div_max = max >> log_divisor; //largest bin key
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end; //set by size_bins; handed to children as their cache_offset
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Calculating the size of each bin; bin index = (key >> log_divisor) - div_min
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
+            current++) >> log_divisor) - div_min)]++;
+      bins[bin_count - 1] = first; //the highest bin index is placed first in memory
+      for (int ii = bin_count - 2; ii >= 0; --ii)
+        bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //bin ii starts where bin ii + 1 ends
+
+      //Swap into place
+      RandomAccessIter nextbinstart = first;
+      //The last bin will always have the correct elements in it
+      for (int ii = bin_count - 1; ii > 0; --ii) //bin index 0 is deliberately skipped
+        float_swap_loop<RandomAccessIter, Div_type>
+          (bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
+      //Update the end position because we don't process the last bin
+      bin_cache[cache_offset] = last; //presumably bins[0] aliases this slot - TODO confirm in size_bins
+
+      //Return if we've completed bucketsorting (no low-order key bits were shifted off)
+      if (!log_divisor)
+        return;
+
+      //Recursing: bins below max_count elements go to std::sort, larger ones are split again
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first; //start of the bin currently being visited
+      for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
+          lastPos = bin_cache[ii], --ii) {
+        size_t count = bin_cache[ii] - lastPos; //the bin spans [lastPos, bin_cache[ii])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[ii]);
+        else
+          negative_float_sort_rec<RandomAccessIter, Div_type, Size_type>
+            (lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes); //cache_end becomes the child's cache_offset
+      }
+    }
+
+    //Spreadsort pass for negative floats whose integer key comes from a user rshift functor.
+    //Bins are iterated in reverse order because max_neg_float = min_neg_int
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Size_type>
+    inline void
+    negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+              , size_t *bin_sizes, Right_shift rshift)
+    {
+      Div_type max, min; //extreme keys, filled in by the call below
+      if (is_sorted_or_find_extremes(first, last, max, min, rshift))
+        return; //per the helper's name, the range is presumably already sorted
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor; //smallest bin key
+      Div_type div_max = max >> log_divisor; //largest bin key
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end; //set by size_bins; handed to children as their cache_offset
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Calculating the size of each bin; bin index = rshift(value, log_divisor) - div_min
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
+      bins[bin_count - 1] = first; //the highest bin index is placed first in memory
+      for (int ii = bin_count - 2; ii >= 0; --ii)
+        bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //bin ii starts where bin ii + 1 ends
+
+      //Swap into place
+      RandomAccessIter nextbinstart = first;
+      //The last bin will always have the correct elements in it
+      for (int ii = bin_count - 1; ii > 0; --ii) //bin index 0 is deliberately skipped
+        swap_loop<RandomAccessIter, Div_type, Right_shift>
+          (bins, nextbinstart, ii, rshift, bin_sizes, log_divisor, div_min);
+      //Update the end position of the unprocessed last bin
+      bin_cache[cache_offset] = last; //presumably bins[0] aliases this slot - TODO confirm in size_bins
+
+      //Return if we've completed bucketsorting (no low-order key bits were shifted off)
+      if (!log_divisor)
+        return;
+
+      //Recursing: bins below max_count elements go to std::sort, larger ones are split again
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first; //start of the bin currently being visited
+      for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
+          lastPos = bin_cache[ii], --ii) {
+        size_t count = bin_cache[ii] - lastPos; //the bin spans [lastPos, bin_cache[ii])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[ii]); //this overload has no comparator, so operator< is used
+        else
+          negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
+                                  Size_type>
+            (lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, rshift); //cache_end becomes the child's cache_offset
+      }
+    }
+
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare, class Size_type>
+    inline void //reversed-bin pass for negative floats using a user rshift functor and comparison
+    negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+            std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset,
+            size_t *bin_sizes, Right_shift rshift, Compare comp)
+    {
+      Div_type max, min; //extreme keys, filled in by the call below
+      if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp))
+        return; //per the helper's name, the range is presumably already sorted
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor; //smallest bin key
+      Div_type div_max = max >> log_divisor; //largest bin key
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end; //set by size_bins; handed to children as their cache_offset
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Calculating the size of each bin; bin index = rshift(value, log_divisor) - div_min
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
+      bins[bin_count - 1] = first; //the highest bin index is placed first in memory
+      for (int ii = bin_count - 2; ii >= 0; --ii)
+        bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //bin ii starts where bin ii + 1 ends
+
+      //Swap into place
+      RandomAccessIter nextbinstart = first;
+      //The last bin will always have the correct elements in it
+      for (int ii = bin_count - 1; ii > 0; --ii) //bin index 0 is deliberately skipped
+        swap_loop<RandomAccessIter, Div_type, Right_shift>
+          (bins, nextbinstart, ii, rshift, bin_sizes, log_divisor, div_min);
+      //Update the end position of the unprocessed last bin
+      bin_cache[cache_offset] = last; //presumably bins[0] aliases this slot - TODO confirm in size_bins
+
+      //Return if we've completed bucketsorting (no low-order key bits were shifted off)
+      if (!log_divisor)
+        return;
+
+      //Recursing: bins below max_count elements go to std::sort, larger ones are split again
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first; //start of the bin currently being visited
+      for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
+          lastPos = bin_cache[ii], --ii) {
+        size_t count = bin_cache[ii] - lastPos; //the bin spans [lastPos, bin_cache[ii])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[ii], comp);
+        else
+          negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
+                                  Compare, Size_type>(lastPos, bin_cache[ii],
+                                                      bin_cache, cache_end, //cache_end becomes the child's cache_offset
+                                                      bin_sizes, rshift, comp);
+      }
+    }
+
+    //Casting special-case for floating-point sorting: bins on each element's Div_type cast
+    template <class RandomAccessIter, class Div_type, class Size_type>
+    inline void
+    float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+                std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+                , size_t *bin_sizes)
+    {
+      Div_type max, min; //extreme integer keys, filled in by the call below
+      if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last, 
+                                                                max, min))
+        return; //per the helper's name, the range is presumably already sorted
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor; //smallest bin key
+      Div_type div_max = max >> log_divisor; //largest bin key
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end; //set by size_bins; handed to children as their cache_offset
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Calculating the size of each bin; bin index = (key >> log_divisor) - div_min
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
+            current++) >> log_divisor) - div_min)]++;
+      //The index of the first positive bin (bin key 0 maps to index -div_min)
+      //Must be divided small enough to fit into an integer
+      unsigned first_positive = (div_min < 0) ? unsigned(-div_min) : 0;
+      //Resetting if all bins are negative
+      if (cache_offset + first_positive > cache_end)
+        first_positive = cache_end - cache_offset;
+      //Reversing the order of the negative bins
+      //Note that because of the negative/positive ordering direction flip
+      //We can not depend upon bin order and positions matching up
+      //so bin_sizes must be reused to contain the end of the bin
+      if (first_positive > 0) {
+        bins[first_positive - 1] = first; //the highest negative bin index comes first in memory
+        for (int ii = first_positive - 2; ii >= 0; --ii) {
+          bins[ii] = first + bin_sizes[ii + 1]; //bin ii starts where bin ii + 1 ends
+          bin_sizes[ii] += bin_sizes[ii + 1]; //count becomes the end offset of bin ii from first
+        }
+        //Handling positives following negatives
+        if (first_positive < bin_count) {
+          bins[first_positive] = first + bin_sizes[0]; //bin_sizes[0] is now the end offset of all negatives
+          bin_sizes[first_positive] += bin_sizes[0];
+        }
+      }
+      else
+        bins[0] = first; //no negative bins: ordinary ascending layout from first
+      for (unsigned u = first_positive; u < bin_count - 1; u++) {
+        bins[u + 1] = first + bin_sizes[u]; //bin u + 1 starts where bin u ends
+        bin_sizes[u + 1] += bin_sizes[u]; //running total: end offset of bin u + 1
+      }
+
+      //Swap into place
+      RandomAccessIter nextbinstart = first; //overwritten on every iteration below
+      for (unsigned u = 0; u < bin_count; ++u) {
+        nextbinstart = first + bin_sizes[u]; //end of bin u (bin_sizes holds end offsets now)
+        inner_float_swap_loop<RandomAccessIter, Div_type>
+          (bins, nextbinstart, u, log_divisor, div_min);
+      }
+
+      if (!log_divisor) //no low-order key bits were shifted off, so there is nothing left to recurse on
+        return;
+
+      //Handling negative values first
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first; //start of the bin currently being visited
+      for (int ii = cache_offset + first_positive - 1; //starts below cache_offset when first_positive is 0
+           ii >= static_cast<int>(cache_offset);
+           lastPos = bin_cache[ii--]) {
+        size_t count = bin_cache[ii] - lastPos; //the bin spans [lastPos, bin_cache[ii])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[ii]);
+        //sort negative values using reversed-bin spreadsort
+        else
+          negative_float_sort_rec<RandomAccessIter, Div_type, Size_type>
+            (lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
+      }
+
+      for (unsigned u = cache_offset + first_positive; u < cache_end; //lastPos carries over from the loop above
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //the bin spans [lastPos, bin_cache[u])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u]);
+        //sort positive values using normal spreadsort
+        else
+          positive_float_sort_rec<RandomAccessIter, Div_type, Size_type>
+            (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
+      }
+    }
+
+    //Functor implementation for recursive sorting: the bin key comes from rshift(value, log_divisor)
+    template <class RandomAccessIter, class Div_type, class Right_shift
+      , class Size_type>
+    inline void
+    float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+              , size_t *bin_sizes, Right_shift rshift)
+    {
+      Div_type max, min; //extreme keys, filled in by the call below
+      if (is_sorted_or_find_extremes(first, last, max, min, rshift))
+        return; //per the helper's name, the range is presumably already sorted
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor; //smallest bin key
+      Div_type div_max = max >> log_divisor; //largest bin key
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end; //set by size_bins; handed to children as their cache_offset
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Calculating the size of each bin; bin index = rshift(value, log_divisor) - div_min
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
+      //The index of the first positive bin (bin key 0 maps to index -div_min)
+      unsigned first_positive = (div_min < 0) ? unsigned(-div_min) : 0;
+      //Resetting if all bins are negative
+      if (cache_offset + first_positive > cache_end)
+        first_positive = cache_end - cache_offset;
+      //Reversing the order of the negative bins
+      //Note that because of the negative/positive ordering direction flip
+      //We can not depend upon bin order and positions matching up
+      //so bin_sizes must be reused to contain the end of the bin
+      if (first_positive > 0) {
+        bins[first_positive - 1] = first; //the highest negative bin index comes first in memory
+        for (int ii = first_positive - 2; ii >= 0; --ii) {
+          bins[ii] = first + bin_sizes[ii + 1]; //bin ii starts where bin ii + 1 ends
+          bin_sizes[ii] += bin_sizes[ii + 1]; //count becomes the end offset of bin ii from first
+        }
+        //Handling positives following negatives
+        if (static_cast<unsigned>(first_positive) < bin_count) { //first_positive is already unsigned; the cast changes nothing
+          bins[first_positive] = first + bin_sizes[0]; //bin_sizes[0] is now the end offset of all negatives
+          bin_sizes[first_positive] += bin_sizes[0];
+        }
+      }
+      else
+        bins[0] = first; //no negative bins: ordinary ascending layout from first
+      for (unsigned u = first_positive; u < bin_count - 1; u++) {
+        bins[u + 1] = first + bin_sizes[u]; //bin u + 1 starts where bin u ends
+        bin_sizes[u + 1] += bin_sizes[u]; //running total: end offset of bin u + 1
+      }
+
+      //Swap into place
+      RandomAccessIter next_bin_start = first; //overwritten on every iteration below
+      for (unsigned u = 0; u < bin_count; ++u) {
+        next_bin_start = first + bin_sizes[u]; //end of bin u (bin_sizes holds end offsets now)
+        inner_swap_loop<RandomAccessIter, Div_type, Right_shift>
+          (bins, next_bin_start, u, rshift, log_divisor, div_min);
+      }
+
+      //Return if we've completed bucketsorting (no low-order key bits were shifted off)
+      if (!log_divisor)
+        return;
+
+      //Handling negative values first
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first; //start of the bin currently being visited
+      for (int ii = cache_offset + first_positive - 1; //starts below cache_offset when first_positive is 0
+           ii >= static_cast<int>(cache_offset);
+           lastPos = bin_cache[ii--]) {
+        size_t count = bin_cache[ii] - lastPos; //the bin spans [lastPos, bin_cache[ii])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[ii]); //this overload has no comparator, so operator< is used
+        //sort negative values using reversed-bin spreadsort
+        else
+          negative_float_sort_rec<RandomAccessIter, Div_type,
+            Right_shift, Size_type>(lastPos, bin_cache[ii], bin_cache,
+                                    cache_end, bin_sizes, rshift);
+      }
+
+      for (unsigned u = cache_offset + first_positive; u < cache_end; //lastPos carries over from the loop above
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //the bin spans [lastPos, bin_cache[u])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u]);
+        //sort positive values using normal spreadsort
+        else
+          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
+                          float_log_mean_bin_size, float_log_min_split_count,
+                          float_log_finishing_count>
+            (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
+      }
+    }
+
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare, class Size_type>
+    inline void //recursive float sort using a user rshift functor for bin keys and comp for ordering
+    float_sort_rec(RandomAccessIter first, RandomAccessIter last,
+            std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset,
+            size_t *bin_sizes, Right_shift rshift, Compare comp)
+    {
+      Div_type max, min; //extreme keys, filled in by the call below
+      if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp))
+        return; //per the helper's name, the range is presumably already sorted
+      unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type(max - min)));
+      Div_type div_min = min >> log_divisor; //smallest bin key
+      Div_type div_max = max >> log_divisor; //largest bin key
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end; //set by size_bins; handed to children as their cache_offset
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Calculating the size of each bin; bin index = rshift(value, log_divisor) - div_min
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
+      //The index of the first positive bin (bin key 0 maps to index -div_min)
+      unsigned first_positive = 
+        (div_min < 0) ? static_cast<unsigned>(-div_min) : 0;
+      //Resetting if all bins are negative
+      if (cache_offset + first_positive > cache_end)
+        first_positive = cache_end - cache_offset;
+      //Reversing the order of the negative bins
+      //Note that because of the negative/positive ordering direction flip
+      //We can not depend upon bin order and positions matching up
+      //so bin_sizes must be reused to contain the end of the bin
+      if (first_positive > 0) {
+        bins[first_positive - 1] = first; //the highest negative bin index comes first in memory
+        for (int ii = first_positive - 2; ii >= 0; --ii) {
+          bins[ii] = first + bin_sizes[ii + 1]; //bin ii starts where bin ii + 1 ends
+          bin_sizes[ii] += bin_sizes[ii + 1]; //count becomes the end offset of bin ii from first
+        }
+        //Handling positives following negatives
+        if (static_cast<unsigned>(first_positive) < bin_count) { //first_positive is already unsigned; the cast changes nothing
+          bins[first_positive] = first + bin_sizes[0]; //bin_sizes[0] is now the end offset of all negatives
+          bin_sizes[first_positive] += bin_sizes[0];
+        }
+      }
+      else
+        bins[0] = first; //no negative bins: ordinary ascending layout from first
+      for (unsigned u = first_positive; u < bin_count - 1; u++) {
+        bins[u + 1] = first + bin_sizes[u]; //bin u + 1 starts where bin u ends
+        bin_sizes[u + 1] += bin_sizes[u]; //running total: end offset of bin u + 1
+      }
+
+      //Swap into place
+      RandomAccessIter next_bin_start = first; //overwritten on every iteration below
+      for (unsigned u = 0; u < bin_count; ++u) {
+        next_bin_start = first + bin_sizes[u]; //end of bin u (bin_sizes holds end offsets now)
+        inner_swap_loop<RandomAccessIter, Div_type, Right_shift>
+          (bins, next_bin_start, u, rshift, log_divisor, div_min);
+      }
+
+      //Return if we've completed bucketsorting (no low-order key bits were shifted off)
+      if (!log_divisor)
+        return;
+
+      //Handling negative values first
+      size_t max_count = get_min_count<float_log_mean_bin_size,
+                                       float_log_min_split_count,
+                                       float_log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first; //start of the bin currently being visited
+      for (int ii = cache_offset + first_positive - 1; //starts below cache_offset when first_positive is 0
+           ii >= static_cast<int>(cache_offset);
+           lastPos = bin_cache[ii--]) {
+        size_t count = bin_cache[ii] - lastPos; //the bin spans [lastPos, bin_cache[ii])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[ii], comp);
+        //sort negative values using reversed-bin spreadsort
+        else
+          negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
+                                  Compare, Size_type>(lastPos, bin_cache[ii],
+                                                      bin_cache, cache_end,
+                                                      bin_sizes, rshift, comp);
+      }
+
+      for (unsigned u = cache_offset + first_positive; u < cache_end; //lastPos carries over from the loop above
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //the bin spans [lastPos, bin_cache[u])
+        if (count < 2) //zero or one element needs no sorting
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u], comp);
+        //sort positive values using normal spreadsort
+        else
+          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
+                          Size_type, float_log_mean_bin_size,
+                          float_log_min_split_count, float_log_finishing_count>
+      (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
+      }
+    }
+
+    //Overload for IEC 559 value types the size of boost::uint32_t; bins on boost::int32_t
+    template <class RandomAccessIter>
+    inline typename boost::enable_if_c< sizeof(boost::uint32_t) ==
+      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
+      && std::numeric_limits<typename
+      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
+      void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last)
+    {
+      std::vector<RandomAccessIter> cache;        //bin boundary storage, starts empty
+      size_t counts[1 << max_finishing_splits];   //per-bin counters shared by all levels
+      float_sort_rec<RandomAccessIter, boost::int32_t,
+                     boost::uint32_t>(first, last, cache, 0, counts);
+    }
+
+    //Overload for IEC 559 value types the size of boost::uint64_t; bins on boost::int64_t
+    template <class RandomAccessIter>
+    inline typename boost::enable_if_c< sizeof(boost::uint64_t) ==
+      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
+      && std::numeric_limits<typename
+      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
+      void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last)
+    {
+      std::vector<RandomAccessIter> cache;        //bin boundary storage, starts empty
+      size_t counts[1 << max_finishing_splits];   //per-bin counters shared by all levels
+      float_sort_rec<RandomAccessIter, boost::int64_t,
+                     boost::uint64_t>(first, last, cache, 0, counts);
+    }
+
+    template <class RandomAccessIter> //fallback: value type is not a 32- or 64-bit-sized IEC 559 type
+    inline typename boost::disable_if_c< (sizeof(boost::uint64_t) ==
+      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
+      || sizeof(boost::uint32_t) ==
+      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type))
+      && std::numeric_limits<typename
+      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
+      void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last)
+    { //NOTE(review): the condition below is the negation of the disable_if_c predicate,
+      BOOST_STATIC_WARNING(!(sizeof(boost::uint64_t) == //so it looks always-true here - confirm the warning can fire
+      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
+      || sizeof(boost::uint32_t) ==
+      sizeof(typename std::iterator_traits<RandomAccessIter>::value_type))
+      || !std::numeric_limits<typename
+      std::iterator_traits<RandomAccessIter>::value_type>::is_iec559);
+      std::sort(first, last); //plain comparison sort with operator<
+    }
+
+    //The overloads from here on rely on the caller's rshift functor for the typecast.
+    //rshift with the default comparison; Div_type no wider than size_t
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline typename boost::enable_if_c< sizeof(size_t) >= sizeof(Div_type),
+      void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+               Right_shift rshift)
+    {
+      std::vector<RandomAccessIter> cache;        //bin boundary storage, starts empty
+      size_t counts[1 << max_finishing_splits];   //per-bin counters shared by all levels
+      float_sort_rec<RandomAccessIter, Div_type, Right_shift,
+                     size_t>(first, last, cache, 0, counts, rshift);
+    }
+
+    //rshift with the default comparison; Div_type wider than size_t, up to boost::uintmax_t
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline typename boost::enable_if_c< sizeof(size_t) < sizeof(Div_type)
+      && sizeof(boost::uintmax_t) >= sizeof(Div_type), void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+               Right_shift rshift)
+    {
+      std::vector<RandomAccessIter> cache;        //bin boundary storage, starts empty
+      size_t counts[1 << max_finishing_splits];   //per-bin counters shared by all levels
+      float_sort_rec<RandomAccessIter, Div_type, Right_shift,
+                     boost::uintmax_t>(first, last, cache, 0, counts, rshift);
+    }
+
+    //sizeof(Div_type) exceeds sizeof(boost::uintmax_t), so fall back to std::sort
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline typename boost::disable_if_c< sizeof(boost::uintmax_t) >=
+      sizeof(Div_type), void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+               Right_shift rshift) //rshift is accepted for signature parity but not used
+    {
+      BOOST_STATIC_WARNING(sizeof(boost::uintmax_t) >= sizeof(Div_type)); //false whenever this overload is selected; presumably emits a compile-time warning
+      std::sort(first, last); //plain comparison sort with operator<
+    }
+
+    //rshift plus a user-supplied comparison; Div_type no wider than size_t
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline typename boost::enable_if_c< sizeof(size_t) >= sizeof(Div_type),
+      void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+               Right_shift rshift, Compare comp)
+    {
+      std::vector<RandomAccessIter> cache;        //bin boundary storage, starts empty
+      size_t counts[1 << max_finishing_splits];   //per-bin counters shared by all levels
+      //size_t is handed to the recursion as its Size_type
+      float_sort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
+                     size_t>(first, last, cache, 0, counts, rshift, comp);
+    }
+
+    //rshift plus a user-supplied comparison; Div_type wider than size_t, up to boost::uintmax_t
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline typename boost::enable_if_c< sizeof(size_t) < sizeof(Div_type)
+      && sizeof(boost::uintmax_t) >= sizeof(Div_type), void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+               Right_shift rshift, Compare comp)
+    {
+      std::vector<RandomAccessIter> cache;        //bin boundary storage, starts empty
+      size_t counts[1 << max_finishing_splits];   //per-bin counters shared by all levels
+      //boost::uintmax_t is handed to the recursion as its Size_type
+      float_sort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
+                     boost::uintmax_t>(first, last, cache, 0, counts, rshift, comp);
+    }
+
+    //sizeof(Div_type) exceeds sizeof(boost::uintmax_t), so fall back to std::sort with comp
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline typename boost::disable_if_c< sizeof(boost::uintmax_t) >=
+      sizeof(Div_type), void >::type
+    float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+               Right_shift rshift, Compare comp) //rshift is accepted for signature parity but not used
+    {
+      BOOST_STATIC_WARNING(sizeof(boost::uintmax_t) >= sizeof(Div_type)); //false whenever this overload is selected; presumably emits a compile-time warning
+      std::sort(first, last, comp); //comparison sort ordered by the caller's comp
+    }
+  }
+}
+}
+}
+
+#endif
diff --git a/boost/sort/spreadsort/detail/integer_sort.hpp b/boost/sort/spreadsort/detail/integer_sort.hpp
index bc14b3585c..6d6886cfd9 100644
--- a/boost/sort/spreadsort/detail/integer_sort.hpp
+++ b/boost/sort/spreadsort/detail/integer_sort.hpp
@@ -1,494 +1,494 @@
-// Details for templated Spreadsort-based integer_sort.
-
-//          Copyright Steven J. Ross 2001 - 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-*/
-
-#ifndef BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
-#define BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
-#include <algorithm>
-#include <vector>
-#include <limits>
-#include <functional>
-#include <boost/static_assert.hpp>
-#include <boost/serialization/static_warning.hpp>
-#include <boost/utility/enable_if.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
-#include <boost/cstdint.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
-  namespace detail {
-    // Return true if the list is sorted.  Otherwise, find the minimum and
-    // maximum using <.
-    template <class RandomAccessIter>
-    inline bool
-    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
-                               RandomAccessIter & max, RandomAccessIter & min)
-    {
-      min = max = current;
-      //This assumes we have more than 1 element based on prior checks.
-      while (!(*(current + 1) < *current)) {
-        //If everything is in sorted order, return
-        if (++current == last - 1)
-          return true;
-      }
-
-      //The maximum is the last sorted element
-      max = current;
-      //Start from the first unsorted element
-      while (++current < last) {
-        if (*max < *current)
-          max = current;
-        else if (*current < *min)
-          min = current;
-      }
-      return false;
-    }
-
-    // Return true if the list is sorted.  Otherwise, find the minimum and
-    // maximum.
-    // Use a user-defined comparison operator
-    template <class RandomAccessIter, class Compare>
-    inline bool
-    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
-                RandomAccessIter & max, RandomAccessIter & min, Compare comp)
-    {
-      min = max = current;
-      while (!comp(*(current + 1), *current)) {
-        //If everything is in sorted order, return
-        if (++current == last - 1)
-          return true;
-      }
-
-      //The maximum is the last sorted element
-      max = current;
-      while (++current < last) {
-        if (comp(*max, *current))
-          max = current;
-        else if (comp(*current, *min))
-          min = current;
-      }
-      return false;
-    }
-
-    //Gets a non-negative right bit shift to operate as a logarithmic divisor
-    template<unsigned log_mean_bin_size>
-    inline int
-    get_log_divisor(size_t count, int log_range)
-    {
-      int log_divisor;
-      //If we can finish in one iteration without exceeding either
-      //(2 to the max_finishing_splits) or n bins, do so
-      if ((log_divisor = log_range - rough_log_2_size(count)) <= 0 && 
-         log_range <= max_finishing_splits)
-        log_divisor = 0; 
-      else {
-        //otherwise divide the data into an optimized number of pieces
-        log_divisor += log_mean_bin_size;
-        //Cannot exceed max_splits or cache misses slow down bin lookups
-        if ((log_range - log_divisor) > max_splits)
-          log_divisor = log_range - max_splits;
-      }
-      return log_divisor;
-    }
-
-    //Implementation for recursive integer sorting
-    template <class RandomAccessIter, class Div_type, class Size_type>
-    inline void
-    spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
-              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-              , size_t *bin_sizes)
-    {
-      //This step is roughly 10% of runtime, but it helps avoid worst-case
-      //behavior and improve behavior with real data
-      //If you know the maximum and minimum ahead of time, you can pass those
-      //values in and skip this step for the first iteration
-      RandomAccessIter max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min))
-        return;
-      RandomAccessIter * target_bin;
-      unsigned log_divisor = get_log_divisor<int_log_mean_bin_size>(
-          last - first, rough_log_2_size(Size_type((*max >> 0) - (*min >> 0))));
-      Div_type div_min = *min >> log_divisor;
-      Div_type div_max = *max >> log_divisor;
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins =
-        size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
-
-      //Calculating the size of each bin; this takes roughly 10% of runtime
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[size_t((*(current++) >> log_divisor) - div_min)]++;
-      //Assign the bin positions
-      bins[0] = first;
-      for (unsigned u = 0; u < bin_count - 1; u++)
-        bins[u + 1] = bins[u] + bin_sizes[u];
-
-      RandomAccessIter nextbinstart = first;
-      //Swap into place
-      //This dominates runtime, mostly in the swap and bin lookups
-      for (unsigned u = 0; u < bin_count - 1; ++u) {
-        RandomAccessIter * local_bin = bins + u;
-        nextbinstart += bin_sizes[u];
-        //Iterating over each element in this bin
-        for (RandomAccessIter current = *local_bin; current < nextbinstart;
-            ++current) {
-          //Swapping elements in current into place until the correct
-          //element has been swapped in
-          for (target_bin = (bins + ((*current >> log_divisor) - div_min));
-              target_bin != local_bin;
-            target_bin = bins + ((*current >> log_divisor) - div_min)) {
-            //3-way swap; this is about 1% faster than a 2-way swap
-            //The main advantage is less copies are involved per item
-            //put in the correct place
-            typename std::iterator_traits<RandomAccessIter>::value_type tmp;
-            RandomAccessIter b = (*target_bin)++;
-            RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
-            if (b_bin != local_bin) {
-              RandomAccessIter c = (*b_bin)++;
-              tmp = *c;
-              *c = *b;
-            }
-            else
-              tmp = *b;
-            *b = *current;
-            *current = tmp;
-          }
-        }
-        *local_bin = nextbinstart;
-      }
-      bins[bin_count - 1] = last;
-
-      //If we've bucketsorted, the array is sorted and we should skip recursion
-      if (!log_divisor)
-        return;
-      //log_divisor is the remaining range; calculating the comparison threshold
-      size_t max_count =
-        get_min_count<int_log_mean_bin_size, int_log_min_split_count,
-                      int_log_finishing_count>(log_divisor);
-
-      //Recursing
-      RandomAccessIter lastPos = first;
-      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
-          ++u) {
-        Size_type count = bin_cache[u] - lastPos;
-        //don't sort unless there are at least two items to Compare
-        if (count < 2)
-          continue;
-        //using std::sort if its worst-case is better
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u]);
-        else
-          spreadsort_rec<RandomAccessIter, Div_type, Size_type>(lastPos,
-                                                                 bin_cache[u],
-                                                                 bin_cache,
-                                                                 cache_end,
-                                                                 bin_sizes);
-      }
-    }
-
-    //Generic bitshift-based 3-way swapping code
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline void inner_swap_loop(RandomAccessIter * bins,
-      const RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
-      , const unsigned log_divisor, const Div_type div_min)
-    {
-      RandomAccessIter * local_bin = bins + ii;
-      for (RandomAccessIter current = *local_bin; current < next_bin_start;
-          ++current) {
-        for (RandomAccessIter * target_bin =
-            (bins + (rshift(*current, log_divisor) - div_min));
-            target_bin != local_bin;
-            target_bin = bins + (rshift(*current, log_divisor) - div_min)) {
-          typename std::iterator_traits<RandomAccessIter>::value_type tmp;
-          RandomAccessIter b = (*target_bin)++;
-          RandomAccessIter * b_bin =
-            bins + (rshift(*b, log_divisor) - div_min);
-          //Three-way swap; if the item to be swapped doesn't belong
-          //in the current bin, swap it to where it belongs
-          if (b_bin != local_bin) {
-            RandomAccessIter c = (*b_bin)++;
-            tmp = *c;
-            *c = *b;
-          }
-          //Note: we could increment current once the swap is done in this case
-          //but that seems to impair performance
-          else
-            tmp = *b;
-          *b = *current;
-          *current = tmp;
-        }
-      }
-      *local_bin = next_bin_start;
-    }
-
-    //Standard swapping wrapper for ascending values
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline void swap_loop(RandomAccessIter * bins,
-             RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
-             , const size_t *bin_sizes
-             , const unsigned log_divisor, const Div_type div_min)
-    {
-      next_bin_start += bin_sizes[ii];
-      inner_swap_loop<RandomAccessIter, Div_type, Right_shift>(bins,
-                              next_bin_start, ii, rshift, log_divisor, div_min);
-    }
-
-    //Functor implementation for recursive sorting
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare, class Size_type, unsigned log_mean_bin_size,
-                unsigned log_min_split_count, unsigned log_finishing_count>
-    inline void
-    spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
-          std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-          , size_t *bin_sizes, Right_shift rshift, Compare comp)
-    {
-      RandomAccessIter max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min, comp))
-        return;
-      unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
-            rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
-      Div_type div_min = rshift(*min, log_divisor);
-      Div_type div_max = rshift(*max, log_divisor);
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
-      bins[0] = first;
-      for (unsigned u = 0; u < bin_count - 1; u++)
-        bins[u + 1] = bins[u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter next_bin_start = first;
-      for (unsigned u = 0; u < bin_count - 1; ++u)
-        swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, next_bin_start,
-                                  u, rshift, bin_sizes, log_divisor, div_min);
-      bins[bin_count - 1] = last;
-
-      //If we've bucketsorted, the array is sorted
-      if (!log_divisor)
-        return;
-
-      //Recursing
-      size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
-                          log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
-          ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u], comp);
-        else
-          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
-        Size_type, log_mean_bin_size, log_min_split_count, log_finishing_count>
-      (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
-      }
-    }
-
-    //Functor implementation for recursive sorting with only Shift overridden
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Size_type, unsigned log_mean_bin_size,
-              unsigned log_min_split_count, unsigned log_finishing_count>
-    inline void
-    spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
-              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
-              , size_t *bin_sizes, Right_shift rshift)
-    {
-      RandomAccessIter max, min;
-      if (is_sorted_or_find_extremes(first, last, max, min))
-        return;
-      unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
-            rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
-      Div_type div_min = rshift(*min, log_divisor);
-      Div_type div_max = rshift(*max, log_divisor);
-      unsigned bin_count = unsigned(div_max - div_min) + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, bin_count);
-
-      //Calculating the size of each bin
-      for (RandomAccessIter current = first; current != last;)
-        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
-      bins[0] = first;
-      for (unsigned u = 0; u < bin_count - 1; u++)
-        bins[u + 1] = bins[u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter nextbinstart = first;
-      for (unsigned ii = 0; ii < bin_count - 1; ++ii)
-        swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, nextbinstart,
-                                ii, rshift, bin_sizes, log_divisor, div_min);
-      bins[bin_count - 1] = last;
-
-      //If we've bucketsorted, the array is sorted
-      if (!log_divisor)
-        return;
-
-      //Recursing
-      size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
-                          log_finishing_count>(log_divisor);
-      RandomAccessIter lastPos = first;
-      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
-          ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        if (count < 2)
-          continue;
-        if (count < max_count)
-          std::sort(lastPos, bin_cache[u]);
-        else
-          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
-          log_mean_bin_size, log_min_split_count, log_finishing_count>(lastPos,
-                      bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
-      }
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Div_type>
-    //Only use spreadsort if the integer can fit in a size_t
-    inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
-                                                            void >::type
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      spreadsort_rec<RandomAccessIter, Div_type, size_t>(first, last,
-          bin_cache, 0, bin_sizes);
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Div_type>
-    //Only use spreadsort if the integer can fit in a uintmax_t
-    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
-      && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      spreadsort_rec<RandomAccessIter, Div_type, boost::uintmax_t>(first,
-          last, bin_cache, 0, bin_sizes);
-    }
-
-    template <class RandomAccessIter, class Div_type>
-    inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
-      || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
-    //defaulting to std::sort when integer_sort won't work
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
-    {
-      //Warning that we're using std::sort, even though integer_sort was called
-      BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
-      std::sort(first, last);
-    }
-
-
-    //Same for the full functor version
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    //Only use spreadsort if the integer can fit in a size_t
-    inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
-                                 void >::type
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-                Right_shift shift, Compare comp)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
-          size_t, int_log_mean_bin_size, int_log_min_split_count, 
-                        int_log_finishing_count>
-          (first, last, bin_cache, 0, bin_sizes, shift, comp);
-    }
-
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    //Only use spreadsort if the integer can fit in a uintmax_t
-    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
-      && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-                Right_shift shift, Compare comp)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
-                        boost::uintmax_t, int_log_mean_bin_size,
-                        int_log_min_split_count, int_log_finishing_count>
-          (first, last, bin_cache, 0, bin_sizes, shift, comp);
-    }
-
-    template <class RandomAccessIter, class Div_type, class Right_shift,
-              class Compare>
-    inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
-      || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
-    //defaulting to std::sort when integer_sort won't work
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-                Right_shift shift, Compare comp)
-    {
-      //Warning that we're using std::sort, even though integer_sort was called
-      BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
-      std::sort(first, last, comp);
-    }
-
-
-    //Same for the right shift version
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    //Only use spreadsort if the integer can fit in a size_t
-    inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
-                                 void >::type
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-                Right_shift shift)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      spreadsort_rec<RandomAccessIter, Div_type, Right_shift, size_t,
-          int_log_mean_bin_size, int_log_min_split_count, 
-                        int_log_finishing_count>
-          (first, last, bin_cache, 0, bin_sizes, shift);
-    }
-
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    //Only use spreadsort if the integer can fit in a uintmax_t
-    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
-      && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-                Right_shift shift)
-    {
-      size_t bin_sizes[1 << max_finishing_splits];
-      std::vector<RandomAccessIter> bin_cache;
-      spreadsort_rec<RandomAccessIter, Div_type, Right_shift,
-                        boost::uintmax_t, int_log_mean_bin_size,
-                        int_log_min_split_count, int_log_finishing_count>
-          (first, last, bin_cache, 0, bin_sizes, shift);
-    }
-
-    template <class RandomAccessIter, class Div_type, class Right_shift>
-    inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
-      || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
-    //defaulting to std::sort when integer_sort won't work
-    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
-                Right_shift shift)
-    {
-      //Warning that we're using std::sort, even though integer_sort was called
-      BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
-      std::sort(first, last);
-    }
-  }
-}
-}
-}
-
-#endif
+// Details for templated Spreadsort-based integer_sort.
+
+//          Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+*/
+
+#ifndef BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
+#define BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <limits>
+#include <functional>
+#include <boost/static_assert.hpp>
+#include <boost/serialization/static_warning.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
+#include <boost/cstdint.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+  namespace detail {
+    // Return true if [current, last) is already in non-descending order.
+    // Otherwise set max and min to a largest and a smallest element, using <.
+    template <class RandomAccessIter>
+    inline bool
+    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
+                               RandomAccessIter & max, RandomAccessIter & min)
+    {
+      min = max = current;
+      //Precondition: at least 2 elements, since *(current + 1) is read at once.
+      while (!(*(current + 1) < *current)) {
+        //No descent found before the final element: the range is sorted
+        if (++current == last - 1)
+          return true;
+      }
+
+      //The sorted prefix ends here, so this is the largest element seen so far
+      max = current;
+      //Scan the rest, starting from the first out-of-order element
+      while (++current < last) {
+        if (*max < *current)
+          max = current;
+        else if (*current < *min)
+          min = current;
+      }
+      return false;
+    }
+
+    // Return true if [current, last) is already sorted under comp.  Otherwise
+    // set max and min to a largest and a smallest element.
+    // comp(a, b) is the user-supplied replacement for a < b.
+    template <class RandomAccessIter, class Compare>
+    inline bool
+    is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
+                RandomAccessIter & max, RandomAccessIter & min, Compare comp)
+    {
+      min = max = current;
+      while (!comp(*(current + 1), *current)) {
+        //No descent found before the final element: the range is sorted
+        if (++current == last - 1)
+          return true;
+      }
+
+      //The sorted prefix ends here, so this is the largest element seen so far
+      max = current;
+      while (++current < last) {
+        if (comp(*max, *current))
+          max = current;
+        else if (comp(*current, *min))
+          min = current;
+      }
+      return false;
+    }
+
+    //Chooses the right bit shift (log2 of the divisor) that maps values to bins
+    template<unsigned log_mean_bin_size>
+    inline int
+    get_log_divisor(size_t count, int log_range)
+    {
+      int log_divisor;
+      //A shift of 0 sorts fully in one pass; use it when log_range exceeds
+      //neither the rough log2 of count nor max_finishing_splits
+      if ((log_divisor = log_range - rough_log_2_size(count)) <= 0 && 
+         log_range <= max_finishing_splits)
+        log_divisor = 0; 
+      else {
+        //Otherwise coarsen the bins by log_mean_bin_size extra bits of shift
+        log_divisor += log_mean_bin_size;
+        //Keep at most max_splits bin bits, or cache misses slow bin lookups
+        if ((log_range - log_divisor) > max_splits)
+          log_divisor = log_range - max_splits;
+      }
+      return log_divisor;
+    }
+
+    //Recursive integer sort using operator>> and operator< on the elements
+    template <class RandomAccessIter, class Div_type, class Size_type>
+    inline void
+    spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
+              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+              , size_t *bin_sizes)
+    {
+      //Finding the extremes is roughly 10% of runtime, but it helps avoid
+      //worst-case behavior and improves behavior with real data.
+      //If you know the maximum and minimum ahead of time, you can pass those
+      //values in and skip this step for the first iteration
+      RandomAccessIter max, min;
+      if (is_sorted_or_find_extremes(first, last, max, min))
+        return;
+      RandomAccessIter * target_bin;
+      unsigned log_divisor = get_log_divisor<int_log_mean_bin_size>(
+          last - first, rough_log_2_size(Size_type((*max >> 0) - (*min >> 0))));
+      Div_type div_min = *min >> log_divisor;
+      Div_type div_max = *max >> log_divisor;
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end;
+      RandomAccessIter * bins =
+        size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
+
+      //Count the elements that fall in each bin; roughly 10% of runtime
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[size_t((*(current++) >> log_divisor) - div_min)]++;
+      //Each bin starts where the previous bin's elements end
+      bins[0] = first;
+      for (unsigned u = 0; u < bin_count - 1; u++)
+        bins[u + 1] = bins[u] + bin_sizes[u];
+
+      RandomAccessIter nextbinstart = first;
+      //Swap elements into every bin except the last, which needs no pass.
+      //This dominates runtime, mostly in the swap and bin lookups
+      for (unsigned u = 0; u < bin_count - 1; ++u) {
+        RandomAccessIter * local_bin = bins + u;
+        nextbinstart += bin_sizes[u];
+        //Visit each slot of this bin from its current fill position onward
+        for (RandomAccessIter current = *local_bin; current < nextbinstart;
+            ++current) {
+          //Swap the element at current out repeatedly until one that
+          //belongs in this bin has been swapped in
+          for (target_bin = (bins + ((*current >> log_divisor) - div_min));
+              target_bin != local_bin;
+            target_bin = bins + ((*current >> log_divisor) - div_min)) {
+            //3-way swap; this is about 1% faster than a 2-way swap.
+            //The main advantage is that fewer copies are involved per item
+            //put in the correct place
+            typename std::iterator_traits<RandomAccessIter>::value_type tmp;
+            RandomAccessIter b = (*target_bin)++;
+            RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
+            if (b_bin != local_bin) {
+              RandomAccessIter c = (*b_bin)++;
+              tmp = *c;
+              *c = *b;
+            }
+            else
+              tmp = *b;
+            *b = *current;
+            *current = tmp;
+          }
+        }
+        *local_bin = nextbinstart;
+      }
+      bins[bin_count - 1] = last;
+
+      //A shift of 0 means one value per bin, so the array is already sorted
+      if (!log_divisor)
+        return;
+      //log_divisor bits remain unsorted; derive the std::sort cutoff from them
+      size_t max_count =
+        get_min_count<int_log_mean_bin_size, int_log_min_split_count,
+                      int_log_finishing_count>(log_divisor);
+
+      //Sort each bin in turn; bin_cache[u] now marks where that bin ends
+      RandomAccessIter lastPos = first;
+      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
+          ++u) {
+        Size_type count = bin_cache[u] - lastPos;
+        //Bins with fewer than two items have nothing to compare
+        if (count < 2)
+          continue;
+        //Small bins go to std::sort, where its worst case is better
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u]);
+        else
+          spreadsort_rec<RandomAccessIter, Div_type, Size_type>(lastPos,
+                                                                 bin_cache[u],
+                                                                 bin_cache,
+                                                                 cache_end,
+                                                                 bin_sizes);
+      }
+    }
+
+    //Fills bin ii via 3-way swaps, using rshift to compute each element's bin
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline void inner_swap_loop(RandomAccessIter * bins,
+      const RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
+      , const unsigned log_divisor, const Div_type div_min)
+    {
+      RandomAccessIter * local_bin = bins + ii;
+      for (RandomAccessIter current = *local_bin; current < next_bin_start;
+          ++current) {
+        for (RandomAccessIter * target_bin =
+            (bins + (rshift(*current, log_divisor) - div_min));
+            target_bin != local_bin;
+            target_bin = bins + (rshift(*current, log_divisor) - div_min)) {
+          typename std::iterator_traits<RandomAccessIter>::value_type tmp;
+          RandomAccessIter b = (*target_bin)++;
+          RandomAccessIter * b_bin =
+            bins + (rshift(*b, log_divisor) - div_min);
+          //Three-way swap: if the displaced item b does not belong in the
+          //current bin either, move it straight to the bin where it belongs
+          if (b_bin != local_bin) {
+            RandomAccessIter c = (*b_bin)++;
+            tmp = *c;
+            *c = *b;
+          }
+          //Note: current could be incremented once the swap is done in this
+          //case, but that seems to impair performance
+          else
+            tmp = *b;
+          *b = *current;
+          *current = tmp;
+        }
+      }
+      *local_bin = next_bin_start;
+    }
+
+    //Ascending-order wrapper: advance next_bin_start past bin ii, then fill it
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline void swap_loop(RandomAccessIter * bins,
+             RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
+             , const size_t *bin_sizes
+             , const unsigned log_divisor, const Div_type div_min)
+    {
+      next_bin_start += bin_sizes[ii];
+      inner_swap_loop<RandomAccessIter, Div_type, Right_shift>(bins,
+                              next_bin_start, ii, rshift, log_divisor, div_min);
+    }
+
+    //Recursive sort using the rshift and comp functors instead of >> and <
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare, class Size_type, unsigned log_mean_bin_size,
+                unsigned log_min_split_count, unsigned log_finishing_count>
+    inline void
+    spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
+          std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+          , size_t *bin_sizes, Right_shift rshift, Compare comp)
+    {
+      RandomAccessIter max, min;
+      if (is_sorted_or_find_extremes(first, last, max, min, comp))
+        return;
+      unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
+            rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
+      Div_type div_min = rshift(*min, log_divisor);
+      Div_type div_max = rshift(*max, log_divisor);
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+
+      //Count the elements that fall in each bin
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
+      bins[0] = first;
+      for (unsigned u = 0; u < bin_count - 1; u++)
+        bins[u + 1] = bins[u] + bin_sizes[u];
+
+      //Swap elements into every bin except the last, which needs no pass
+      RandomAccessIter next_bin_start = first;
+      for (unsigned u = 0; u < bin_count - 1; ++u)
+        swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, next_bin_start,
+                                  u, rshift, bin_sizes, log_divisor, div_min);
+      bins[bin_count - 1] = last;
+
+      //With no shift each bin holds a single key, so the array is sorted
+      if (!log_divisor)
+        return;
+
+      //Sort each bin: std::sort below max_count items, otherwise recurse
+      size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
+                          log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first;
+      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
+          ++u) {
+        size_t count = bin_cache[u] - lastPos;
+        if (count < 2)
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u], comp);
+        else
+          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
+        Size_type, log_mean_bin_size, log_min_split_count, log_finishing_count>
+      (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
+      }
+    }
+
+    //Recursive spreadsort pass with only rshift overridden (default ordering)
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Size_type, unsigned log_mean_bin_size,
+              unsigned log_min_split_count, unsigned log_finishing_count>
+    inline void
+    spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
+              std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
+              , size_t *bin_sizes, Right_shift rshift)
+    {
+      RandomAccessIter max, min;
+      if (is_sorted_or_find_extremes(first, last, max, min))
+        return; //presumably: already sorted (helper is defined elsewhere)
+      unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
+            rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
+      Div_type div_min = rshift(*min, log_divisor);
+      Div_type div_max = rshift(*max, log_divisor);
+      unsigned bin_count = unsigned(div_max - div_min) + 1;
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, bin_count);
+      //Count the items per bin; an item's bin index is its key shifted right
+      //by log_divisor, minus div_min. Then turn the counts into bin starts.
+      for (RandomAccessIter current = first; current != last;)
+        bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
+      bins[0] = first;
+      for (unsigned u = 0; u < bin_count - 1; u++)
+        bins[u + 1] = bins[u] + bin_sizes[u];
+
+      //Swap every item into its bin; the last bin needs no pass of its own
+      RandomAccessIter nextbinstart = first;
+      for (unsigned ii = 0; ii < bin_count - 1; ++ii)
+        swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, nextbinstart,
+                                ii, rshift, bin_sizes, log_divisor, div_min);
+      bins[bin_count - 1] = last;
+
+      //With log_divisor == 0 each bin holds one key value: already sorted
+      if (!log_divisor)
+        return;
+      //Recurse into each bin, using std::sort for bins below max_count items.
+      //bins is not used below: a nested call may resize bin_cache under it
+      size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
+                          log_finishing_count>(log_divisor);
+      RandomAccessIter lastPos = first;
+      for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
+          ++u) {
+        size_t count = bin_cache[u] - lastPos;
+        if (count < 2)
+          continue;
+        if (count < max_count)
+          std::sort(lastPos, bin_cache[u]);
+        else
+          spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
+          log_mean_bin_size, log_min_split_count, log_finishing_count>(lastPos,
+                      bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
+      }
+    }
+
+    //Entry point for keys no wider than size_t (enforced by the enable_if_c):
+    //owns the bin scratch storage and makes the initial spreadsort_rec call
+    template <class RandomAccessIter, class Div_type>
+    inline typename boost::enable_if_c<
+      sizeof(Div_type) <= sizeof(size_t), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
+    {
+      std::vector<RandomAccessIter> cache;
+      size_t sizes[1 << max_finishing_splits];
+      spreadsort_rec<RandomAccessIter, Div_type, size_t>(
+          first, last, cache, 0, sizes);
+    }
+
+    //Entry point for keys wider than size_t that still fit in a uintmax_t:
+    //owns the bin scratch storage and makes the initial spreadsort_rec call
+    template <class RandomAccessIter, class Div_type>
+    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
+      && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
+    {
+      std::vector<RandomAccessIter> cache;
+      size_t sizes[1 << max_finishing_splits];
+      spreadsort_rec<RandomAccessIter, Div_type, boost::uintmax_t>(
+          first, last, cache, 0, sizes);
+    }
+
+    //Fallback to std::sort for keys wider than both size_t and uintmax_t
+    template <class RandomAccessIter, class Div_type>
+    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
+      && (sizeof(Div_type) > sizeof(boost::uintmax_t)), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
+    {
+      //Compile-time warning that std::sort is used despite the call name
+      BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
+      std::sort(first, last);
+    }
+
+
+    //Functor overload for keys no wider than size_t (see the enable_if_c):
+    //sorts via the caller's shift and comp, owning the bin scratch storage
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline typename boost::enable_if_c<
+      sizeof(Div_type) <= sizeof(size_t), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+                Right_shift shift, Compare comp)
+    {
+      std::vector<RandomAccessIter> cache;
+      size_t sizes[1 << max_finishing_splits];
+      spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
+                     size_t, int_log_mean_bin_size, int_log_min_split_count,
+                     int_log_finishing_count>(
+          first, last, cache, 0, sizes, shift, comp);
+    }
+
+    //Functor overload for keys wider than size_t that fit in a uintmax_t
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
+      && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+                Right_shift shift, Compare comp)
+    {
+      std::vector<RandomAccessIter> cache;
+      size_t sizes[1 << max_finishing_splits];
+      spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
+                     boost::uintmax_t, int_log_mean_bin_size,
+                     int_log_min_split_count, int_log_finishing_count>(
+          first, last, cache, 0, sizes, shift, comp);
+    }
+
+    //Functor fallback to std::sort(comp) for keys too wide; shift is unused
+    template <class RandomAccessIter, class Div_type, class Right_shift,
+              class Compare>
+    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
+      && (sizeof(Div_type) > sizeof(boost::uintmax_t)), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+                Right_shift shift, Compare comp)
+    {
+      //Compile-time warning that std::sort is used despite the call name
+      BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
+      std::sort(first, last, comp);
+    }
+
+
+    //Shift-only overload for keys no wider than size_t (see the enable_if_c):
+    //owns the bin scratch storage and makes the initial spreadsort_rec call
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline typename boost::enable_if_c<
+      sizeof(Div_type) <= sizeof(size_t), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+                Right_shift shift)
+    {
+      std::vector<RandomAccessIter> cache;
+      size_t sizes[1 << max_finishing_splits];
+      spreadsort_rec<RandomAccessIter, Div_type, Right_shift, size_t,
+                     int_log_mean_bin_size, int_log_min_split_count,
+                     int_log_finishing_count>(
+          first, last, cache, 0, sizes, shift);
+    }
+
+    //Shift-only overload for keys wider than size_t that fit in a uintmax_t
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
+      && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+                Right_shift shift)
+    {
+      std::vector<RandomAccessIter> cache;
+      size_t sizes[1 << max_finishing_splits];
+      spreadsort_rec<RandomAccessIter, Div_type, Right_shift,
+                     boost::uintmax_t, int_log_mean_bin_size,
+                     int_log_min_split_count, int_log_finishing_count>(
+          first, last, cache, 0, sizes, shift);
+    }
+
+    //Shift-only fallback to plain std::sort for keys too wide; shift is unused
+    template <class RandomAccessIter, class Div_type, class Right_shift>
+    inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
+      && (sizeof(Div_type) > sizeof(boost::uintmax_t)), void >::type
+    integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
+                Right_shift shift)
+    {
+      //Compile-time warning that std::sort is used despite the call name
+      BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
+      std::sort(first, last);
+    }
+  }
+}
+}
+}
+
+#endif
diff --git a/boost/sort/spreadsort/detail/spreadsort_common.hpp b/boost/sort/spreadsort/detail/spreadsort_common.hpp
index 7b299ad5f3..7917fddae0 100644
--- a/boost/sort/spreadsort/detail/spreadsort_common.hpp
+++ b/boost/sort/spreadsort/detail/spreadsort_common.hpp
@@ -1,124 +1,124 @@
-// Contains get_min_count, the core optimization of the spreadsort algorithm.
-// Also has other helper functions commonly useful across variants.
-
-//          Copyright Steven J. Ross 2001 - 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-*/
-
-#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
-#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
-#include <algorithm>
-#include <vector>
-#include <cstring>
-#include <limits>
-#include <functional>
-#include <boost/static_assert.hpp>
-#include <boost/serialization/static_warning.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/cstdint.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
- namespace detail {
-    //This only works on unsigned data types
-    template <typename T>
-    inline unsigned
-    rough_log_2_size(const T& input)
-    {
-      unsigned result = 0;
-      //The && is necessary on some compilers to avoid infinite loops
-      //it doesn't significantly impair performance
-      while ((input >> result) && (result < (8*sizeof(T)))) ++result;
-      return result;
-    }
-
-    //Gets the minimum size to call spreadsort on to control worst-case runtime.
-    //This is called for a set of bins, instead of bin-by-bin, to minimize
-    //runtime overhead.
-    //This could be replaced by a lookup table of sizeof(Div_type)*8 but this
-    //function is more general.
-    template<unsigned log_mean_bin_size,
-      unsigned log_min_split_count, unsigned log_finishing_count>
-    inline size_t
-    get_min_count(unsigned log_range)
-    {
-      const size_t typed_one = 1;
-      const unsigned min_size = log_mean_bin_size + log_min_split_count;
-      //Assuring that constants have valid settings
-      BOOST_STATIC_ASSERT(log_min_split_count <= max_splits &&
-                          log_min_split_count > 0);
-      BOOST_STATIC_ASSERT(max_splits > 1 &&
-                          max_splits < (8 * sizeof(unsigned)));
-      BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits &&
-                          max_finishing_splits < (8 * sizeof(unsigned)));
-      BOOST_STATIC_ASSERT(log_mean_bin_size >= 0);
-      BOOST_STATIC_ASSERT(log_finishing_count >= 0);
-      //if we can complete in one iteration, do so
-      //This first check allows the compiler to optimize never-executed code out
-      if (log_finishing_count < min_size) {
-        if (log_range <= min_size && log_range <= max_splits) {
-          //Return no smaller than a certain minimum limit
-          if (log_range <= log_finishing_count)
-            return typed_one << log_finishing_count;
-          return typed_one << log_range;
-        }
-      }
-      const unsigned base_iterations = max_splits - log_min_split_count;
-      //sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size
-      const unsigned base_range =
-          ((base_iterations + 1) * (max_splits + log_min_split_count))/2
-          + log_mean_bin_size;
-      //Calculating the required number of iterations, and returning
-      //1 << (iteration_count + min_size)
-      if (log_range < base_range) {
-        unsigned result = log_min_split_count;
-        for (unsigned offset = min_size; offset < log_range;
-          offset += ++result);
-        //Preventing overflow; this situation shouldn't occur
-        if ((result + log_mean_bin_size) >= (8 * sizeof(size_t)))
-          return typed_one << ((8 * sizeof(size_t)) - 1);
-        return typed_one << (result + log_mean_bin_size);
-      }
-      //A quick division can calculate the worst-case runtime for larger ranges
-      unsigned remainder = log_range - base_range;
-      //the max_splits - 1 is used to calculate the ceiling of the division
-      unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits)
-        + base_iterations + min_size);
-      //Preventing overflow; this situation shouldn't occur
-      if (bit_length >= (8 * sizeof(size_t)))
-        return typed_one << ((8 * sizeof(size_t)) - 1);
-      //n(log_range)/max_splits + C, optimizing worst-case performance
-      return typed_one << bit_length;
-    }
-
-    // Resizes the bin cache and bin sizes, and initializes each bin size to 0.
-    // This generates the memory overhead to use in radix sorting.
-    template <class RandomAccessIter>
-    inline RandomAccessIter *
-    size_bins(size_t *bin_sizes, std::vector<RandomAccessIter>
-  &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
-    {
-      // Clear the bin sizes
-      for (size_t u = 0; u < bin_count; u++)
-        bin_sizes[u] = 0;
-      //Make sure there is space for the bins
-      cache_end = cache_offset + bin_count;
-      if (cache_end > bin_cache.size())
-        bin_cache.resize(cache_end);
-      return &(bin_cache[cache_offset]);
-    }
-  }
-}
-}
-}
-
-#endif
+// Contains get_min_count, the core optimization of the spreadsort algorithm.
+// Also has other helper functions commonly useful across variants.
+
+//          Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+*/
+
+#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
+#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <limits>
+#include <functional>
+#include <boost/static_assert.hpp>
+#include <boost/serialization/static_warning.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/cstdint.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+ namespace detail {
+    //Bit length of input (0 for 0); only works on unsigned data types
+    template <typename T>
+    inline unsigned
+    rough_log_2_size(const T& input)
+    {
+      unsigned result = 0;
+      //The bound is tested first so input is never shifted by its full bit
+      //width, which is undefined behaviour and can loop forever or misreport
+      while ((result < (8*sizeof(T))) && (input >> result)) ++result;
+      return result;
+    }
+
+    //Gets the minimum size to call spreadsort on to control worst-case runtime.
+    //The result is always a power of two, derived from log_range and capped
+    //at the top bit of size_t. This is called for a set of bins, instead of
+    //bin-by-bin, to minimize runtime overhead. This could be replaced by a
+    //lookup table of sizeof(Div_type)*8 but this function is more general.
+    template<unsigned log_mean_bin_size,
+      unsigned log_min_split_count, unsigned log_finishing_count>
+    inline size_t
+    get_min_count(unsigned log_range)
+    {
+      const size_t typed_one = 1;
+      const unsigned min_size = log_mean_bin_size + log_min_split_count;
+      //Compile-time checks that the tuning constants have valid settings
+      BOOST_STATIC_ASSERT(log_min_split_count <= max_splits &&
+                          log_min_split_count > 0);
+      BOOST_STATIC_ASSERT(max_splits > 1 &&
+                          max_splits < (8 * sizeof(unsigned)));
+      BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits &&
+                          max_finishing_splits < (8 * sizeof(unsigned)));
+      BOOST_STATIC_ASSERT(log_mean_bin_size >= 0); //always true: unsigned
+      BOOST_STATIC_ASSERT(log_finishing_count >= 0); //always true: unsigned
+      //if we can complete in one iteration, do so
+      //The outer test uses only template constants, so the compiler can drop
+      if (log_finishing_count < min_size) { //this branch when it never runs
+        if (log_range <= min_size && log_range <= max_splits) {
+          //Return no smaller than the floor of 1 << log_finishing_count
+          if (log_range <= log_finishing_count)
+            return typed_one << log_finishing_count;
+          return typed_one << log_range;
+        }
+      }
+      const unsigned base_iterations = max_splits - log_min_split_count;
+      //sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size
+      const unsigned base_range =
+          ((base_iterations + 1) * (max_splits + log_min_split_count))/2
+          + log_mean_bin_size;
+      //Calculating the required number of iterations, and returning
+      //1 << (iteration_count + min_size)
+      if (log_range < base_range) {
+        unsigned result = log_min_split_count;
+        for (unsigned offset = min_size; offset < log_range;
+          offset += ++result); //empty body: the loop only advances result
+        //Preventing overflow; this situation shouldn't occur
+        if ((result + log_mean_bin_size) >= (8 * sizeof(size_t)))
+          return typed_one << ((8 * sizeof(size_t)) - 1);
+        return typed_one << (result + log_mean_bin_size);
+      }
+      //A quick division can calculate the worst-case runtime for larger ranges
+      unsigned remainder = log_range - base_range; //>= 0: checked just above
+      //the max_splits - 1 is used to calculate the ceiling of the division
+      unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits)
+        + base_iterations + min_size);
+      //Preventing overflow; this situation shouldn't occur
+      if (bit_length >= (8 * sizeof(size_t)))
+        return typed_one << ((8 * sizeof(size_t)) - 1);
+      //n(log_range)/max_splits + C, optimizing worst-case performance
+      return typed_one << bit_length;
+    }
+
+    // Zeroes the first bin_count entries of bin_sizes and grows bin_cache (if
+    // needed) to hold bin_count bins starting at cache_offset.
+    // Sets cache_end to one past the last bin; returns a pointer to the first.
+    template <class RandomAccessIter>
+    inline RandomAccessIter *
+    size_bins(size_t *bin_sizes, std::vector<RandomAccessIter> &bin_cache,
+              unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
+    {
+      std::fill_n(bin_sizes, bin_count, size_t(0));
+      // The bins of this call occupy [cache_offset, cache_end) in the cache
+      cache_end = cache_offset + bin_count;
+      const bool too_small = bin_cache.size() < cache_end;
+      if (too_small)
+        bin_cache.resize(cache_end);
+      return &bin_cache[cache_offset];
+    }
+  }
+}
+}
+}
+
+#endif
diff --git a/boost/sort/spreadsort/detail/string_sort.hpp b/boost/sort/spreadsort/detail/string_sort.hpp
index 582508fb7b..a548ebefa5 100644
--- a/boost/sort/spreadsort/detail/string_sort.hpp
+++ b/boost/sort/spreadsort/detail/string_sort.hpp
@@ -1,819 +1,819 @@
-// Details for a templated general-case hybrid-radix string_sort.
-
-//          Copyright Steven J. Ross 2001 - 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-*/
-
-#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
-#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
-#include <algorithm>
-#include <vector>
-#include <cstring>
-#include <limits>
-#include <functional>
-#include <boost/static_assert.hpp>
-#include <boost/serialization/static_warning.hpp>
-#include <boost/utility/enable_if.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
-#include <boost/cstdint.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
-  namespace detail {
-    static const int max_step_size = 64;
-
-    //Offsetting on identical characters.  This function works a chunk of
-    //characters at a time for cache efficiency and optimal worst-case
-    //performance.
-    template<class RandomAccessIter, class Unsigned_char_type>
-    inline void
-    update_offset(RandomAccessIter first, RandomAccessIter finish,
-                  size_t &char_offset)
-    {
-      const int char_size = sizeof(Unsigned_char_type);
-      size_t nextOffset = char_offset;
-      int step_size = max_step_size / char_size;
-      while (true) {
-        RandomAccessIter curr = first;
-        do {
-          //Ignore empties, but if the nextOffset would exceed the length or
-          //not match, exit; we've found the last matching character
-          //This will reduce the step_size if the current step doesn't match.
-          if ((*curr).size() > char_offset) {
-            if((*curr).size() <= (nextOffset + step_size)) {
-              step_size = (*curr).size() - nextOffset - 1;
-              if (step_size < 1) {
-                char_offset = nextOffset;
-                return;
-              }
-            }
-            const int step_byte_size = step_size * char_size;
-            if (memcmp(curr->data() + nextOffset, first->data() + nextOffset, 
-                       step_byte_size) != 0) {
-              if (step_size == 1) {
-                char_offset = nextOffset;
-                return;
-              }
-              step_size = (step_size > 4) ? 4 : 1;
-              continue;
-            }
-          }
-          ++curr;
-        } while (curr != finish);
-        nextOffset += step_size;
-      }
-    }
-
-    //Offsetting on identical characters.  This function works a character
-    //at a time for optimal worst-case performance.
-    template<class RandomAccessIter, class Get_char, class Get_length>
-    inline void
-    update_offset(RandomAccessIter first, RandomAccessIter finish,
-                  size_t &char_offset, Get_char getchar, Get_length length)
-    {
-      size_t nextOffset = char_offset;
-      while (true) {
-        RandomAccessIter curr = first;
-        do {
-          //ignore empties, but if the nextOffset would exceed the length or
-          //not match, exit; we've found the last matching character
-          if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
-            || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
-            char_offset = nextOffset;
-            return;
-          }
-        } while (++curr != finish);
-        ++nextOffset;
-      }
-    }
-
-    //This comparison functor assumes strings are identical up to char_offset
-    template<class Data_type, class Unsigned_char_type>
-    struct offset_less_than {
-      offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
-      inline bool operator()(const Data_type &x, const Data_type &y) const
-      {
-        size_t minSize = (std::min)(x.size(), y.size());
-        for (size_t u = fchar_offset; u < minSize; ++u) {
-          BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
-          if (static_cast<Unsigned_char_type>(x[u]) !=
-              static_cast<Unsigned_char_type>(y[u])) {
-            return static_cast<Unsigned_char_type>(x[u]) < 
-              static_cast<Unsigned_char_type>(y[u]);
-          }
-        }
-        return x.size() < y.size();
-      }
-      size_t fchar_offset;
-    };
-
-    //Compares strings assuming they are identical up to char_offset
-    template<class Data_type, class Unsigned_char_type>
-    struct offset_greater_than {
-      offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
-      inline bool operator()(const Data_type &x, const Data_type &y) const
-      {
-        size_t minSize = (std::min)(x.size(), y.size());
-        for (size_t u = fchar_offset; u < minSize; ++u) {
-          BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
-          if (static_cast<Unsigned_char_type>(x[u]) !=
-              static_cast<Unsigned_char_type>(y[u])) {
-            return static_cast<Unsigned_char_type>(x[u]) > 
-              static_cast<Unsigned_char_type>(y[u]);
-          }
-        }
-        return x.size() > y.size();
-      }
-      size_t fchar_offset;
-    };
-
-    //This comparison functor assumes strings are identical up to char_offset
-    template<class Data_type, class Get_char, class Get_length>
-    struct offset_char_less_than {
-      offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
-      inline bool operator()(const Data_type &x, const Data_type &y) const
-      {
-        size_t minSize = (std::min)(length(x), length(y));
-        for (size_t u = fchar_offset; u < minSize; ++u) {
-          if (getchar(x, u) != getchar(y, u)) {
-            return getchar(x, u) < getchar(y, u);
-          }
-        }
-        return length(x) < length(y);
-      }
-      size_t fchar_offset;
-      Get_char getchar;
-      Get_length length;
-    };
-
-    //String sorting recursive implementation
-    template <class RandomAccessIter, class Unsigned_char_type>
-    inline void
-    string_sort_rec(RandomAccessIter first, RandomAccessIter last,
-                    size_t char_offset,
-                    std::vector<RandomAccessIter> &bin_cache,
-                    unsigned cache_offset, size_t *bin_sizes)
-    {
-      typedef typename std::iterator_traits<RandomAccessIter>::value_type
-        Data_type;
-      //This section makes handling of long identical substrings much faster
-      //with a mild average performance impact.
-      //Iterate to the end of the empties.  If all empty, return
-      while ((*first).size() <= char_offset) {
-        if (++first == last)
-          return;
-      }
-      RandomAccessIter finish = last - 1;
-      //Getting the last non-empty
-      for (;(*finish).size() <= char_offset; --finish);
-      ++finish;
-      //Offsetting on identical characters.  This section works
-      //a few characters at a time for optimal worst-case performance.
-      update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
-                                                          char_offset);
-      
-      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
-      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
-      const unsigned max_size = bin_count;
-      const unsigned membin_count = bin_count + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, membin_count) + 1;
-
-      //Calculating the size of each bin; this takes roughly 10% of runtime
-      for (RandomAccessIter current = first; current != last; ++current) {
-        if ((*current).size() <= char_offset) {
-          bin_sizes[0]++;
-        }
-        else
-          bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
-                    + 1]++;
-      }
-      //Assign the bin positions
-      bin_cache[cache_offset] = first;
-      for (unsigned u = 0; u < membin_count - 1; u++)
-        bin_cache[cache_offset + u + 1] =
-          bin_cache[cache_offset + u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter next_bin_start = first;
-      //handling empty bins
-      RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
-      next_bin_start +=  bin_sizes[0];
-      RandomAccessIter * target_bin;
-      //Iterating over each element in the bin of empties
-      for (RandomAccessIter current = *local_bin; current < next_bin_start;
-          ++current) {
-        //empties belong in this bin
-        while ((*current).size() > char_offset) {
-          target_bin =
-            bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
-          iter_swap(current, (*target_bin)++);
-        }
-      }
-      *local_bin = next_bin_start;
-      //iterate backwards to find the last bin with elements in it
-      //this saves iterations in multiple loops
-      unsigned last_bin = bin_count - 1;
-      for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
-      //This dominates runtime, mostly in the swap and bin lookups
-      for (unsigned u = 0; u < last_bin; ++u) {
-        local_bin = bins + u;
-        next_bin_start += bin_sizes[u + 1];
-        //Iterating over each element in this bin
-        for (RandomAccessIter current = *local_bin; current < next_bin_start;
-            ++current) {
-          //Swapping into place until the correct element has been swapped in
-          for (target_bin = bins + static_cast<Unsigned_char_type>
-              ((*current)[char_offset]);  target_bin != local_bin;
-            target_bin = bins + static_cast<Unsigned_char_type>
-              ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
-        }
-        *local_bin = next_bin_start;
-      }
-      bins[last_bin] = last;
-      //Recursing
-      RandomAccessIter lastPos = bin_cache[cache_offset];
-      //Skip this loop for empties
-      for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        //don't sort unless there are at least two items to Compare
-        if (count < 2)
-          continue;
-        //using std::sort if its worst-case is better
-        if (count < max_size)
-          std::sort(lastPos, bin_cache[u],
-              offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
-        else
-          string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
-              bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
-      }
-    }
-
-    //Sorts strings in reverse order, with empties at the end
-    template <class RandomAccessIter, class Unsigned_char_type>
-    inline void
-    reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
-                            size_t char_offset,
-                            std::vector<RandomAccessIter> &bin_cache,
-                            unsigned cache_offset,
-                            size_t *bin_sizes)
-    {
-      typedef typename std::iterator_traits<RandomAccessIter>::value_type
-        Data_type;
-      //This section makes handling of long identical substrings much faster
-      //with a mild average performance impact.
-      RandomAccessIter curr = first;
-      //Iterate to the end of the empties.  If all empty, return
-      while ((*curr).size() <= char_offset) {
-        if (++curr == last)
-          return;
-      }
-      //Getting the last non-empty
-      while ((*(--last)).size() <= char_offset);
-      ++last;
-      //Offsetting on identical characters.  This section works
-      //a few characters at a time for optimal worst-case performance.
-      update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
-                                                          char_offset);
-      RandomAccessIter * target_bin;
-
-      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
-      //Equal worst-case of radix and comparison when bin_count = n*log(n).
-      const unsigned max_size = bin_count;
-      const unsigned membin_count = bin_count + 1;
-      const unsigned max_bin = bin_count - 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, membin_count);
-      RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
-
-      //Calculating the size of each bin; this takes roughly 10% of runtime
-      for (RandomAccessIter current = first; current != last; ++current) {
-        if ((*current).size() <= char_offset) {
-          bin_sizes[bin_count]++;
-        }
-        else
-          bin_sizes[max_bin - static_cast<Unsigned_char_type>
-            ((*current)[char_offset])]++;
-      }
-      //Assign the bin positions
-      bin_cache[cache_offset] = first;
-      for (unsigned u = 0; u < membin_count - 1; u++)
-        bin_cache[cache_offset + u + 1] =
-          bin_cache[cache_offset + u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter next_bin_start = last;
-      //handling empty bins
-      RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
-      RandomAccessIter lastFull = *local_bin;
-      //Iterating over each element in the bin of empties
-      for (RandomAccessIter current = *local_bin; current < next_bin_start;
-          ++current) {
-        //empties belong in this bin
-        while ((*current).size() > char_offset) {
-          target_bin =
-            end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
-          iter_swap(current, (*target_bin)++);
-        }
-      }
-      *local_bin = next_bin_start;
-      next_bin_start = first;
-      //iterate backwards to find the last non-empty bin
-      //this saves iterations in multiple loops
-      unsigned last_bin = max_bin;
-      for (; last_bin && !bin_sizes[last_bin]; --last_bin);
-      //This dominates runtime, mostly in the swap and bin lookups
-      for (unsigned u = 0; u < last_bin; ++u) {
-        local_bin = bins + u;
-        next_bin_start += bin_sizes[u];
-        //Iterating over each element in this bin
-        for (RandomAccessIter current = *local_bin; current < next_bin_start;
-            ++current) {
-          //Swapping into place until the correct element has been swapped in
-          for (target_bin =
-            end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
-            target_bin != local_bin;
-            target_bin =
-            end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
-              iter_swap(current, (*target_bin)++);
-        }
-        *local_bin = next_bin_start;
-      }
-      bins[last_bin] = lastFull;
-      //Recursing
-      RandomAccessIter lastPos = first;
-      //Skip this loop for empties
-      for (unsigned u = cache_offset; u <= cache_offset + last_bin;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        //don't sort unless there are at least two items to Compare
-        if (count < 2)
-          continue;
-        //using std::sort if its worst-case is better
-        if (count < max_size)
-          std::sort(lastPos, bin_cache[u], offset_greater_than<Data_type,
-                    Unsigned_char_type>(char_offset + 1));
-        else
-          reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
-    (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
-      }
-    }
-
-    //String sorting recursive implementation
-    template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
-              class Get_length>
-    inline void
-    string_sort_rec(RandomAccessIter first, RandomAccessIter last,
-              size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
-              unsigned cache_offset, size_t *bin_sizes,
-              Get_char getchar, Get_length length)
-    {
-      typedef typename std::iterator_traits<RandomAccessIter>::value_type
-        Data_type;
-      //This section makes handling of long identical substrings much faster
-      //with a mild average performance impact.
-      //Iterate to the end of the empties.  If all empty, return
-      while (length(*first) <= char_offset) {
-        if (++first == last)
-          return;
-      }
-      RandomAccessIter finish = last - 1;
-      //Getting the last non-empty
-      for (;length(*finish) <= char_offset; --finish);
-      ++finish;
-      update_offset(first, finish, char_offset, getchar, length);
-
-      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
-      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
-      const unsigned max_size = bin_count;
-      const unsigned membin_count = bin_count + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, membin_count) + 1;
-
-      //Calculating the size of each bin; this takes roughly 10% of runtime
-      for (RandomAccessIter current = first; current != last; ++current) {
-        if (length(*current) <= char_offset) {
-          bin_sizes[0]++;
-        }
-        else
-          bin_sizes[getchar((*current), char_offset) + 1]++;
-      }
-      //Assign the bin positions
-      bin_cache[cache_offset] = first;
-      for (unsigned u = 0; u < membin_count - 1; u++)
-        bin_cache[cache_offset + u + 1] =
-          bin_cache[cache_offset + u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter next_bin_start = first;
-      //handling empty bins
-      RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
-      next_bin_start +=  bin_sizes[0];
-      RandomAccessIter * target_bin;
-      //Iterating over each element in the bin of empties
-      for (RandomAccessIter current = *local_bin; current < next_bin_start;
-          ++current) {
-        //empties belong in this bin
-        while (length(*current) > char_offset) {
-          target_bin = bins + getchar((*current), char_offset);
-          iter_swap(current, (*target_bin)++);
-        }
-      }
-      *local_bin = next_bin_start;
-      //iterate backwards to find the last bin with elements in it
-      //this saves iterations in multiple loops
-      unsigned last_bin = bin_count - 1;
-      for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
-      //This dominates runtime, mostly in the swap and bin lookups
-      for (unsigned ii = 0; ii < last_bin; ++ii) {
-        local_bin = bins + ii;
-        next_bin_start += bin_sizes[ii + 1];
-        //Iterating over each element in this bin
-        for (RandomAccessIter current = *local_bin; current < next_bin_start;
-            ++current) {
-          //Swapping into place until the correct element has been swapped in
-          for (target_bin = bins + getchar((*current), char_offset);
-              target_bin != local_bin;
-              target_bin = bins + getchar((*current), char_offset))
-            iter_swap(current, (*target_bin)++);
-        }
-        *local_bin = next_bin_start;
-      }
-      bins[last_bin] = last;
-
-      //Recursing
-      RandomAccessIter lastPos = bin_cache[cache_offset];
-      //Skip this loop for empties
-      for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        //don't sort unless there are at least two items to Compare
-        if (count < 2)
-          continue;
-        //using std::sort if its worst-case is better
-        if (count < max_size)
-          std::sort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
-                    Get_char, Get_length>(char_offset + 1));
-        else
-          string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
-            Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
-                        cache_end, bin_sizes, getchar, length);
-      }
-    }
-
-    //String sorting recursive implementation
-    template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
-              class Get_length, class Compare>
-    inline void
-    string_sort_rec(RandomAccessIter first, RandomAccessIter last,
-              size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
-              unsigned cache_offset, size_t *bin_sizes,
-              Get_char getchar, Get_length length, Compare comp)
-    {
-      //This section makes handling of long identical substrings much faster
-      //with a mild average performance impact.
-      //Iterate to the end of the empties.  If all empty, return
-      while (length(*first) <= char_offset) {
-        if (++first == last)
-          return;
-      }
-      RandomAccessIter finish = last - 1;
-      //Getting the last non-empty
-      for (;length(*finish) <= char_offset; --finish);
-      ++finish;
-      update_offset(first, finish, char_offset, getchar, length);
-
-      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
-      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
-      const unsigned max_size = bin_count;
-      const unsigned membin_count = bin_count + 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, membin_count) + 1;
-
-      //Calculating the size of each bin; this takes roughly 10% of runtime
-      for (RandomAccessIter current = first; current != last; ++current) {
-        if (length(*current) <= char_offset) {
-          bin_sizes[0]++;
-        }
-        else
-          bin_sizes[getchar((*current), char_offset) + 1]++;
-      }
-      //Assign the bin positions
-      bin_cache[cache_offset] = first;
-      for (unsigned u = 0; u < membin_count - 1; u++)
-        bin_cache[cache_offset + u + 1] =
-          bin_cache[cache_offset + u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter next_bin_start = first;
-      //handling empty bins
-      RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
-      next_bin_start +=  bin_sizes[0];
-      RandomAccessIter * target_bin;
-      //Iterating over each element in the bin of empties
-      for (RandomAccessIter current = *local_bin; current < next_bin_start;
-          ++current) {
-        //empties belong in this bin
-        while (length(*current) > char_offset) {
-          target_bin = bins + getchar((*current), char_offset);
-          iter_swap(current, (*target_bin)++);
-        }
-      }
-      *local_bin = next_bin_start;
-      //iterate backwards to find the last bin with elements in it
-      //this saves iterations in multiple loops
-      unsigned last_bin = bin_count - 1;
-      for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
-      //This dominates runtime, mostly in the swap and bin lookups
-      for (unsigned u = 0; u < last_bin; ++u) {
-        local_bin = bins + u;
-        next_bin_start += bin_sizes[u + 1];
-        //Iterating over each element in this bin
-        for (RandomAccessIter current = *local_bin; current < next_bin_start;
-            ++current) {
-          //Swapping into place until the correct element has been swapped in
-          for (target_bin = bins + getchar((*current), char_offset);
-              target_bin != local_bin;
-              target_bin = bins + getchar((*current), char_offset))
-            iter_swap(current, (*target_bin)++);
-        }
-        *local_bin = next_bin_start;
-      }
-      bins[last_bin] = last;
-
-      //Recursing
-      RandomAccessIter lastPos = bin_cache[cache_offset];
-      //Skip this loop for empties
-      for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        //don't sort unless there are at least two items to Compare
-        if (count < 2)
-          continue;
-        //using std::sort if its worst-case is better
-        if (count < max_size)
-          std::sort(lastPos, bin_cache[u], comp);
-        else
-          string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
-                          Get_length, Compare>
-            (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
-             bin_sizes, getchar, length, comp);
-      }
-    }
-
-    //Sorts strings in reverse order, with empties at the end
-    template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
-              class Get_length, class Compare>
-    inline void
-    reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
-              size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
-              unsigned cache_offset, size_t *bin_sizes,
-              Get_char getchar, Get_length length, Compare comp)
-    {
-      //This section makes handling of long identical substrings much faster
-      //with a mild average performance impact.
-      RandomAccessIter curr = first;
-      //Iterate to the end of the empties.  If all empty, return
-      while (length(*curr) <= char_offset) {
-        if (++curr == last)
-          return;
-      }
-      //Getting the last non-empty
-      while (length(*(--last)) <= char_offset);
-      ++last;
-      //Offsetting on identical characters.  This section works
-      //a character at a time for optimal worst-case performance.
-      update_offset(curr, last, char_offset, getchar, length);
-
-      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
-      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
-      const unsigned max_size = bin_count;
-      const unsigned membin_count = bin_count + 1;
-      const unsigned max_bin = bin_count - 1;
-      unsigned cache_end;
-      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
-                                          cache_end, membin_count);
-      RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
-
-      //Calculating the size of each bin; this takes roughly 10% of runtime
-      for (RandomAccessIter current = first; current != last; ++current) {
-        if (length(*current) <= char_offset) {
-          bin_sizes[bin_count]++;
-        }
-        else
-          bin_sizes[max_bin - getchar((*current), char_offset)]++;
-      }
-      //Assign the bin positions
-      bin_cache[cache_offset] = first;
-      for (unsigned u = 0; u < membin_count - 1; u++)
-        bin_cache[cache_offset + u + 1] =
-          bin_cache[cache_offset + u] + bin_sizes[u];
-
-      //Swap into place
-      RandomAccessIter next_bin_start = last;
-      //handling empty bins
-      RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
-      RandomAccessIter lastFull = *local_bin;
-      RandomAccessIter * target_bin;
-      //Iterating over each element in the bin of empties
-      for (RandomAccessIter current = *local_bin; current < next_bin_start;
-          ++current) {
-        //empties belong in this bin
-        while (length(*current) > char_offset) {
-          target_bin = end_bin - getchar((*current), char_offset);
-          iter_swap(current, (*target_bin)++);
-        }
-      }
-      *local_bin = next_bin_start;
-      next_bin_start = first;
-      //iterate backwards to find the last bin with elements in it
-      //this saves iterations in multiple loops
-      unsigned last_bin = max_bin;
-      for (; last_bin && !bin_sizes[last_bin]; --last_bin);
-      //This dominates runtime, mostly in the swap and bin lookups
-      for (unsigned u = 0; u < last_bin; ++u) {
-        local_bin = bins + u;
-        next_bin_start += bin_sizes[u];
-        //Iterating over each element in this bin
-        for (RandomAccessIter current = *local_bin; current < next_bin_start;
-            ++current) {
-          //Swapping into place until the correct element has been swapped in
-          for (target_bin = end_bin - getchar((*current), char_offset);
-              target_bin != local_bin;
-              target_bin = end_bin - getchar((*current), char_offset))
-            iter_swap(current, (*target_bin)++);
-        }
-        *local_bin = next_bin_start;
-      }
-      bins[last_bin] = lastFull;
-      //Recursing
-      RandomAccessIter lastPos = first;
-      //Skip this loop for empties
-      for (unsigned u = cache_offset; u <= cache_offset + last_bin;
-          lastPos = bin_cache[u], ++u) {
-        size_t count = bin_cache[u] - lastPos;
-        //don't sort unless there are at least two items to Compare
-        if (count < 2)
-          continue;
-        //using std::sort if its worst-case is better
-        if (count < max_size)
-          std::sort(lastPos, bin_cache[u], comp);
-        else
-          reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
-                                  Get_char, Get_length, Compare>
-            (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
-             bin_sizes, getchar, length, comp);
-      }
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Unsigned_char_type>
-    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                      >::type
-    string_sort(RandomAccessIter first, RandomAccessIter last,
-                Unsigned_char_type)
-    {
-      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
-      std::vector<RandomAccessIter> bin_cache;
-      string_sort_rec<RandomAccessIter, Unsigned_char_type>
-        (first, last, 0, bin_cache, 0, bin_sizes);
-    }
-
-    template <class RandomAccessIter, class Unsigned_char_type>
-    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                       >::type
-    string_sort(RandomAccessIter first, RandomAccessIter last,
-                Unsigned_char_type)
-    {
-      //Warning that we're using std::sort, even though string_sort was called
-      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
-      std::sort(first, last);
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Unsigned_char_type>
-    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                      >::type
-    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
-                        Unsigned_char_type)
-    {
-      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
-      std::vector<RandomAccessIter> bin_cache;
-      reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
-        (first, last, 0, bin_cache, 0, bin_sizes);
-    }
-
-    template <class RandomAccessIter, class Unsigned_char_type>
-    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                       >::type
-    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
-                Unsigned_char_type)
-    {
-      typedef typename std::iterator_traits<RandomAccessIter>::value_type
-        Data_type;
-      //Warning that we're using std::sort, even though string_sort was called
-      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
-      std::sort(first, last, std::greater<Data_type>());
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Get_char, class Get_length,
-              class Unsigned_char_type>
-    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                      >::type
-    string_sort(RandomAccessIter first, RandomAccessIter last,
-                Get_char getchar, Get_length length, Unsigned_char_type)
-    {
-      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
-      std::vector<RandomAccessIter> bin_cache;
-      string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
-        Get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
-    }
-
-    template <class RandomAccessIter, class Get_char, class Get_length,
-              class Unsigned_char_type>
-    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                       >::type
-    string_sort(RandomAccessIter first, RandomAccessIter last,
-                Get_char getchar, Get_length length, Unsigned_char_type)
-    {
-      //Warning that we're using std::sort, even though string_sort was called
-      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
-      std::sort(first, last);
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Get_char, class Get_length,
-              class Compare, class Unsigned_char_type>
-    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                      >::type
-    string_sort(RandomAccessIter first, RandomAccessIter last,
-        Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
-    {
-      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
-      std::vector<RandomAccessIter> bin_cache;
-      string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
-        , Get_length, Compare>
-        (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
-    }
-
-    //disable_if_c was refusing to compile, so rewrote to use enable_if_c
-    template <class RandomAccessIter, class Get_char, class Get_length,
-              class Compare, class Unsigned_char_type>
-    inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
-                                        >::type
-    string_sort(RandomAccessIter first, RandomAccessIter last,
-        Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
-    {
-      //Warning that we're using std::sort, even though string_sort was called
-      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
-      std::sort(first, last, comp);
-    }
-
-    //Holds the bin vector and makes the initial recursive call
-    template <class RandomAccessIter, class Get_char, class Get_length,
-              class Compare, class Unsigned_char_type>
-    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                      >::type
-    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
-        Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
-    {
-      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
-      std::vector<RandomAccessIter> bin_cache;
-      reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
-                              Get_length, Compare>
-        (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
-    }
-
-    template <class RandomAccessIter, class Get_char, class Get_length,
-              class Compare, class Unsigned_char_type>
-    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
-                                                                       >::type
-    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
-        Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
-    {
-      //Warning that we're using std::sort, even though string_sort was called
-      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
-      std::sort(first, last, comp);
-    }
-  }
-}
-}
-}
-
-#endif
+// Details for a templated general-case hybrid-radix string_sort.
+
+//          Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+*/
+
+#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
+#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <limits>
+#include <functional>
+#include <boost/static_assert.hpp>
+#include <boost/serialization/static_warning.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
+#include <boost/cstdint.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+  namespace detail {
+    static const int max_step_size = 64; //max bytes per update_offset step
+
+    //Offsetting on identical characters: advances char_offset (in/out) over
+    //the characters shared by the non-empty strings in [first, finish), a chunk
+    //at a time for cache efficiency and optimal worst-case performance.
+    template<class RandomAccessIter, class Unsigned_char_type>
+    inline void
+    update_offset(RandomAccessIter first, RandomAccessIter finish,
+                  size_t &char_offset)
+    {
+      const int char_size = sizeof(Unsigned_char_type); //bytes per character
+      size_t nextOffset = char_offset; //candidate offset being tested
+      int step_size = max_step_size / char_size; //characters per step
+      while (true) {
+        RandomAccessIter curr = first;
+        do {
+          //Ignore empties (size <= char_offset), but if the nextOffset would
+          //exceed the length or not match, exit; we've found the last matching
+          //character.  This reduces the step_size if the current step fails.
+          if ((*curr).size() > char_offset) {
+            if((*curr).size() <= (nextOffset + step_size)) {
+              step_size = (*curr).size() - nextOffset - 1;
+              if (step_size < 1) {
+                char_offset = nextOffset;
+                return;
+              }
+            }
+            const int step_byte_size = step_size * char_size;
+            if (memcmp(curr->data() + nextOffset, first->data() + nextOffset, 
+                       step_byte_size) != 0) {
+              if (step_size == 1) {
+                char_offset = nextOffset;
+                return;
+              }
+              step_size = (step_size > 4) ? 4 : 1; //shrink: to 4, then to 1
+              continue; //curr is not advanced: retry it with the smaller step
+            }
+          }
+          ++curr;
+        } while (curr != finish);
+        nextOffset += step_size; //every non-empty string matched this chunk
+      }
+    }
+
+    //Offsetting on identical characters, reading strings through get_character
+    //and length.  Works a character at a time for optimal worst-case performance.
+    template<class RandomAccessIter, class Get_char, class Get_length>
+    inline void
+    update_offset(RandomAccessIter first, RandomAccessIter finish,
+                  size_t &char_offset, Get_char get_character, Get_length length)
+    {
+      size_t nextOffset = char_offset; //candidate offset being tested
+      while (true) {
+        RandomAccessIter curr = first;
+        do {
+          //ignore empties (length <= char_offset), but if the nextOffset would
+          //exceed the length or not match *first, exit with the offset found
+          if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
+            || get_character((*curr), nextOffset) != get_character((*first), nextOffset))) {
+            char_offset = nextOffset;
+            return;
+          }
+        } while (++curr != finish);
+        ++nextOffset; //all non-empty strings agree at this position
+      }
+    }
+
+    //Less-than functor; assumes strings are identical up to char_offset
+    template<class Data_type, class Unsigned_char_type>
+    struct offset_less_than {
+      offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
+      inline bool operator()(const Data_type &x, const Data_type &y) const
+      {
+        size_t minSize = (std::min)(x.size(), y.size());
+        for (size_t u = fchar_offset; u < minSize; ++u) {
+          BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
+          if (static_cast<Unsigned_char_type>(x[u]) !=
+              static_cast<Unsigned_char_type>(y[u])) {
+            return static_cast<Unsigned_char_type>(x[u]) < 
+              static_cast<Unsigned_char_type>(y[u]);
+          }
+        }
+        return x.size() < y.size(); //common range equal: shorter sorts first
+      }
+      size_t fchar_offset; //index of the first character to compare
+    };
+
+    //Greater-than functor; assumes strings are identical up to char_offset
+    template<class Data_type, class Unsigned_char_type>
+    struct offset_greater_than {
+      offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
+      inline bool operator()(const Data_type &x, const Data_type &y) const
+      {
+        size_t minSize = (std::min)(x.size(), y.size());
+        for (size_t u = fchar_offset; u < minSize; ++u) {
+          BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
+          if (static_cast<Unsigned_char_type>(x[u]) !=
+              static_cast<Unsigned_char_type>(y[u])) {
+            return static_cast<Unsigned_char_type>(x[u]) > 
+              static_cast<Unsigned_char_type>(y[u]);
+          }
+        }
+        return x.size() > y.size(); //common range equal: longer sorts first
+      }
+      size_t fchar_offset; //index of the first character to compare
+    };
+
+    //Less-than via Get_char/Get_length; assumes equality up to char_offset
+    template<class Data_type, class Get_char, class Get_length>
+    struct offset_char_less_than {
+      offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
+      inline bool operator()(const Data_type &x, const Data_type &y) const
+      {
+        size_t minSize = (std::min)(length(x), length(y));
+        for (size_t u = fchar_offset; u < minSize; ++u) {
+          if (get_character(x, u) != get_character(y, u)) {
+            return get_character(x, u) < get_character(y, u);
+          }
+        }
+        return length(x) < length(y); //common range equal: shorter sorts first
+      }
+      size_t fchar_offset; //index of the first character to compare
+      Get_char get_character; //default-constructed; the ctor takes no functor
+      Get_length length; //default-constructed; the ctor takes no functor
+    };
+
+    //Recursive pass of the plain string sort: bins [first, last) in place by the character at char_offset, then sorts each bin from the next character on
+    template <class RandomAccessIter, class Unsigned_char_type>
+    inline void
+    string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+                    size_t char_offset,
+                    std::vector<RandomAccessIter> &bin_cache,
+                    unsigned cache_offset, size_t *bin_sizes)
+    {
+      typedef typename std::iterator_traits<RandomAccessIter>::value_type
+        Data_type;
+      //This section makes handling of long identical substrings much faster
+      //with a mild average performance impact.
+      //Skip leading strings that have no character at char_offset.  If no string has one, return
+      while ((*first).size() <= char_offset) {
+        if (++first == last)
+          return;
+      }
+      RandomAccessIter finish = last - 1;
+      //Walk back to the last string that has a character at char_offset (*first is one, so this stops)
+      for (;(*finish).size() <= char_offset; --finish);
+      ++finish; //finish is now one past that string
+      //Offsetting on identical characters.  This section works
+      //a few characters at a time for optimal worst-case performance.
+      update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
+                                                          char_offset); //defined outside this block; NOTE(review): presumably advances char_offset in place - confirm
+      
+      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8)); //one bin per possible character value
+      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+      const unsigned max_size = bin_count; //bins smaller than this are handed to std::sort below
+      const unsigned membin_count = bin_count + 1; //the extra bin holds strings with no character at char_offset
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, membin_count) + 1; //NOTE(review): presumably size_bins returns &bin_cache[cache_offset], so bins[c] is the slot of character c - confirm
+
+      //Calculating the size of each bin; this takes roughly 10% of runtime
+      for (RandomAccessIter current = first; current != last; ++current) {
+        if ((*current).size() <= char_offset) {
+          bin_sizes[0]++; //no character at char_offset: counted in bin 0
+        }
+        else
+          bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
+                    + 1]++; //shifted by one because bin 0 is the short-string bin
+      }
+      //Assign the bin positions: each slot starts where the previous bin ends (running sum of bin_sizes)
+      bin_cache[cache_offset] = first;
+      for (unsigned u = 0; u < membin_count - 1; u++)
+        bin_cache[cache_offset + u + 1] =
+          bin_cache[cache_offset + u] + bin_sizes[u];
+
+      //Swap into place
+      RandomAccessIter next_bin_start = first;
+      //Start with the short-string bin, whose slot is bin_cache[cache_offset]
+      RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
+      next_bin_start +=  bin_sizes[0]; //end of the short-string bin
+      RandomAccessIter * target_bin;
+      //Iterating over each element in the bin of empties
+      for (RandomAccessIter current = *local_bin; current < next_bin_start;
+          ++current) {
+        //Only short strings belong here: keep swapping the occupant out until a short string lands in this position
+        while ((*current).size() > char_offset) {
+          target_bin =
+            bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
+          iter_swap(current, (*target_bin)++); //send it to its bin's next unfilled position and advance that position
+        }
+      }
+      *local_bin = next_bin_start; //the slot now holds the end of this bin
+      //iterate backwards to find the last bin with elements in it
+      //this saves iterations in multiple loops
+      unsigned last_bin = bin_count - 1;
+      for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
+      //This dominates runtime, mostly in the swap and bin lookups
+      for (unsigned u = 0; u < last_bin; ++u) {
+        local_bin = bins + u;
+        next_bin_start += bin_sizes[u + 1]; //end of character bin u
+        //Iterating over each element in this bin
+        for (RandomAccessIter current = *local_bin; current < next_bin_start;
+            ++current) {
+          //Swapping into place until the correct element has been swapped in
+          for (target_bin = bins + static_cast<Unsigned_char_type>
+              ((*current)[char_offset]);  target_bin != local_bin;
+            target_bin = bins + static_cast<Unsigned_char_type>
+              ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
+        }
+        *local_bin = next_bin_start; //the slot now holds the end of bin u
+      }
+      bins[last_bin] = last; //the loop above stops before last_bin; its slot is set to the end of the range directly
+      //Recursing
+      RandomAccessIter lastPos = bin_cache[cache_offset]; //end of the short-string bin = start of the first character bin
+      //Visit the character bins only; the short-string bin is skipped
+      for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //each slot holds its bin's end, so this is the bin's size
+        //don't sort unless there are at least two items to compare
+        if (count < 2)
+          continue;
+        //using std::sort if its worst-case is better
+        if (count < max_size)
+          std::sort(lastPos, bin_cache[u],
+              offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
+        else
+          string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
+              bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes); //cache_end becomes the nested call's cache_offset
+      }
+    }
+
+    //Recursive pass of the reverse string sort: bins in descending character order, with strings lacking a character at char_offset in the final bin
+    template <class RandomAccessIter, class Unsigned_char_type>
+    inline void
+    reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+                            size_t char_offset,
+                            std::vector<RandomAccessIter> &bin_cache,
+                            unsigned cache_offset,
+                            size_t *bin_sizes)
+    {
+      typedef typename std::iterator_traits<RandomAccessIter>::value_type
+        Data_type;
+      //This section makes handling of long identical substrings much faster
+      //with a mild average performance impact.
+      RandomAccessIter curr = first; //first itself is left untouched; the loops below still start from it
+      //Find the first string that has a character at char_offset.  If no string has one, return
+      while ((*curr).size() <= char_offset) {
+        if (++curr == last)
+          return;
+      }
+      //Trim trailing short strings off the range; they are left where they are, at the end
+      while ((*(--last)).size() <= char_offset);
+      ++last; //last is now one past the final string that has a character at char_offset
+      //Offsetting on identical characters.  This section works
+      //a few characters at a time for optimal worst-case performance.
+      update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
+                                                          char_offset); //defined outside this block; NOTE(review): presumably advances char_offset in place - confirm
+      RandomAccessIter * target_bin;
+
+      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8)); //one bin per possible character value
+      //Equal worst-case of radix and comparison when bin_count = n*log(n).
+      const unsigned max_size = bin_count; //bins smaller than this are handed to std::sort below
+      const unsigned membin_count = bin_count + 1; //the extra bin holds strings with no character at char_offset
+      const unsigned max_bin = bin_count - 1; //index of the bin for character value 0
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, membin_count); //NOTE(review): presumably &bin_cache[cache_offset] - confirm against size_bins
+      RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]); //slot of character value 0; character c uses end_bin - c
+
+      //Calculating the size of each bin; this takes roughly 10% of runtime
+      for (RandomAccessIter current = first; current != last; ++current) {
+        if ((*current).size() <= char_offset) {
+          bin_sizes[bin_count]++; //short strings are counted in the final bin
+        }
+        else
+          bin_sizes[max_bin - static_cast<Unsigned_char_type>
+            ((*current)[char_offset])]++; //reversed order: the largest character value maps to bin 0
+      }
+      //Assign the bin positions: each slot starts where the previous bin ends (running sum of bin_sizes)
+      bin_cache[cache_offset] = first;
+      for (unsigned u = 0; u < membin_count - 1; u++)
+        bin_cache[cache_offset + u + 1] =
+          bin_cache[cache_offset + u] + bin_sizes[u];
+
+      //Swap into place
+      RandomAccessIter next_bin_start = last;
+      //Start with the short-string bin, which occupies the final slot
+      RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
+      RandomAccessIter lastFull = *local_bin; //start of the short-string bin = end of the character bins
+      //Iterating over each element in the bin of empties
+      for (RandomAccessIter current = *local_bin; current < next_bin_start;
+          ++current) {
+        //Only short strings belong here: keep swapping the occupant out until a short string lands in this position
+        while ((*current).size() > char_offset) {
+          target_bin =
+            end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
+          iter_swap(current, (*target_bin)++); //send it to its bin's next unfilled position and advance that position
+        }
+      }
+      *local_bin = next_bin_start; //the slot now holds the end of the short-string bin
+      next_bin_start = first; //restart the running bin boundary at the front for the character bins
+      //iterate backwards to find the last non-empty bin
+      //this saves iterations in multiple loops
+      unsigned last_bin = max_bin;
+      for (; last_bin && !bin_sizes[last_bin]; --last_bin);
+      //This dominates runtime, mostly in the swap and bin lookups
+      for (unsigned u = 0; u < last_bin; ++u) {
+        local_bin = bins + u;
+        next_bin_start += bin_sizes[u]; //end of bin u
+        //Iterating over each element in this bin
+        for (RandomAccessIter current = *local_bin; current < next_bin_start;
+            ++current) {
+          //Swapping into place until the correct element has been swapped in
+          for (target_bin =
+            end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
+            target_bin != local_bin;
+            target_bin =
+            end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
+              iter_swap(current, (*target_bin)++);
+        }
+        *local_bin = next_bin_start; //the slot now holds the end of bin u
+      }
+      bins[last_bin] = lastFull; //the loop above stops before last_bin; its slot is set to where the short-string bin starts
+      //Recursing
+      RandomAccessIter lastPos = first;
+      //Visit the character bins only; u never reaches the short-string bin's slot
+      for (unsigned u = cache_offset; u <= cache_offset + last_bin;
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //each slot holds its bin's end, so this is the bin's size
+        //don't sort unless there are at least two items to compare
+        if (count < 2)
+          continue;
+        //using std::sort if its worst-case is better
+        if (count < max_size)
+          std::sort(lastPos, bin_cache[u], offset_greater_than<Data_type,
+                    Unsigned_char_type>(char_offset + 1));
+        else
+          reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
+    (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes); //cache_end becomes the nested call's cache_offset
+      }
+    }
+
+    //Recursive pass of the functor-based string sort: like the plain version, but characters and lengths are read through get_character and length
+    template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
+              class Get_length>
+    inline void
+    string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+              size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
+              unsigned cache_offset, size_t *bin_sizes,
+              Get_char get_character, Get_length length)
+    {
+      typedef typename std::iterator_traits<RandomAccessIter>::value_type
+        Data_type;
+      //This section makes handling of long identical substrings much faster
+      //with a mild average performance impact.
+      //Skip leading strings that have no character at char_offset.  If no string has one, return
+      while (length(*first) <= char_offset) {
+        if (++first == last)
+          return;
+      }
+      RandomAccessIter finish = last - 1;
+      //Walk back to the last string that has a character at char_offset (*first is one, so this stops)
+      for (;length(*finish) <= char_offset; --finish);
+      ++finish; //finish is now one past that string
+      update_offset(first, finish, char_offset, get_character, length); //defined outside this block; NOTE(review): presumably advances char_offset in place - confirm
+
+      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8)); //one bin per possible character value
+      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+      const unsigned max_size = bin_count; //bins smaller than this are handed to std::sort below
+      const unsigned membin_count = bin_count + 1; //the extra bin holds strings with no character at char_offset
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, membin_count) + 1; //NOTE(review): presumably size_bins returns &bin_cache[cache_offset], so bins[c] is the slot of character c - confirm
+
+      //Calculating the size of each bin; this takes roughly 10% of runtime
+      for (RandomAccessIter current = first; current != last; ++current) {
+        if (length(*current) <= char_offset) {
+          bin_sizes[0]++; //no character at char_offset: counted in bin 0
+        }
+        else
+          bin_sizes[get_character((*current), char_offset) + 1]++; //used as an index without a cast; NOTE(review): assumes a non-negative value below bin_count - confirm
+      }
+      //Assign the bin positions: each slot starts where the previous bin ends (running sum of bin_sizes)
+      bin_cache[cache_offset] = first;
+      for (unsigned u = 0; u < membin_count - 1; u++)
+        bin_cache[cache_offset + u + 1] =
+          bin_cache[cache_offset + u] + bin_sizes[u];
+
+      //Swap into place
+      RandomAccessIter next_bin_start = first;
+      //Start with the short-string bin, whose slot is bin_cache[cache_offset]
+      RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
+      next_bin_start +=  bin_sizes[0]; //end of the short-string bin
+      RandomAccessIter * target_bin;
+      //Iterating over each element in the bin of empties
+      for (RandomAccessIter current = *local_bin; current < next_bin_start;
+          ++current) {
+        //Only short strings belong here: keep swapping the occupant out until a short string lands in this position
+        while (length(*current) > char_offset) {
+          target_bin = bins + get_character((*current), char_offset);
+          iter_swap(current, (*target_bin)++); //send it to its bin's next unfilled position and advance that position
+        }
+      }
+      *local_bin = next_bin_start; //the slot now holds the end of this bin
+      //iterate backwards to find the last bin with elements in it
+      //this saves iterations in multiple loops
+      unsigned last_bin = bin_count - 1;
+      for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
+      //This dominates runtime, mostly in the swap and bin lookups
+      for (unsigned ii = 0; ii < last_bin; ++ii) {
+        local_bin = bins + ii;
+        next_bin_start += bin_sizes[ii + 1]; //end of character bin ii
+        //Iterating over each element in this bin
+        for (RandomAccessIter current = *local_bin; current < next_bin_start;
+            ++current) {
+          //Swapping into place until the correct element has been swapped in
+          for (target_bin = bins + get_character((*current), char_offset);
+              target_bin != local_bin;
+              target_bin = bins + get_character((*current), char_offset))
+            iter_swap(current, (*target_bin)++);
+        }
+        *local_bin = next_bin_start; //the slot now holds the end of bin ii
+      }
+      bins[last_bin] = last; //the loop above stops before last_bin; its slot is set to the end of the range directly
+
+      //Recursing
+      RandomAccessIter lastPos = bin_cache[cache_offset]; //end of the short-string bin = start of the first character bin
+      //Visit the character bins only; the short-string bin is skipped
+      for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //each slot holds its bin's end, so this is the bin's size
+        //don't sort unless there are at least two items to compare
+        if (count < 2)
+          continue;
+        //using std::sort if its worst-case is better
+        if (count < max_size)
+          std::sort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
+                    Get_char, Get_length>(char_offset + 1));
+        else
+          string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+            Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
+                        cache_end, bin_sizes, get_character, length); //cache_end becomes the nested call's cache_offset
+      }
+    }
+
+    //Recursive pass of the functor-based string sort with a caller-supplied comparison: small bins are finished with std::sort(..., comp)
+    template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
+              class Get_length, class Compare>
+    inline void
+    string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+              size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
+              unsigned cache_offset, size_t *bin_sizes,
+              Get_char get_character, Get_length length, Compare comp)
+    {
+      //This section makes handling of long identical substrings much faster
+      //with a mild average performance impact.
+      //Skip leading strings that have no character at char_offset.  If no string has one, return
+      while (length(*first) <= char_offset) {
+        if (++first == last)
+          return;
+      }
+      RandomAccessIter finish = last - 1;
+      //Walk back to the last string that has a character at char_offset (*first is one, so this stops)
+      for (;length(*finish) <= char_offset; --finish);
+      ++finish; //finish is now one past that string
+      update_offset(first, finish, char_offset, get_character, length); //defined outside this block; NOTE(review): presumably advances char_offset in place - confirm
+
+      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8)); //one bin per possible character value
+      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+      const unsigned max_size = bin_count; //bins smaller than this are handed to std::sort below
+      const unsigned membin_count = bin_count + 1; //the extra bin holds strings with no character at char_offset
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, membin_count) + 1; //NOTE(review): presumably size_bins returns &bin_cache[cache_offset], so bins[c] is the slot of character c - confirm
+
+      //Calculating the size of each bin; this takes roughly 10% of runtime
+      for (RandomAccessIter current = first; current != last; ++current) {
+        if (length(*current) <= char_offset) {
+          bin_sizes[0]++; //no character at char_offset: counted in bin 0
+        }
+        else
+          bin_sizes[get_character((*current), char_offset) + 1]++; //used as an index without a cast; NOTE(review): assumes a non-negative value below bin_count - confirm
+      }
+      //Assign the bin positions: each slot starts where the previous bin ends (running sum of bin_sizes)
+      bin_cache[cache_offset] = first;
+      for (unsigned u = 0; u < membin_count - 1; u++)
+        bin_cache[cache_offset + u + 1] =
+          bin_cache[cache_offset + u] + bin_sizes[u];
+
+      //Swap into place
+      RandomAccessIter next_bin_start = first;
+      //Start with the short-string bin, whose slot is bin_cache[cache_offset]
+      RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
+      next_bin_start +=  bin_sizes[0]; //end of the short-string bin
+      RandomAccessIter * target_bin;
+      //Iterating over each element in the bin of empties
+      for (RandomAccessIter current = *local_bin; current < next_bin_start;
+          ++current) {
+        //Only short strings belong here: keep swapping the occupant out until a short string lands in this position
+        while (length(*current) > char_offset) {
+          target_bin = bins + get_character((*current), char_offset);
+          iter_swap(current, (*target_bin)++); //send it to its bin's next unfilled position and advance that position
+        }
+      }
+      *local_bin = next_bin_start; //the slot now holds the end of this bin
+      //iterate backwards to find the last bin with elements in it
+      //this saves iterations in multiple loops
+      unsigned last_bin = bin_count - 1;
+      for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
+      //This dominates runtime, mostly in the swap and bin lookups
+      for (unsigned u = 0; u < last_bin; ++u) {
+        local_bin = bins + u;
+        next_bin_start += bin_sizes[u + 1]; //end of character bin u
+        //Iterating over each element in this bin
+        for (RandomAccessIter current = *local_bin; current < next_bin_start;
+            ++current) {
+          //Swapping into place until the correct element has been swapped in
+          for (target_bin = bins + get_character((*current), char_offset);
+              target_bin != local_bin;
+              target_bin = bins + get_character((*current), char_offset))
+            iter_swap(current, (*target_bin)++);
+        }
+        *local_bin = next_bin_start; //the slot now holds the end of bin u
+      }
+      bins[last_bin] = last; //the loop above stops before last_bin; its slot is set to the end of the range directly
+
+      //Recursing
+      RandomAccessIter lastPos = bin_cache[cache_offset]; //end of the short-string bin = start of the first character bin
+      //Visit the character bins only; the short-string bin is skipped
+      for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //each slot holds its bin's end, so this is the bin's size
+        //don't sort unless there are at least two items to compare
+        if (count < 2)
+          continue;
+        //using std::sort if its worst-case is better
+        if (count < max_size)
+          std::sort(lastPos, bin_cache[u], comp); //comp is passed as given; it receives no char_offset
+        else
+          string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+                          Get_length, Compare>
+            (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
+             bin_sizes, get_character, length, comp); //cache_end becomes the nested call's cache_offset
+      }
+    }
+
+    //Recursive pass of the functor-based reverse string sort: descending character bins, strings lacking a character at char_offset in the final bin
+    template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
+              class Get_length, class Compare>
+    inline void
+    reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+              size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
+              unsigned cache_offset, size_t *bin_sizes,
+              Get_char get_character, Get_length length, Compare comp)
+    {
+      //This section makes handling of long identical substrings much faster
+      //with a mild average performance impact.
+      RandomAccessIter curr = first; //first itself is left untouched; the loops below still start from it
+      //Find the first string that has a character at char_offset.  If no string has one, return
+      while (length(*curr) <= char_offset) {
+        if (++curr == last)
+          return;
+      }
+      //Trim trailing short strings off the range; they are left where they are, at the end
+      while (length(*(--last)) <= char_offset);
+      ++last; //last is now one past the final string that has a character at char_offset
+      //Offsetting on identical characters.  This section works
+      //a character at a time for optimal worst-case performance.
+      update_offset(curr, last, char_offset, get_character, length); //defined outside this block; NOTE(review): presumably advances char_offset in place - confirm
+
+      const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8)); //one bin per possible character value
+      //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+      const unsigned max_size = bin_count; //bins smaller than this are handed to std::sort below
+      const unsigned membin_count = bin_count + 1; //the extra bin holds strings with no character at char_offset
+      const unsigned max_bin = bin_count - 1; //index of the bin for character value 0
+      unsigned cache_end;
+      RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+                                          cache_end, membin_count); //NOTE(review): presumably &bin_cache[cache_offset] - confirm against size_bins
+      RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]); //slot of character value 0; character c uses end_bin - c
+
+      //Calculating the size of each bin; this takes roughly 10% of runtime
+      for (RandomAccessIter current = first; current != last; ++current) {
+        if (length(*current) <= char_offset) {
+          bin_sizes[bin_count]++; //short strings are counted in the final bin
+        }
+        else
+          bin_sizes[max_bin - get_character((*current), char_offset)]++; //reversed order, no cast; NOTE(review): assumes get_character yields a value in [0, max_bin] - confirm
+      }
+      //Assign the bin positions: each slot starts where the previous bin ends (running sum of bin_sizes)
+      bin_cache[cache_offset] = first;
+      for (unsigned u = 0; u < membin_count - 1; u++)
+        bin_cache[cache_offset + u + 1] =
+          bin_cache[cache_offset + u] + bin_sizes[u];
+
+      //Swap into place
+      RandomAccessIter next_bin_start = last;
+      //Start with the short-string bin, which occupies the final slot
+      RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
+      RandomAccessIter lastFull = *local_bin; //start of the short-string bin = end of the character bins
+      RandomAccessIter * target_bin;
+      //Iterating over each element in the bin of empties
+      for (RandomAccessIter current = *local_bin; current < next_bin_start;
+          ++current) {
+        //Only short strings belong here: keep swapping the occupant out until a short string lands in this position
+        while (length(*current) > char_offset) {
+          target_bin = end_bin - get_character((*current), char_offset);
+          iter_swap(current, (*target_bin)++); //send it to its bin's next unfilled position and advance that position
+        }
+      }
+      *local_bin = next_bin_start; //the slot now holds the end of the short-string bin
+      next_bin_start = first; //restart the running bin boundary at the front for the character bins
+      //iterate backwards to find the last bin with elements in it
+      //this saves iterations in multiple loops
+      unsigned last_bin = max_bin;
+      for (; last_bin && !bin_sizes[last_bin]; --last_bin);
+      //This dominates runtime, mostly in the swap and bin lookups
+      for (unsigned u = 0; u < last_bin; ++u) {
+        local_bin = bins + u;
+        next_bin_start += bin_sizes[u]; //end of bin u
+        //Iterating over each element in this bin
+        for (RandomAccessIter current = *local_bin; current < next_bin_start;
+            ++current) {
+          //Swapping into place until the correct element has been swapped in
+          for (target_bin = end_bin - get_character((*current), char_offset);
+              target_bin != local_bin;
+              target_bin = end_bin - get_character((*current), char_offset))
+            iter_swap(current, (*target_bin)++);
+        }
+        *local_bin = next_bin_start; //the slot now holds the end of bin u
+      }
+      bins[last_bin] = lastFull; //the loop above stops before last_bin; its slot is set to where the short-string bin starts
+      //Recursing
+      RandomAccessIter lastPos = first;
+      //Visit the character bins only; u never reaches the short-string bin's slot
+      for (unsigned u = cache_offset; u <= cache_offset + last_bin;
+          lastPos = bin_cache[u], ++u) {
+        size_t count = bin_cache[u] - lastPos; //each slot holds its bin's end, so this is the bin's size
+        //don't sort unless there are at least two items to compare
+        if (count < 2)
+          continue;
+        //using std::sort if its worst-case is better
+        if (count < max_size)
+          std::sort(lastPos, bin_cache[u], comp); //comp is passed as given; it receives no char_offset
+        else
+          reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
+                                  Get_char, Get_length, Compare>
+            (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
+             bin_sizes, get_character, length, comp); //cache_end becomes the nested call's cache_offset
+      }
+    }
+
+    //Overload enabled for character types of at most 2 bytes: holds the bin storage and makes the initial recursive call
+    template <class RandomAccessIter, class Unsigned_char_type>
+    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                      >::type
+    string_sort(RandomAccessIter first, RandomAccessIter last,
+                Unsigned_char_type) //unnamed parameter: only its type is used
+    {
+      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1]; //one counter per character value plus one; not initialised here - NOTE(review): presumably zeroed by size_bins, confirm
+      std::vector<RandomAccessIter> bin_cache; //starts empty; the same vector is passed down through every recursion level
+      string_sort_rec<RandomAccessIter, Unsigned_char_type>
+        (first, last, 0, bin_cache, 0, bin_sizes); //char_offset 0, cache_offset 0
+    }
+
+    template <class RandomAccessIter, class Unsigned_char_type> //fallback overload: selected when the character type is wider than 2 bytes
+    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                       >::type
+    string_sort(RandomAccessIter first, RandomAccessIter last,
+                Unsigned_char_type) //unnamed parameter: only its type is used
+    {
+      //Warning that we're using std::sort, even though string_sort was called
+      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 ); //the condition is always false in this overload
+      std::sort(first, last); //no comparator given, so the default ordering of the element type is used
+    }
+
+    //Overload enabled for character types of at most 2 bytes: holds the bin storage and makes the initial recursive call of the reverse sort
+    template <class RandomAccessIter, class Unsigned_char_type>
+    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                      >::type
+    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+                        Unsigned_char_type) //unnamed parameter: only its type is used
+    {
+      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1]; //one counter per character value plus one; not initialised here - NOTE(review): presumably zeroed by size_bins, confirm
+      std::vector<RandomAccessIter> bin_cache; //starts empty; the same vector is passed down through every recursion level
+      reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
+        (first, last, 0, bin_cache, 0, bin_sizes); //char_offset 0, cache_offset 0
+    }
+
+    template <class RandomAccessIter, class Unsigned_char_type> //fallback overload: selected when the character type is wider than 2 bytes
+    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                       >::type
+    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+                Unsigned_char_type) //unnamed parameter: only its type is used
+    {
+      typedef typename std::iterator_traits<RandomAccessIter>::value_type
+        Data_type;
+      //Warning that we're using std::sort, even though reverse_string_sort was called
+      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 ); //the condition is always false in this overload
+      std::sort(first, last, std::greater<Data_type>()); //descending order via the element type's operator>
+    }
+
+    //Functor overload enabled for character types of at most 2 bytes: holds the bin storage and makes the initial recursive call
+    template <class RandomAccessIter, class Get_char, class Get_length,
+              class Unsigned_char_type>
+    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                      >::type
+    string_sort(RandomAccessIter first, RandomAccessIter last,
+                Get_char get_character, Get_length length, Unsigned_char_type) //last parameter is unnamed: only its type is used
+    {
+      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1]; //one counter per character value plus one; not initialised here - NOTE(review): presumably zeroed by size_bins, confirm
+      std::vector<RandomAccessIter> bin_cache; //starts empty; the same vector is passed down through every recursion level
+      string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+        Get_length>(first, last, 0, bin_cache, 0, bin_sizes, get_character, length); //char_offset 0, cache_offset 0
+    }
+
+    template <class RandomAccessIter, class Get_char, class Get_length, //fallback overload: selected when the character type is wider than 2 bytes
+              class Unsigned_char_type>
+    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                       >::type
+    string_sort(RandomAccessIter first, RandomAccessIter last,
+                Get_char get_character, Get_length length, Unsigned_char_type) //get_character and length are not used by this overload
+    {
+      //Warning that we're using std::sort, even though string_sort was called
+      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 ); //the condition is always false in this overload
+      std::sort(first, last); //no comparator given, so the default ordering of the element type is used
+    }
+
+    //Functor-plus-comparison overload enabled for character types of at most 2 bytes: holds the bin storage and makes the initial recursive call
+    template <class RandomAccessIter, class Get_char, class Get_length,
+              class Compare, class Unsigned_char_type>
+    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                      >::type
+    string_sort(RandomAccessIter first, RandomAccessIter last,
+        Get_char get_character, Get_length length, Compare comp, Unsigned_char_type) //last parameter is unnamed: only its type is used
+    {
+      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1]; //one counter per character value plus one; not initialised here - NOTE(review): presumably zeroed by size_bins, confirm
+      std::vector<RandomAccessIter> bin_cache; //starts empty; the same vector is passed down through every recursion level
+      string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
+        , Get_length, Compare>
+        (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp); //char_offset 0, cache_offset 0
+    }
+
+    //Fallback for character types wider than 2 bytes.  disable_if_c was refusing to compile, so rewrote to use enable_if_c
+    template <class RandomAccessIter, class Get_char, class Get_length,
+              class Compare, class Unsigned_char_type>
+    inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
+                                        >::type
+    string_sort(RandomAccessIter first, RandomAccessIter last,
+        Get_char get_character, Get_length length, Compare comp, Unsigned_char_type) //get_character and length are not used by this overload
+    {
+      //Warning that we're using std::sort, even though string_sort was called
+      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 ); //the condition is always false in this overload
+      std::sort(first, last, comp); //ordering comes entirely from the caller's comp
+    }
+
+    //Functor-plus-comparison overload enabled for character types of at most 2 bytes: holds the bin storage and makes the initial recursive call of the reverse sort
+    template <class RandomAccessIter, class Get_char, class Get_length,
+              class Compare, class Unsigned_char_type>
+    inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                      >::type
+    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+        Get_char get_character, Get_length length, Compare comp, Unsigned_char_type) //last parameter is unnamed: only its type is used
+    {
+      size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1]; //one counter per character value plus one; not initialised here - NOTE(review): presumably zeroed by size_bins, confirm
+      std::vector<RandomAccessIter> bin_cache; //starts empty; the same vector is passed down through every recursion level
+      reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+                              Get_length, Compare>
+        (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp); //char_offset 0, cache_offset 0
+    }
+
+    template <class RandomAccessIter, class Get_char, class Get_length, //fallback overload: selected when the character type is wider than 2 bytes
+              class Compare, class Unsigned_char_type>
+    inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+                                                                       >::type
+    reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+        Get_char get_character, Get_length length, Compare comp, Unsigned_char_type) //get_character and length are not used by this overload
+    {
+      //Warning that we're using std::sort, even though reverse_string_sort was called
+      BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 ); //the condition is always false in this overload
+      std::sort(first, last, comp); //ordering comes entirely from the caller's comp
+    }
+  }
+}
+}
+}
+
+#endif
diff --git a/boost/sort/spreadsort/float_sort.hpp b/boost/sort/spreadsort/float_sort.hpp
index 37966c28db..d5310d19ce 100644
--- a/boost/sort/spreadsort/float_sort.hpp
+++ b/boost/sort/spreadsort/float_sort.hpp
@@ -1,134 +1,176 @@
-//Templated Spreadsort-based implementation of float_sort and float_mem_cast
-
-//          Copyright Steven J. Ross 2001 - 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort/ for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-float_mem_cast fix provided by:
-Scott McMurray
-*/
-
-#ifndef BOOST_FLOAT_SORT_HPP
-#define BOOST_FLOAT_SORT_HPP
-#include <algorithm>
-#include <vector>
-#include <cstring>
-#include <limits>
-#include <boost/static_assert.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/sort/spreadsort/detail/float_sort.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
-
-  /*!
-  \brief Casts a float to the specified integer type.
-
-  \tparam Data_type Floating-point IEEE 754/IEC559 type.
-  \tparam Cast_type Integer type (same size) to which to cast.
-
-  \par Example:
-  \code
-  struct rightshift {
-    int operator()(const DATA_TYPE &x, const unsigned offset) const {
-      return float_mem_cast<KEY_TYPE, CAST_TYPE>(x.key) >> offset;
-    }
-  };
-  \endcode
-  */
-  template<class Data_type, class Cast_type>
-  inline Cast_type
-  float_mem_cast(const Data_type & data)
-  {
-    // Only cast IEEE floating-point numbers, and only to a same-sized integer.
-    BOOST_STATIC_ASSERT(sizeof(Cast_type) == sizeof(Data_type));
-    BOOST_STATIC_ASSERT(std::numeric_limits<Data_type>::is_iec559);
-    BOOST_STATIC_ASSERT(std::numeric_limits<Cast_type>::is_integer);
-    Cast_type result;
-    std::memcpy(&result, &data, sizeof(Cast_type));
-    return result;
-  }
-
-
-  /*!
-    \brief @c float_sort with casting to the appropriate size.
-
-    \param[in] first Iterator pointer to first element.
-    \param[in] last Iterator pointing to one beyond the end of data.
-
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_float_sort.htm"> windows_float_sort</a>
-   \n
-   <a href="../../doc/graph/osx_float_sort.htm"> osx_float_sort</a>
-
-
-
-   \par A simple example of sorting some floating-point is:
-   \code
-     vector<float> vec;
-     vec.push_back(1.0);
-     vec.push_back(2.3);
-     vec.push_back(1.3);
-     spreadsort(vec.begin(), vec.end());
-   \endcode
-   \par The sorted vector contains ascending values "1.0 1.3 2.3".
-
-  */
-  template <class RandomAccessIter>
-  inline void float_sort(RandomAccessIter first, RandomAccessIter last)
-  {
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last);
-    else
-      detail::float_sort(first, last);
-  }
-
-  /*!
-    \brief Floating-point sort algorithm using random access iterators with just right-shift functor.
-
-    \param[in] first Iterator pointer to first element.
-    \param[in] last Iterator pointing to one beyond the end of data.
-    \param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
-
-  */
-  template <class RandomAccessIter, class Right_shift>
-  inline void float_sort(RandomAccessIter first, RandomAccessIter last,
-                         Right_shift rshift)
-  {
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last);
-    else
-      detail::float_sort(first, last, rshift(*first, 0), rshift);
-  }
-
-
-  /*!
-   \brief Float sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
-   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
-  */
-
-  template <class RandomAccessIter, class Right_shift, class Compare>
-  inline void float_sort(RandomAccessIter first, RandomAccessIter last,
-                         Right_shift rshift, Compare comp)
-  {
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last, comp);
-    else
-      detail::float_sort(first, last, rshift(*first, 0), rshift, comp);
-  }
-}
-}
-}
-
-#endif
+//Templated Spreadsort-based implementation of float_sort and float_mem_cast
+
+//          Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort/ for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+float_mem_cast fix provided by:
+Scott McMurray
+*/
+
+#ifndef BOOST_FLOAT_SORT_HPP
+#define BOOST_FLOAT_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <limits>
+#include <boost/static_assert.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/float_sort.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+
+  /*!
+  \brief Copies the bytes of a float into the specified integer type (no numeric conversion).
+
+  \tparam Data_type Floating-point IEEE 754/IEC559 type.
+  \tparam Cast_type Integer type (same size) to which to cast.
+
+  \par Example:
+  \code
+  struct rightshift {
+    int operator()(const DATA_TYPE &x, const unsigned offset) const {
+      return float_mem_cast<KEY_TYPE, CAST_TYPE>(x.key) >> offset;
+    }
+  };
+  \endcode
+  */
+  template<class Data_type, class Cast_type>
+  inline Cast_type
+  float_mem_cast(const Data_type & data)
+  {
+    // Only cast IEEE floating-point numbers, and only to a same-sized integer.
+    BOOST_STATIC_ASSERT(sizeof(Cast_type) == sizeof(Data_type));
+    BOOST_STATIC_ASSERT(std::numeric_limits<Data_type>::is_iec559);
+    BOOST_STATIC_ASSERT(std::numeric_limits<Cast_type>::is_integer);
+    Cast_type result;
+    std::memcpy(&result, &data, sizeof(Cast_type)); // byte copy instead of a pointer cast
+    return result;
+  }
+
+
+  /*!
+    \brief @c float_sort with casting to the appropriate size.
+
+    \param[in] first Iterator pointing to the first element.
+    \param[in] last Iterator pointing to one beyond the end of data.
+
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_float_sort.htm"> windows_float_sort</a>
+   \n
+   <a href="../../doc/graph/osx_float_sort.htm"> osx_float_sort</a>
+
+   \note Ranges shorter than @c detail::min_sort_size are sorted with plain @c std::sort.
+
+   \par A simple example of sorting some floating-point values is:
+   \code
+     vector<float> vec;
+     vec.push_back(1.0);
+     vec.push_back(2.3);
+     vec.push_back(1.3);
+     float_sort(vec.begin(), vec.end());
+   \endcode
+   \par The sorted vector contains ascending values "1.0 1.3 2.3".
+
+  */
+  template <class RandomAccessIter>
+  inline void float_sort(RandomAccessIter first, RandomAccessIter last)
+  {
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last);
+    else
+      detail::float_sort(first, last);
+  }
+
+  /*!
+    \brief Floating-point sort algorithm taking a range instead of an iterator pair.
+
+    \param[in] range Range [first, last) for sorting; forwarded as boost::begin(range), boost::end(range).
+
+  */
+  template <class Range>
+  inline void float_sort(Range& range)
+  {
+    float_sort(boost::begin(range), boost::end(range));
+  }
+
+  /*!
+    \brief Floating-point sort algorithm using random access iterators with just right-shift functor.
+
+    \param[in] first Iterator pointing to the first element.
+    \param[in] last Iterator pointing to one beyond the end of data.
+    \param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
+    \note Ranges shorter than @c detail::min_sort_size are sorted with @c std::sort using @c operator<, without calling @c rshift.
+  */
+  template <class RandomAccessIter, class Right_shift>
+  inline void float_sort(RandomAccessIter first, RandomAccessIter last,
+                         Right_shift rshift)
+  {
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last);
+    else
+      detail::float_sort(first, last, rshift(*first, 0), rshift);
+  }
+
+  /*!
+    \brief Floating-point sort algorithm taking a range, with just right-shift functor.
+
+    \param[in] range Range [first, last) for sorting; forwarded as boost::begin(range), boost::end(range).
+    \param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
+
+  */
+  template <class Range, class Right_shift>
+  inline void float_sort(Range& range, Right_shift rshift)
+  {
+      float_sort(boost::begin(range), boost::end(range), rshift);
+  }
+
+
+  /*!
+   \brief Float sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+  */
+
+  template <class RandomAccessIter, class Right_shift, class Compare>
+  inline void float_sort(RandomAccessIter first, RandomAccessIter last,
+                         Right_shift rshift, Compare comp)
+  {
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last, comp); // short range: comparison sort with comp only
+    else
+      detail::float_sort(first, last, rshift(*first, 0), rshift, comp);
+  }
+
+
+  /*!
+   \brief Float sort algorithm taking a range, with both right-shift and user-defined comparison operator.
+
+   \param[in] range Range [first, last) for sorting; forwarded as boost::begin(range), boost::end(range).
+   \param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+  */
+
+  template <class Range, class Right_shift, class Compare>
+  inline void float_sort(Range& range, Right_shift rshift, Compare comp)
+  {
+      float_sort(boost::begin(range), boost::end(range), rshift, comp);
+  }
+}
+}
+}
+
+#endif
diff --git a/boost/sort/spreadsort/integer_sort.hpp b/boost/sort/spreadsort/integer_sort.hpp
index 0727ccd4a0..6bf3f683e1 100644
--- a/boost/sort/spreadsort/integer_sort.hpp
+++ b/boost/sort/spreadsort/integer_sort.hpp
@@ -1,185 +1,315 @@
-//Templated Spreadsort-based implementation of integer_sort
-
-//          Copyright Steven J. Ross 2001 - 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort/ for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-
-Doxygen comments by Paul A. Bristow Jan 2015
-
-*/
-
-#ifndef BOOST_INTEGER_SORT_HPP
-#define BOOST_INTEGER_SORT_HPP
-#include <algorithm>
-#include <vector>
-#include <cstring>
-#include <limits>
-#include <boost/static_assert.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/sort/spreadsort/detail/integer_sort.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
-  //Top-level sorting call for integers.
-
-
-/*! \brief Integer sort algorithm using random access iterators.
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-   which returns an integer-type right-shifted a specified number of bits.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-  template <class RandomAccessIter>
-  inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
-  {
-    // Don't sort if it's too small to optimize.
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last);
-    else
-      detail::integer_sort(first, last, *first >> 0);
-  }
-
-/*! \brief Integer sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
-   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \return @c void.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-*/
-  template <class RandomAccessIter, class Right_shift, class Compare>
-  inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
-                           Right_shift shift, Compare comp) {
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last, comp);
-    else
-      detail::integer_sort(first, last, shift(*first, 0), shift, comp);
-  }
-
-/*! \brief Integer sort algorithm using random access iterators with just right-shift functor.
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-
-\par Performance:
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-  * <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
-  * <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-  template <class RandomAccessIter, class Right_shift>
-  inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
-                           Right_shift shift) {
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last);
-    else
-      detail::integer_sort(first, last, shift(*first, 0), shift);
-  }
-}
-}
-}
-
-#endif
-
+//Templated Spreadsort-based implementation of integer_sort
+
+//          Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort/ for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+
+Doxygen comments by Paul A. Bristow Jan 2015
+
+*/
+
+#ifndef BOOST_INTEGER_SORT_HPP
+#define BOOST_INTEGER_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <limits>
+#include <boost/static_assert.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/integer_sort.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+  //Top-level sorting call for integers.
+
+
+/*! \brief Integer sort algorithm using random access iterators.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c integer_sort is asymptotically faster
+than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
+so its worst-case with default settings for 32-bit integers is
+<em> O(N * (32/11 + 11)) </em>: 32/11 slow radix-based iterations plus 11 fast comparison-based iterations.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
+   \n
+   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+   which returns an integer-type right-shifted a specified number of bits.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*(K/S + S)) </em> operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+  template <class RandomAccessIter>
+  inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
+  {
+    // Too few elements for the radix-based path: use std::sort instead.
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last);
+    else
+      detail::integer_sort(first, last, *first >> 0);
+  }
+
+/*! \brief Integer sort algorithm taking a range instead of an iterator pair.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c integer_sort is asymptotically faster
+than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
+so its worst-case with default settings for 32-bit integers is
+<em> O(N * (32/11 + 11)) </em>: 32/11 slow radix-based iterations plus 11 fast comparison-based iterations.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
+   \n
+   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
+
+   \param[in] range Range [first, last) for sorting; forwarded as boost::begin(range), boost::end(range).
+
+   \pre @c range denotes a valid range [@c first, @c last).
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*(K/S + S)) </em> operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+template <class Range>
+inline void integer_sort(Range& range)
+{
+  integer_sort(boost::begin(range), boost::end(range));
+}
+
+/*! \brief Integer sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c integer_sort is asymptotically faster
+than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
+so its worst-case with default settings for 32-bit integers is
+<em> O(N * (32/11 + 11)) </em>: 32/11 slow radix-based iterations plus 11 fast comparison-based iterations.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
+   \n
+   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*(K/S + S)) </em> operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+*/
+  template <class RandomAccessIter, class Right_shift, class Compare>
+  inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
+                           Right_shift shift, Compare comp) {
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last, comp); // short range: comparison sort with comp only
+    else
+      detail::integer_sort(first, last, shift(*first, 0), shift, comp);
+  }
+
+/*! \brief Integer sort algorithm taking a range, with both right-shift and user-defined comparison operator.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c integer_sort is asymptotically faster
+than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
+so its worst-case with default settings for 32-bit integers is
+<em> O(N * (32/11 + 11)) </em>: 32/11 slow radix-based iterations plus 11 fast comparison-based iterations.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
+   \n
+   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
+
+   \param[in] range Range [first, last) for sorting; forwarded as boost::begin(range), boost::end(range).
+   \param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+   \pre @c range denotes a valid range [@c first, @c last).
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*(K/S + S)) </em> operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+*/
+template <class Range, class Right_shift, class Compare>
+inline void integer_sort(Range& range, Right_shift shift, Compare comp)
+{
+  integer_sort(boost::begin(range), boost::end(range), shift, comp);
+}
+
+/*! \brief Integer sort algorithm using random access iterators with just right-shift functor.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+
+\par Performance:
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c integer_sort is asymptotically faster
+than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
+so its worst-case with default settings for 32-bit integers is
+<em> O(N * (32/11 + 11)) </em>, i.e. 32/11 slow radix-based iterations plus 11 fast comparison-based iterations.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+  * <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
+  * <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
+
+   \param[in] first Iterator pointer to first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*(K/S + S)) </em> operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+  template <class RandomAccessIter, class Right_shift>
+  inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
+                           Right_shift shift) {
+    // Inputs shorter than detail::min_sort_size fall back to std::sort.
+    if (!(last - first < detail::min_sort_size))
+      detail::integer_sort(first, last, shift(*first, 0), shift);
+    else std::sort(first, last);  // small input: plain comparison sort
+  }
+
+
+/*! \brief Integer sort algorithm using range with just right-shift functor.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+
+\par Performance:
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c integer_sort is asymptotically faster
+than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
+so its worst-case with default settings for 32-bit integers is
+<em> O(N * (32/11 + 11)) </em>, i.e. 32/11 slow radix-based iterations plus 11 fast comparison-based iterations.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+  * <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
+  * <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
+
+   \param[in] range Range [first, last) for sorting.
+   \param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*(K/S + S)) </em> operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+template <class Range, class Right_shift>
+inline void integer_sort(Range& range, Right_shift shift)
+{ // Range convenience overload taking only the right-shift functor.
+  integer_sort(boost::begin(range), boost::end(range), shift); // forwards begin/end and shift unchanged
+}
+}
+}
+}
+
+#endif
+
diff --git a/boost/sort/spreadsort/spreadsort.hpp b/boost/sort/spreadsort/spreadsort.hpp
index 48377123e3..49f20ed147 100644
--- a/boost/sort/spreadsort/spreadsort.hpp
+++ b/boost/sort/spreadsort/spreadsort.hpp
@@ -1,146 +1,169 @@
-// Templated generic hybrid sorting
-
-//          Copyright Steven J. Ross 2001 - 2009.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort/ for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-float_mem_cast fix provided by:
-Scott McMurray
-*/
-
-#ifndef BOOST_SORT_SPREADSORT_HPP
-#define BOOST_SORT_SPREADSORT_HPP
-#include <algorithm>
-#include <vector>
-#include <cstring>
-#include <string>
-#include <limits>
-#include <boost/type_traits.hpp>
-#include <boost/sort/spreadsort/integer_sort.hpp>
-#include <boost/sort/spreadsort/float_sort.hpp>
-#include <boost/sort/spreadsort/string_sort.hpp>
-
-namespace boost {
-namespace sort {
-
-/*! Namespace for spreadsort sort variants for different data types.
-\note Use hyperlinks (coloured) to get detailed information about functions.
-*/
-namespace spreadsort {
-
-  /*!
-    \brief Generic @c spreadsort variant detecting integer-type elements so call to @c integer_sort.
-    \details If the data type provided is an integer, @c integer_sort is used.
-    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
-    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
-    \param[in] first Iterator pointer to first element.
-    \param[in] last Iterator pointing to one beyond the end of data.
-
-    \pre [@c first, @c last) is a valid range.
-    \pre @c RandomAccessIter @c value_type is mutable.
-    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-    which returns an integer-type right-shifted a specified number of bits.
-    \post The elements in the range [@c first, @c last) are sorted in ascending order.
-  */
-
-  template <class RandomAccessIter>
-  inline typename boost::enable_if_c< std::numeric_limits<
-    typename std::iterator_traits<RandomAccessIter>::value_type >::is_integer,
-    void >::type
-  spreadsort(RandomAccessIter first, RandomAccessIter last)
-  {
-    integer_sort(first, last);
-  }
-
-  /*!
-    \brief Generic @c spreadsort variant detecting float element type so call to @c float_sort.
-    \details If the data type provided is a float or castable-float, @c float_sort is used.
-    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
-    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
-
-    \param[in] first Iterator pointer to first element.
-    \param[in] last Iterator pointing to one beyond the end of data.
-
-    \pre [@c first, @c last) is a valid range.
-    \pre @c RandomAccessIter @c value_type is mutable.
-    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-    which returns an integer-type right-shifted a specified number of bits.
-    \post The elements in the range [@c first, @c last) are sorted in ascending order.
-  */
-
-  template <class RandomAccessIter>
-  inline typename boost::enable_if_c< !std::numeric_limits<
-    typename std::iterator_traits<RandomAccessIter>::value_type >::is_integer
-    && std::numeric_limits<
-    typename std::iterator_traits<RandomAccessIter>::value_type >::is_iec559,
-    void >::type
-  spreadsort(RandomAccessIter first, RandomAccessIter last)
-  {
-    float_sort(first, last);
-  }
-
-  /*!
-    \brief  Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::strings.
-    \details If the data type provided is a string, @c string_sort is used.
-    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
-    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
-
-    \param[in] first Iterator pointer to first element.
-    \param[in] last Iterator pointing to one beyond the end of data.
-
-    \pre [@c first, @c last) is a valid range.
-    \pre @c RandomAccessIter @c value_type is mutable.
-    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-    which returns an integer-type right-shifted a specified number of bits.
-    \post The elements in the range [@c first, @c last) are sorted in ascending order.
-  */
-
-  template <class RandomAccessIter>
-  inline typename boost::enable_if_c<
-    is_same<typename std::iterator_traits<RandomAccessIter>::value_type,
-            typename std::string>::value, void >::type
-  spreadsort(RandomAccessIter first, RandomAccessIter last)
-  {
-    string_sort(first, last);
-  }
-
-  /*!
-    \brief  Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::wstrings.
-    \details If the data type provided is a wstring, @c string_sort is used.
-    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
-    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.  Also, 2-byte wide-characters are the limit above which string_sort is inefficient, so on platforms with wider characters, this will not accept wstrings.
-
-    \param[in] first Iterator pointer to first element.
-    \param[in] last Iterator pointing to one beyond the end of data.
-
-    \pre [@c first, @c last) is a valid range.
-    \pre @c RandomAccessIter @c value_type is mutable.
-    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-    which returns an integer-type right-shifted a specified number of bits.
-    \post The elements in the range [@c first, @c last) are sorted in ascending order.
-  */
-  template <class RandomAccessIter>
-  inline typename boost::enable_if_c<
-    is_same<typename std::iterator_traits<RandomAccessIter>::value_type,
-            typename std::wstring>::value &&
-    sizeof(wchar_t) == 2, void >::type
-  spreadsort(RandomAccessIter first, RandomAccessIter last)
-  {
-    boost::uint16_t unused = 0;
-    string_sort(first, last, unused);
-  }
-} // namespace spreadsort
-} // namespace sort
-} // namespace boost
-
-#endif
+// Templated generic hybrid sorting
+
+//          Copyright Steven J. Ross 2001 - 2009.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort/ for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+float_mem_cast fix provided by:
+Scott McMurray
+ Range support provided by:
+ Alexander Zaitsev
+*/
+
+#ifndef BOOST_SORT_SPREADSORT_HPP
+#define BOOST_SORT_SPREADSORT_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <string>
+#include <limits>
+#include <boost/type_traits.hpp>
+#include <boost/sort/spreadsort/integer_sort.hpp>
+#include <boost/sort/spreadsort/float_sort.hpp>
+#include <boost/sort/spreadsort/string_sort.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+
+namespace boost {
+namespace sort {
+
+/*! Namespace for spreadsort sort variants for different data types.
+\note Use hyperlinks (coloured) to get detailed information about functions.
+*/
+namespace spreadsort {
+
+  /*!
+    \brief Generic @c spreadsort variant that detects integer-type elements and calls @c integer_sort.
+    \details If the data type provided is an integer, @c integer_sort is used.
+    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
+    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
+    \param[in] first Iterator pointer to first element.
+    \param[in] last Iterator pointing to one beyond the end of data.
+
+    \pre [@c first, @c last) is a valid range.
+    \pre @c RandomAccessIter @c value_type is mutable.
+    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+    which returns an integer-type right-shifted a specified number of bits.
+    \post The elements in the range [@c first, @c last) are sorted in ascending order.
+  */
+
+  template <class RandomAccessIter>
+  inline typename boost::enable_if_c< std::numeric_limits<
+    typename std::iterator_traits<RandomAccessIter>::value_type >::is_integer,
+    void >::type
+  spreadsort(RandomAccessIter first, RandomAccessIter last)
+  { // Participates in overload resolution only when numeric_limits<value_type>::is_integer is true.
+    integer_sort(first, last); // two-iterator call; no shift or comparison functor is supplied
+  }
+
+  /*!
+    \brief Generic @c spreadsort variant that detects a float element type and calls @c float_sort.
+    \details If the data type provided is a float or castable-float, @c float_sort is used.
+    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
+    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
+
+    \param[in] first Iterator pointer to first element.
+    \param[in] last Iterator pointing to one beyond the end of data.
+
+    \pre [@c first, @c last) is a valid range.
+    \pre @c RandomAccessIter @c value_type is mutable.
+    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+    which returns an integer-type right-shifted a specified number of bits.
+    \post The elements in the range [@c first, @c last) are sorted in ascending order.
+  */
+
+  template <class RandomAccessIter>
+  inline typename boost::enable_if_c< !std::numeric_limits<
+    typename std::iterator_traits<RandomAccessIter>::value_type >::is_integer
+    && std::numeric_limits<
+    typename std::iterator_traits<RandomAccessIter>::value_type >::is_iec559,
+    void >::type
+  spreadsort(RandomAccessIter first, RandomAccessIter last)
+  { // Enabled only when value_type is not an integer and numeric_limits<value_type>::is_iec559 is true.
+    float_sort(first, last); // two-iterator call; no functors are supplied
+  }
+
+  /*!
+    \brief  Generic @c spreadsort variant that detects a @c std::string element type and calls @c string_sort.
+    \details If the data type provided is a string, @c string_sort is used.
+    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
+    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
+
+    \param[in] first Iterator pointer to first element.
+    \param[in] last Iterator pointing to one beyond the end of data.
+
+    \pre [@c first, @c last) is a valid range.
+    \pre @c RandomAccessIter @c value_type is mutable.
+    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+    which returns an integer-type right-shifted a specified number of bits.
+    \post The elements in the range [@c first, @c last) are sorted in ascending order.
+  */
+
+  template <class RandomAccessIter>
+  inline typename boost::enable_if_c<
+    is_same<typename std::iterator_traits<RandomAccessIter>::value_type,
+            typename std::string>::value, void >::type
+  spreadsort(RandomAccessIter first, RandomAccessIter last)
+  { // Enabled only when value_type is exactly std::string (is_same test above).
+    string_sort(first, last); // two-iterator call; no character-type argument is passed
+  }
+
+  /*!
+    \brief  Generic @c spreadsort variant that detects a @c std::wstring element type and calls @c string_sort.
+    \details If the data type provided is a wstring, @c string_sort is used.
+    \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
+    as @c spreadsort won't accept types that don't have the appropriate @c type_traits.  Also, 2-byte wide-characters are the limit above which string_sort is inefficient, so on platforms with wider characters, this will not accept wstrings.
+
+    \param[in] first Iterator pointer to first element.
+    \param[in] last Iterator pointing to one beyond the end of data.
+
+    \pre [@c first, @c last) is a valid range.
+    \pre @c RandomAccessIter @c value_type is mutable.
+    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+    \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+    which returns an integer-type right-shifted a specified number of bits.
+    \post The elements in the range [@c first, @c last) are sorted in ascending order.
+  */
+  template <class RandomAccessIter>
+  inline typename boost::enable_if_c<
+    is_same<typename std::iterator_traits<RandomAccessIter>::value_type,
+            typename std::wstring>::value &&
+    sizeof(wchar_t) == 2, void >::type
+  spreadsort(RandomAccessIter first, RandomAccessIter last)
+  { // Enabled only when value_type is std::wstring and wchar_t is 2 bytes wide.
+    boost::uint16_t unused = 0; // NOTE(review): presumably only its 16-bit type matters to string_sort - confirm
+    string_sort(first, last, unused); // three-argument call carrying the 16-bit value
+  }
+
+/*!
+\brief Generic @c spreadsort variant detects value_type and calls required sort function.
+\note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
+as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
+
+\param[in] range Range [first, last) for sorting.
+
+\pre [@c first, @c last) is a valid range.
+\post The elements in the range [@c first, @c last) are sorted in ascending order.
+*/
+
+template <class Range>
+void spreadsort(Range& range)
+{ // Range convenience overload: sorts the whole of `range` in place.
+    spreadsort(boost::begin(range), boost::end(range)); // the two-iterator call is resolved by the enable_if_c overloads
+}
+
+
+} // namespace spreadsort
+} // namespace sort
+} // namespace boost
+
+#endif
diff --git a/boost/sort/spreadsort/string_sort.hpp b/boost/sort/spreadsort/string_sort.hpp
index 4c3f1fbfe7..daaa054b6c 100644
--- a/boost/sort/spreadsort/string_sort.hpp
+++ b/boost/sort/spreadsort/string_sort.hpp
@@ -1,449 +1,741 @@
-//Templated hybrid string_sort
-
-//          Copyright Steven J. Ross 2001 - 2009.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-// See http://www.boost.org/libs/sort/ for library home page.
-
-/*
-Some improvements suggested by:
-Phil Endecott and Frank Gennari
-*/
-
-#ifndef BOOST_STRING_SORT_HPP
-#define BOOST_STRING_SORT_HPP
-#include <algorithm>
-#include <vector>
-#include <cstring>
-#include <limits>
-#include <boost/static_assert.hpp>
-#include <boost/sort/spreadsort/detail/constants.hpp>
-#include <boost/sort/spreadsort/detail/string_sort.hpp>
-
-namespace boost {
-namespace sort {
-namespace spreadsort {
-
-/*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-\par
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
-<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
-
-   \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
-   \tparam Unsigned_char_type  Unsigned character type used for string.
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type.  The actual value is unused.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-   which returns an integer-type right-shifted a specified number of bits.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-
-  template <class RandomAccessIter, class Unsigned_char_type>
-  inline void string_sort(RandomAccessIter first, RandomAccessIter last,
-                          Unsigned_char_type unused)
-  {
-    //Don't sort if it's too small to optimize
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last);
-    else
-      detail::string_sort(first, last, unused);
-  }
-
-
-/*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
-   \n
-   <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-   which returns an integer-type right-shifted a specified number of bits.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-  template <class RandomAccessIter>
-  inline void string_sort(RandomAccessIter first, RandomAccessIter last)
-  {
-    unsigned char unused = '\0';
-    string_sort(first, last, unused);
-  }
-
-
-/*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
-
-  (All variants fall back to @c std::sort if the data size is too small, < detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-
-   \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
-   \tparam Comp Functor type to use for comparison.
-   \tparam Unsigned_char_type Unsigned character type used for string.
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
-   \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type.  The actual value is unused.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-   which returns an integer-type right-shifted a specified number of bits.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \return @c void.
-
-   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-*/
-  template <class RandomAccessIter, class Compare, class Unsigned_char_type>
-  inline void reverse_string_sort(RandomAccessIter first,
-                RandomAccessIter last, Compare comp, Unsigned_char_type unused)
-  {
-    //Don't sort if it's too small to optimize.
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last, comp);
-    else
-      detail::reverse_string_sort(first, last, unused);
-  }
-
-
-/*! \brief String sort algorithm using random access iterators,  wraps using default of @c unsigned char.
-
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-   which returns an integer-type right-shifted a specified number of bits.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \return @c void.
-
-   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-*/
-  template <class RandomAccessIter, class Compare>
-  inline void reverse_string_sort(RandomAccessIter first,
-                                  RandomAccessIter last, Compare comp)
-  {
-    unsigned char unused = '\0';
-    reverse_string_sort(first, last, comp, unused);
-  }
-
-
-/*! \brief String sort algorithm using random access iterators,  wraps using default of @c unsigned char.
-
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
-   \param[in] length Functor to get the length of the string in characters.
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
-   which returns an integer-type right-shifted a specified number of bits.
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \return @c void.
-
-   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-  template <class RandomAccessIter, class Get_char, class Get_length>
-  inline void string_sort(RandomAccessIter first, RandomAccessIter last,
-                          Get_char getchar, Get_length length)
-  {
-    //Don't sort if it's too small to optimize
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last);
-    else {
-      //skipping past empties, which allows us to get the character type
-      //.empty() is not used so as not to require a user declaration of it
-      while (!length(*first)) {
-        if (++first == last)
-          return;
-      }
-      detail::string_sort(first, last, getchar, length, getchar((*first), 0));
-    }
-  }
-
-
-
-/*! \brief String sort algorithm using random access iterators,  wraps using default of @c unsigned char.
-
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
-   \param[in] length Functor to get the length of the string in characters.
-   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
-
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \return @c void.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-  template <class RandomAccessIter, class Get_char, class Get_length,
-            class Compare>
-  inline void string_sort(RandomAccessIter first, RandomAccessIter last,
-                          Get_char getchar, Get_length length, Compare comp)
-  {
-    //Don't sort if it's too small to optimize
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last, comp);
-    else {
-      //skipping past empties, which allows us to get the character type
-      //.empty() is not used so as not to require a user declaration of it
-      while (!length(*first)) {
-        if (++first == last)
-          return;
-      }
-      detail::string_sort(first, last, getchar, length, comp,
-                          getchar((*first), 0));
-    }
-  }
-
-
-/*! \brief Reverse String sort algorithm using random access iterators.
-
-  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
-
-  \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
-which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
-Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
-so @c integer_sort is asymptotically faster
-than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
-so its worst-case with default settings for 32-bit integers is
-<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
-Some performance plots of runtime vs. n and log(range) are provided:\n
-   <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
-   \n
-   <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
-
-
-   \param[in] first Iterator pointer to first element.
-   \param[in] last Iterator pointing to one beyond the end of data.
-   \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
-   \param[in] length Functor to get the length of the string in characters.
-   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
-
-
-   \pre [@c first, @c last) is a valid range.
-   \pre @c RandomAccessIter @c value_type is mutable.
-   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
-   \post The elements in the range [@c first, @c last) are sorted in ascending order.
-
-   \return @c void.
-
-   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
-   the right shift, subtraction of right-shifted elements, functors,
-   or any operations on iterators throw.
-
-   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
-   \warning Invalid arguments cause undefined behaviour.
-   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
-   enabling faster generic-programming.
-
-   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
-   \remark  *  N is @c last - @c first,
-   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
-   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
-
-*/
-  template <class RandomAccessIter, class Get_char, class Get_length,
-            class Compare>
-  inline void reverse_string_sort(RandomAccessIter first,
-    RandomAccessIter last, Get_char getchar, Get_length length, Compare comp)
-  {
-    //Don't sort if it's too small to optimize
-    if (last - first < detail::min_sort_size)
-      std::sort(first, last, comp);
-    else {
-      //skipping past empties, which allows us to get the character type
-      //.empty() is not used so as not to require a user declaration of it
-      while (!length(*(--last))) {
-        //If there is just one non-empty at the beginning, this is sorted
-        if (first == last)
-          return;
-      }
-      //making last just after the end of the non-empty part of the array
-      detail::reverse_string_sort(first, last + 1, getchar, length, comp,
-                                  getchar((*last), 0));
-    }
-  }
-}
-}
-}
-
-#endif
+//Templated hybrid string_sort
+
+//          Copyright Steven J. Ross 2001 - 2009.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort/ for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+*/
+
+#ifndef BOOST_STRING_SORT_HPP
+#define BOOST_STRING_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <limits>
+#include <boost/static_assert.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/string_sort.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+
+/*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
+   \tparam Unsigned_char_type  Unsigned character type used for string.
+   \param[in] first Iterator pointer to first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type.  The actual value is unused.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+   which returns an integer-type right-shifted a specified number of bits.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+
+  template <class RandomAccessIter, class Unsigned_char_type>
+  inline void string_sort(RandomAccessIter first, RandomAccessIter last,
+                          Unsigned_char_type unused)
+  {
+    //Ranges shorter than detail::min_sort_size go to std::sort; longer ones go to detail::string_sort.
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last);
+    else
+      detail::string_sort(first, last, unused);
+  }
+
+/*! \brief String sort algorithm using range, allowing character-type overloads.\n
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \tparam Unsigned_char_type  Unsigned character type used for string.
+   \param[in] range Range [first, last) for sorting.
+   \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type.  The actual value is unused.
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+
+template <class Range, class Unsigned_char_type>
+inline void string_sort(Range& range, Unsigned_char_type unused)
+{
+  string_sort(boost::begin(range), boost::end(range), unused); //delegate to the iterator-based overload
+}
+
+/*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
+   \n
+   <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \param[in] first Iterator pointer to first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+   which returns an integer-type right-shifted a specified number of bits.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+  template <class RandomAccessIter>
+  inline void string_sort(RandomAccessIter first, RandomAccessIter last)
+  {
+    unsigned char unused = '\0'; //dummy argument: only its type (unsigned char) is passed on
+    string_sort(first, last, unused);
+  }
+
+/*! \brief String sort algorithm using range, wraps using default of unsigned char.
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
+   \n
+   <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \param[in] range Range [first, last) for sorting.
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+template <class Range>
+inline void string_sort(Range& range)
+{
+  string_sort(boost::begin(range), boost::end(range)); //delegate to the iterator-based overload
+}
+
+/*! \brief Reverse string sort algorithm using random access iterators, allowing character-type overloads.
+
+  (All variants fall back to @c std::sort if the data size is too small, < detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+
+   \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
+   \tparam Compare Functor type to use for comparison.
+   \tparam Unsigned_char_type Unsigned character type used for string.
+
+   \param[in] first Iterator pointer to first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+   \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type.  The actual value is unused.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+   which returns an integer-type right-shifted a specified number of bits.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+*/
+  template <class RandomAccessIter, class Compare, class Unsigned_char_type>
+  inline void reverse_string_sort(RandomAccessIter first,
+                RandomAccessIter last, Compare comp, Unsigned_char_type unused)
+  {
+    //Too small to optimize: std::sort with comp. Otherwise detail::reverse_string_sort, which is not passed comp.
+    if (last - first < detail::min_sort_size)
+      std::sort(first, last, comp);
+    else
+      detail::reverse_string_sort(first, last, unused);
+  }
+
+/*! \brief Reverse string sort algorithm using a range, allowing character-type overloads.
+
+  (All variants fall back to @c std::sort if the data size is too small, < detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+   <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
+   \n
+   <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+
+   \tparam Compare Functor type to use for comparison.
+   \tparam Unsigned_char_type Unsigned character type used for string.
+
+   \param[in] range Range [first, last) for sorting.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+   \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type.  The actual value is unused.
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+*/
+template <class Range, class Compare, class Unsigned_char_type>
+inline void reverse_string_sort(Range& range, Compare comp, Unsigned_char_type unused)
+{
+  reverse_string_sort(boost::begin(range), boost::end(range), comp, unused); //delegate to the iterator-based overload
+}
+
+/*! \brief Reverse string sort algorithm using random access iterators, wrapping with the default of @c unsigned char.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms.\n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \param[in] first Iterator pointer to first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+   which returns an integer-type right-shifted a specified number of bits.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+*/
+  template <class RandomAccessIter, class Compare>
+  inline void reverse_string_sort(RandomAccessIter first,
+                                  RandomAccessIter last, Compare comp)
+  {
+    unsigned char unused = '\0'; //dummy argument: only its type (unsigned char) is passed on
+    reverse_string_sort(first, last, comp, unused);
+  }
+
+/*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \param[in] range Range [first, last) for sorting.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in the order defined by @c comp.
+
+   \return @c void.
+
+   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+*/
+template <class Range, class Compare>
+inline void reverse_string_sort(Range& range, Compare comp)
+{ // Range convenience overload: passes the range's begin/end iterators and comp on.
+  reverse_string_sort(boost::begin(range), boost::end(range), comp);
+}
+
+/*! \brief String sort algorithm using random access iterators, with user-supplied character-access and length functors.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
+   \param[in] length Functor to get the length of the string in characters.
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \pre @c RandomAccessIter @c value_type supports the @c operator>>,
+   which returns an integer-type right-shifted a specified number of bits.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+  template <class RandomAccessIter, class Get_char, class Get_length>
+  inline void string_sort(RandomAccessIter first, RandomAccessIter last,
+                          Get_char get_character, Get_length length)
+  {
+    // Short inputs are not worth the radix machinery; std::sort handles them.
+    if (last - first < detail::min_sort_size) {
+      std::sort(first, last);
+      return;
+    }
+    // Step over leading zero-length elements so *first has a character to sample
+    // (length() is used so the element type need not offer .empty()).
+    while (!length(*first)) {
+      if (++first == last)
+        return; // every element was empty: nothing left to order
+    }
+    detail::string_sort(first, last, get_character, length, get_character((*first), 0));
+  }
+
+/*! \brief String sort algorithm using range, with user-supplied character-access and length functors.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+   \param[in] range Range [first, last) for sorting.
+   \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
+   \param[in] length Functor to get the length of the string in characters.
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in ascending order.
+
+   \return @c void.
+
+   \throws  std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+template <class Range, class Get_char, class Get_length>
+inline void string_sort(Range& range, Get_char get_character, Get_length length)
+{ // Range convenience overload: passes the range's begin/end iterators and both functors on.
+  string_sort(boost::begin(range), boost::end(range), get_character, length);
+}
+
+
+/*! \brief String sort algorithm using random access iterators, with user-supplied character-access, length and comparison functors.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
+   \param[in] length Functor to get the length of the string in characters.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in the order defined by @c comp.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+  template <class RandomAccessIter, class Get_char, class Get_length,
+            class Compare>
+  inline void string_sort(RandomAccessIter first, RandomAccessIter last,
+                          Get_char get_character, Get_length length, Compare comp)
+  {
+    // Short inputs are not worth the radix machinery; std::sort handles them.
+    if (last - first < detail::min_sort_size) {
+      std::sort(first, last, comp);
+      return;
+    }
+    // Step over leading zero-length elements so *first has a character to sample
+    // (length() is used so the element type need not offer .empty()).
+    while (!length(*first)) {
+      if (++first == last)
+        return; // every element was empty: nothing left to order
+    }
+    detail::string_sort(first, last, get_character, length, comp,
+                        get_character((*first), 0));
+  }
+
+/*! \brief String sort algorithm using range, with user-supplied character-access, length and comparison functors.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+  \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+
+   \param[in] range Range [first, last) for sorting.
+   \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
+   \param[in] length Functor to get the length of the string in characters.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in the order defined by @c comp.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+template <class Range, class Get_char, class Get_length, class Compare>
+inline void string_sort(Range& range,
+                        Get_char get_character, Get_length length, Compare comp)
+{ // Range convenience overload: passes the range's begin/end iterators and all functors on.
+  string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
+}
+
+/*! \brief Reverse String sort algorithm using random access iterators.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+ \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+
+   \param[in] first Iterator pointing to the first element.
+   \param[in] last Iterator pointing to one beyond the end of data.
+   \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
+   \param[in] length Functor to get the length of the string in characters.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+
+   \pre [@c first, @c last) is a valid range.
+   \pre @c RandomAccessIter @c value_type is mutable.
+   \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
+   \post The elements in the range [@c first, @c last) are sorted in the order defined by @c comp.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+  template <class RandomAccessIter, class Get_char, class Get_length,
+            class Compare>
+  inline void reverse_string_sort(RandomAccessIter first,
+    RandomAccessIter last, Get_char get_character, Get_length length, Compare comp)
+  {
+    // Short inputs are not worth the radix machinery; std::sort handles them.
+    if (last - first < detail::min_sort_size) {
+      std::sort(first, last, comp);
+      return;
+    }
+    // Walk last back over trailing zero-length elements so *last has a character
+    // to sample (length() is used so the element type need not offer .empty()).
+    while (!length(*(--last))) {
+      // Backed up to first and it is empty too: all empty, nothing to order.
+      if (first == last)
+        return;
+    }
+    // last now addresses the final non-empty element, so the range ends at last + 1.
+    detail::reverse_string_sort(first, last + 1, get_character, length, comp,
+                                get_character((*last), 0));
+  }
+
+/*! \brief Reverse String sort algorithm using range.
+
+  (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
+
+ \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
+which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
+\par
+Worst-case performance is <em>  O(N * (lg(range)/s + s)) </em>,
+so @c string_sort is asymptotically faster
+than pure comparison-based algorithms. \n\n
+Some performance plots of runtime vs. n and log(range) are provided:\n
+<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
+<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
+
+
+   \param[in] range Range [first, last) for sorting.
+   \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
+   \param[in] length Functor to get the length of the string in characters.
+   \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
+
+
+   \pre [@c first, @c last) is a valid range.
+   \post The elements in the range [@c first, @c last) are sorted in the order defined by @c comp.
+
+   \return @c void.
+
+   \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
+   the right shift, subtraction of right-shifted elements, functors,
+   or any operations on iterators throw.
+
+   \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
+   \warning Invalid arguments cause undefined behaviour.
+   \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
+   enabling faster generic-programming.
+
+   \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
+   \remark  *  N is @c last - @c first,
+   \remark  *  K is the log of the range in bits (32 for 32-bit integers using their full range),
+   \remark  *  S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
+
+*/
+template <class Range, class Get_char, class Get_length,
+        class Compare>
+inline void reverse_string_sort(Range& range, Get_char get_character, Get_length length, Compare comp)
+{ // Range convenience overload: passes the range's begin/end iterators and all functors on.
+    reverse_string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
+}
+}
+}
+}
+
+#endif
author	DongHun Kwak <dh0128.kwak@samsung.com>	2019-12-05 15:12:59 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2019-12-05 15:12:59 +0900
commit	b8cf34c691623e4ec329053cbbf68522a855882d (patch)
tree	34da08632a99677f6b79ecb65e5b655a5b69a67f /boost/sort
parent	3fdc3e5ee96dca5b11d1694975a65200787eab86 (diff)
download	boost-b8cf34c691623e4ec329053cbbf68522a855882d.tar.gz boost-b8cf34c691623e4ec329053cbbf68522a855882d.tar.bz2 boost-b8cf34c691623e4ec329053cbbf68522a855882d.zip