//---------------------------------------------------------------------------- /// @file parallel_sort.hpp /// @brief Contains the parallel_sort class, which is part of the /// block_indirect_sort algorithm /// /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n /// Distributed under the Boost Software License, Version 1.0.\n /// ( See accompanying file LICENSE_1_0.txt or copy at /// http://www.boost.org/LICENSE_1_0.txt ) /// @version 0.1 /// /// @remarks //----------------------------------------------------------------------------- #ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP #define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP #include #include #include namespace boost { namespace sort { namespace blk_detail { //---------------------------------------------------------------------------- // USING SENTENCES //---------------------------------------------------------------------------- namespace bsc = boost::sort::common; namespace bscu = bsc::util; using bscu::nbits64; using bsc::pivot9; using boost::sort::pdqsort; // ///--------------------------------------------------------------------------- /// @struct parallel_sort /// @brief This class do a parallel sort, using the quicksort filtering, /// splitting the data until the number of elements is smaller than a /// predefined value (max_per_thread) //---------------------------------------------------------------------------- template struct parallel_sort { //------------------------------------------------------------------------- // D E F I N I T I O N S //------------------------------------------------------------------------- typedef typename std::iterator_traits::value_type value_t; typedef std::atomic atomic_t; typedef std::function function_t; typedef backbone backbone_t; //------------------------------------------------------------------------ // V A R I A B L E S //------------------------------------------------------------------------ // reference to a object with all the data to sort backbone_t &bk; // maximun number of element to sort woth 1 thread size_t max_per_thread; // atomic counter for to detect the end of the works created inside // the object atomic_t counter; //------------------------------------------------------------------------ // F U N C T I O N S //------------------------------------------------------------------------ parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last); void divide_sort(Iter_t first, Iter_t last, uint32_t level); // //------------------------------------------------------------------------ // function : function_divide_sort /// @brief create a function_t with a call to divide_sort, and inser in /// the stack of the backbone // /// @param first : iterator to the first element of the range to divide /// @param last : iterator to the next element after the last element of /// the range to divide /// @param level : level of depth in the division.When zero call to /// pdqsort /// @param counter : atomic variable which is decremented when finish /// the function. This variable is used for to know /// when are finished all the function_t created /// inside an object /// @param error : global indicator of error. //------------------------------------------------------------------------ void function_divide_sort(Iter_t first, Iter_t last, uint32_t level, atomic_t &counter, bool &error) { bscu::atomic_add(counter, 1); function_t f1 = [this, first, last, level, &counter, &error]( ) { if (not error) { try { this->divide_sort (first, last, level); } catch (std::bad_alloc &) { error = true; }; }; bscu::atomic_sub (counter, 1); }; bk.works.emplace_back(f1); }; //-------------------------------------------------------------------------- };// end struct parallel_sort //-------------------------------------------------------------------------- // //############################################################################ // ## // ## // N O N I N L I N E F U N C T I O N S ## // ## // ## //############################################################################ // //------------------------------------------------------------------------ // function : parallel_sort /// @brief constructor of the class /// @param [in] bkbn : backbone struct with all the information to sort /// @param [in] first : iterator to the first element to sort /// @param [in] last : iterator to the next element after the last //------------------------------------------------------------------------ template parallel_sort ::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last) : bk(bkbn), counter(0) { assert((last - first) >= 0); size_t nelem = size_t(last - first); //------------------- check if sort -------------------------------------- bool sorted = true; for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++); if (sorted) return; //------------------- check if reverse sort --------------------------- sorted = true; for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++); if (sorted) { size_t nelem2 = nelem >> 1; Iter_t it1 = first, it2 = last - 1; for (size_t i = 0; i < nelem2; ++i) std::swap(*(it1++), *(it2--)); return; }; //-------------------max_per_thread --------------------------- uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1; if (nbits_size > 5) nbits_size = 5; max_per_thread = 1 << (18 - nbits_size); uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2; //---------------- check if only single thread ----------------------- if (nelem < (max_per_thread)) { pdqsort(first, last, bk.cmp); return; }; if (not bk.error) divide_sort(first, last, level); // wait until all the parts are finished bk.exec(counter); }; //------------------------------------------------------------------------ // function : divide_sort /// @brief this function divide the data in two part, for to be sorted in /// a parallel mode /// @param first : iterator to the first element to sort /// @param last : iterator to the next element after the last /// @param level : level of depth before call to pdqsort //------------------------------------------------------------------------ template void parallel_sort ::divide_sort(Iter_t first, Iter_t last, uint32_t level) { //------------------- check if sort ----------------------------------- bool sorted = true; for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++); if (sorted) return; //---------------- check if finish the subdivision ------------------- size_t nelem = last - first; if (level == 0 or nelem < (max_per_thread)) { return pdqsort(first, last, bk.cmp); }; //-------------------- pivoting ---------------------------------- pivot9(first, last, bk.cmp); const value_t &val = const_cast(*first); Iter_t c_first = first + 1, c_last = last - 1; while (bk.cmp(*c_first, val)) ++c_first; while (bk.cmp(val, *c_last)) --c_last; while (c_first < c_last) { std::swap(*(c_first++), *(c_last--)); while (bk.cmp(*c_first, val)) ++c_first; while (bk.cmp(val, *c_last)) --c_last; }; std::swap(*first, *c_last); // insert the work of the second half in the stack of works function_divide_sort(c_first, last, level - 1, counter, bk.error); if (bk.error) return; // The first half is done by the same thread function_divide_sort(first, c_last, level - 1, counter, bk.error); }; // //**************************************************************************** };// End namespace blk_detail };// End namespace sort };// End namespace boost //**************************************************************************** // #endif