sample_sort.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. //----------------------------------------------------------------------------
  2. /// @file sample_sort.hpp
  3. /// @brief contains the class sample_sort
  4. ///
  5. /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
  6. /// Distributed under the Boost Software License, Version 1.0.\n
  7. /// ( See accompanying file LICENSE_1_0.txt or copy at
  8. /// http://www.boost.org/LICENSE_1_0.txt )
  9. /// @version 0.1
  10. ///
  11. /// @remarks
  12. //-----------------------------------------------------------------------------
  13. #ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
  14. #define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
  15. #include <functional>
  16. #include <future>
  17. #include <iterator>
  18. #include <memory>
  19. #include <type_traits>
  20. #include <vector>
  21. #include <algorithm>
  22. #include <boost/sort/spinsort/spinsort.hpp>
  23. #include <boost/sort/common/indirect.hpp>
  24. #include <boost/sort/common/util/atomic.hpp>
  25. #include <boost/sort/common/merge_four.hpp>
  26. #include <boost/sort/common/merge_vector.hpp>
  27. #include <boost/sort/common/range.hpp>
  28. namespace boost
  29. {
  30. namespace sort
  31. {
  32. namespace sample_detail
  33. {
  34. //---------------------------------------------------------------------------
  35. // USING SENTENCES
  36. //---------------------------------------------------------------------------
  37. namespace bsc = boost::sort::common;
  38. namespace bss = boost::sort::spin_detail;
  39. namespace bscu = boost::sort::common::util;
  40. using bsc::range;
  41. using bscu::atomic_add;
  42. using bsc::merge_vector4;
  43. using bsc::uninit_merge_level4;
  44. using bsc::less_ptr_no_null;
  45. //
  46. ///---------------------------------------------------------------------------
  47. /// @struct sample_sort
  48. /// @brief This a structure for to implement a sample sort, exception
  49. /// safe
  50. /// @tparam
  51. /// @remarks
  52. //----------------------------------------------------------------------------
  53. template<class Iter_t, class Compare>
  54. struct sample_sort
  55. {
  56. //------------------------------------------------------------------------
  57. // DEFINITIONS
  58. //------------------------------------------------------------------------
  59. typedef value_iter<Iter_t> value_t;
  60. typedef range<Iter_t> range_it;
  61. typedef range<value_t *> range_buf;
  62. typedef sample_sort<Iter_t, Compare> this_t;
  63. //------------------------------------------------------------------------
  64. // VARIABLES AND CONSTANTS
  65. //------------------------------------------------------------------------
  66. // minimun numbers of elements for to be sortd in parallel mode
  67. static const uint32_t thread_min = (1 << 16);
  68. // Number of threads to use in the algorithm
  69. // Number of intervals for to do the internal division of the data
  70. uint32_t nthread, ninterval;
  71. // Bool variables indicating if the auxiliary memory is constructed
  72. // and indicating in the auxiliary memory had been obtained inside the
  73. /// algorithm or had been received as a parameter
  74. bool construct = false, owner = false;
  75. // Comparison object for to compare two elements
  76. Compare comp;
  77. // Range with all the elements to sort
  78. range_it global_range;
  79. // range with the auxiliary memory
  80. range_buf global_buf;
  81. // vector of futures
  82. std::vector<std::future<void>> vfuture;
  83. // vector of vectors which contains the ranges to merge obtained in the
  84. // subdivision
  85. std::vector<std::vector<range_it>> vv_range_it;
  86. // each vector of ranges of the vv_range_it, need their corresponding buffer
  87. // for to do the merge
  88. std::vector<std::vector<range_buf>> vv_range_buf;
  89. // Initial vector of ranges
  90. std::vector<range_it> vrange_it_ini;
  91. // Initial vector of buffers
  92. std::vector<range_buf> vrange_buf_ini;
  93. // atomic counter for to know when are finished the function_t created
  94. // inside a function
  95. std::atomic<uint32_t> njob;
  96. // Indicate if an error in the algorithm for to undo all
  97. bool error;
  98. //------------------------------------------------------------------------
  99. // FUNCTIONS OF THE STRUCT
  100. //------------------------------------------------------------------------
  101. void initial_configuration(void);
  102. sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
  103. value_t *paux, size_t naux);
  104. sample_sort(Iter_t first, Iter_t last)
  105. : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(),
  106. nullptr, 0) { };
  107. sample_sort(Iter_t first, Iter_t last, Compare cmp)
  108. : sample_sort(first, last, cmp, std::thread::hardware_concurrency(),
  109. nullptr, 0) { };
  110. sample_sort(Iter_t first, Iter_t last, uint32_t num_thread)
  111. : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { };
  112. sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
  113. : sample_sort(first, last, cmp, num_thread, nullptr, 0) { };
  114. sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
  115. range_buf range_buf_initial)
  116. : sample_sort(first, last, cmp, num_thread,
  117. range_buf_initial.first, range_buf_initial.size()) { };
  118. void destroy_all(void);
  119. //
  120. //-----------------------------------------------------------------------------
  121. // function :~sample_sort
  122. /// @brief destructor of the class. The utility is to destroy the temporary
  123. /// buffer used in the sorting process
  124. //-----------------------------------------------------------------------------
  125. ~sample_sort(void) { destroy_all(); };
  126. //
  127. //-----------------------------------------------------------------------
  128. // function : execute first
  129. /// @brief this a function to assign to each thread in the first merge
  130. //-----------------------------------------------------------------------
  131. void execute_first(void)
  132. {
  133. uint32_t job = 0;
  134. while ((job = atomic_add(njob, 1)) < ninterval)
  135. {
  136. uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job],
  137. vv_range_buf[job], comp);
  138. };
  139. };
  140. //
  141. //-----------------------------------------------------------------------
  142. // function : execute
  143. /// @brief this is a function to assignt each thread the final merge
  144. //-----------------------------------------------------------------------
  145. void execute(void)
  146. {
  147. uint32_t job = 0;
  148. while ((job = atomic_add(njob, 1)) < ninterval)
  149. {
  150. merge_vector4(vrange_buf_ini[job], vrange_it_ini[job],
  151. vv_range_buf[job], vv_range_it[job], comp);
  152. };
  153. };
  154. //
  155. //-----------------------------------------------------------------------
  156. // function : first merge
  157. /// @brief Implement the merge of the initially sparse ranges
  158. //-----------------------------------------------------------------------
  159. void first_merge(void)
  160. { //---------------------------------- begin --------------------------
  161. njob = 0;
  162. for (uint32_t i = 0; i < nthread; ++i)
  163. {
  164. vfuture[i] = std::async(std::launch::async, &this_t::execute_first,
  165. this);
  166. };
  167. for (uint32_t i = 0; i < nthread; ++i)
  168. vfuture[i].get();
  169. };
  170. //
  171. //-----------------------------------------------------------------------
  172. // function : final merge
  173. /// @brief Implement the final merge of the ranges
  174. //-----------------------------------------------------------------------
  175. void final_merge(void)
  176. { //---------------------------------- begin --------------------------
  177. njob = 0;
  178. for (uint32_t i = 0; i < nthread; ++i)
  179. {
  180. vfuture[i] = std::async(std::launch::async, &this_t::execute, this);
  181. };
  182. for (uint32_t i = 0; i < nthread; ++i)
  183. vfuture[i].get();
  184. };
  185. //----------------------------------------------------------------------------
  186. };
  187. // End class sample_sort
  188. //----------------------------------------------------------------------------
  189. //
  190. //############################################################################
  191. // ##
  192. // N O N I N L I N E F U N C T I O N S ##
  193. // ##
  194. // ##
  195. //############################################################################
  196. //
  197. //-----------------------------------------------------------------------------
  198. // function : sample_sort
  199. /// @brief constructor of the class
  200. ///
  201. /// @param first : iterator to the first element of the range to sort
  202. /// @param last : iterator after the last element to the range to sort
  203. /// @param cmp : object for to compare two elements pointed by Iter_t iterators
  204. /// @param num_thread : Number of threads to use in the process. When this value
  205. /// is lower than 2, the sorting is done with 1 thread
  206. /// @param paux : pointer to the auxiliary memory. If nullptr, the memory is
  207. /// created inside the class
  208. /// @param naux : number of elements of the memory pointed by paux
  209. //-----------------------------------------------------------------------------
  210. template<class Iter_t, typename Compare>
  211. sample_sort<Iter_t, Compare>
  212. ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
  213. value_t *paux, size_t naux)
  214. : nthread(num_thread), owner(false), comp(cmp), global_range(first, last),
  215. global_buf(nullptr, nullptr), error(false)
  216. {
  217. assert((last - first) >= 0);
  218. size_t nelem = size_t(last - first);
  219. construct = false;
  220. njob = 0;
  221. vfuture.resize(nthread);
  222. // Adjust when have many threads and only a few elements
  223. while (nelem > thread_min and (nthread * nthread) > (nelem >> 3))
  224. {
  225. nthread /= 2;
  226. };
  227. ninterval = (nthread << 3);
  228. if (nthread < 2 or nelem <= (thread_min))
  229. {
  230. bss::spinsort<Iter_t, Compare>(first, last, comp);
  231. return;
  232. };
  233. //------------------- check if sort --------------------------------------
  234. bool sw = true;
  235. for (Iter_t it1 = first, it2 = first + 1;
  236. it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
  237. if (sw) return;
  238. //------------------- check if reverse sort ---------------------------
  239. sw = true;
  240. for (Iter_t it1 = first, it2 = first + 1;
  241. it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
  242. if (sw)
  243. {
  244. size_t nelem2 = nelem >> 1;
  245. Iter_t it1 = first, it2 = last - 1;
  246. for (size_t i = 0; i < nelem2; ++i)
  247. std::swap(*(it1++), *(it2--));
  248. return;
  249. };
  250. if (paux != nullptr)
  251. {
  252. assert(naux != 0);
  253. global_buf.first = paux;
  254. global_buf.last = paux + naux;
  255. owner = false;
  256. }
  257. else
  258. {
  259. value_t *ptr = std::get_temporary_buffer<value_t>(nelem).first;
  260. if (ptr == nullptr) throw std::bad_alloc();
  261. owner = true;
  262. global_buf = range_buf(ptr, ptr + nelem);
  263. };
  264. //------------------------------------------------------------------------
  265. // PROCESS
  266. //------------------------------------------------------------------------
  267. try
  268. {
  269. initial_configuration();
  270. } catch (std::bad_alloc &)
  271. {
  272. error = true;
  273. };
  274. if (not error)
  275. {
  276. first_merge();
  277. construct = true;
  278. final_merge();
  279. };
  280. if (error)
  281. {
  282. destroy_all();
  283. throw std::bad_alloc();
  284. };
  285. }
  286. ;
  287. //
  288. //-----------------------------------------------------------------------------
  289. // function : destroy_all
  290. /// @brief destructor of the class. The utility is to destroy the temporary
  291. /// buffer used in the sorting process
  292. //-----------------------------------------------------------------------------
  293. template<class Iter_t, typename Compare>
  294. void sample_sort<Iter_t, Compare>::destroy_all(void)
  295. {
  296. if (construct)
  297. {
  298. destroy(global_buf);
  299. construct = false;
  300. }
  301. if (global_buf.first != nullptr and owner)
  302. std::return_temporary_buffer(global_buf.first);
  303. }
  304. //
  305. //-----------------------------------------------------------------------------
  306. // function : initial_configuration
  307. /// @brief Create the internal data structures, and obtain the inital set of
  308. /// ranges to merge
  309. //-----------------------------------------------------------------------------
  310. template<class Iter_t, typename Compare>
  311. void sample_sort<Iter_t, Compare>::initial_configuration(void)
  312. {
  313. std::vector<range_it> vmem_thread;
  314. std::vector<range_buf> vbuf_thread;
  315. size_t nelem = global_range.size();
  316. //------------------------------------------------------------------------
  317. size_t cupo = nelem / nthread;
  318. Iter_t it_first = global_range.first;
  319. value_t *buf_first = global_buf.first;
  320. vmem_thread.reserve(nthread + 1);
  321. vbuf_thread.reserve(nthread + 1);
  322. for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first +=
  323. cupo)
  324. {
  325. vmem_thread.emplace_back(it_first, it_first + cupo);
  326. vbuf_thread.emplace_back(buf_first, buf_first + cupo);
  327. };
  328. vmem_thread.emplace_back(it_first, global_range.last);
  329. vbuf_thread.emplace_back(buf_first, global_buf.last);
  330. //------------------------------------------------------------------------
  331. // Sorting of the ranges
  332. //------------------------------------------------------------------------
  333. std::vector<std::future<void>> vfuture(nthread);
  334. for (uint32_t i = 0; i < nthread; ++i)
  335. {
  336. auto func = [=]()
  337. {
  338. bss::spinsort<Iter_t, Compare> (vmem_thread[i].first,
  339. vmem_thread[i].last, comp,
  340. vbuf_thread[i]);
  341. };
  342. vfuture[i] = std::async(std::launch::async, func);
  343. };
  344. for (uint32_t i = 0; i < nthread; ++i)
  345. vfuture[i].get();
  346. //------------------------------------------------------------------------
  347. // Obtain the vector of milestones
  348. //------------------------------------------------------------------------
  349. std::vector<Iter_t> vsample;
  350. vsample.reserve(nthread * (ninterval - 1));
  351. for (uint32_t i = 0; i < nthread; ++i)
  352. {
  353. size_t distance = vmem_thread[i].size() / ninterval;
  354. for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
  355. {
  356. vsample.push_back(vmem_thread[i].first + pos);
  357. };
  358. };
  359. typedef less_ptr_no_null<Iter_t, Compare> compare_ptr;
  360. typedef typename std::vector<Iter_t>::iterator it_to_it;
  361. bss::spinsort<it_to_it, compare_ptr>(vsample.begin(), vsample.end(),
  362. compare_ptr(comp));
  363. //------------------------------------------------------------------------
  364. // Create the final milestone vector
  365. //------------------------------------------------------------------------
  366. std::vector<Iter_t> vmilestone;
  367. vmilestone.reserve(ninterval);
  368. for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread)
  369. {
  370. vmilestone.push_back(vsample[pos]);
  371. };
  372. //------------------------------------------------------------------------
  373. // Creation of the first vector of ranges
  374. //------------------------------------------------------------------------
  375. std::vector<std::vector<range<Iter_t>>>vv_range_first (nthread);
  376. for (uint32_t i = 0; i < nthread; ++i)
  377. {
  378. Iter_t itaux = vmem_thread[i].first;
  379. for (uint32_t k = 0; k < (ninterval - 1); ++k)
  380. {
  381. Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last,
  382. *vmilestone[k], comp);
  383. vv_range_first[i].emplace_back(itaux, it2);
  384. itaux = it2;
  385. };
  386. vv_range_first[i].emplace_back(itaux, vmem_thread[i].last);
  387. };
  388. //------------------------------------------------------------------------
  389. // Copy in buffer and creation of the final matrix of ranges
  390. //------------------------------------------------------------------------
  391. vv_range_it.resize(ninterval);
  392. vv_range_buf.resize(ninterval);
  393. vrange_it_ini.reserve(ninterval);
  394. vrange_buf_ini.reserve(ninterval);
  395. for (uint32_t i = 0; i < ninterval; ++i)
  396. {
  397. vv_range_it[i].reserve(nthread);
  398. vv_range_buf[i].reserve(nthread);
  399. };
  400. Iter_t it = global_range.first;
  401. value_t *it_buf = global_buf.first;
  402. for (uint32_t k = 0; k < ninterval; ++k)
  403. {
  404. size_t nelem_interval = 0;
  405. for (uint32_t i = 0; i < nthread; ++i)
  406. {
  407. size_t nelem_range = vv_range_first[i][k].size();
  408. if (nelem_range != 0)
  409. {
  410. vv_range_it[k].push_back(vv_range_first[i][k]);
  411. };
  412. nelem_interval += nelem_range;
  413. };
  414. vrange_it_ini.emplace_back(it, it + nelem_interval);
  415. vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval);
  416. it += nelem_interval;
  417. it_buf += nelem_interval;
  418. };
  419. }
  420. ;
  421. //
  422. //****************************************************************************
  423. }
  424. ;
  425. // End namespace sample_detail
  426. //****************************************************************************
  427. //
  428. namespace bscu = boost::sort::common::util;
  429. //
  430. //############################################################################
  431. // ##
  432. // ##
  433. // S A M P L E _ S O R T ##
  434. // ##
  435. // ##
  436. //############################################################################
  437. //
  438. //-----------------------------------------------------------------------------
  439. // function : sample_sort
  440. /// @brief parallel sample sort algorithm (stable sort)
  441. ///
  442. /// @param first : iterator to the first element of the range to sort
  443. /// @param last : iterator after the last element to the range to sort
  444. //-----------------------------------------------------------------------------
  445. template<class Iter_t>
  446. void sample_sort(Iter_t first, Iter_t last)
  447. {
  448. typedef compare_iter<Iter_t> Compare;
  449. sample_detail::sample_sort<Iter_t, Compare>(first, last);
  450. };
  451. //
  452. //-----------------------------------------------------------------------------
  453. // function : sample_sort
  454. /// @brief parallel sample sort algorithm (stable sort)
  455. ///
  456. /// @param first : iterator to the first element of the range to sort
  457. /// @param last : iterator after the last element to the range to sort
  458. /// @param nthread : Number of threads to use in the process. When this value
  459. /// is lower than 2, the sorting is done with 1 thread
  460. //-----------------------------------------------------------------------------
  461. template<class Iter_t>
  462. void sample_sort(Iter_t first, Iter_t last, uint32_t nthread)
  463. {
  464. typedef compare_iter<Iter_t> Compare;
  465. sample_detail::sample_sort<Iter_t, Compare>(first, last, nthread);
  466. };
  467. //
  468. //-----------------------------------------------------------------------------
  469. // function : sample_sort
  470. /// @brief parallel sample sort algorithm (stable sort)
  471. ///
  472. /// @param first : iterator to the first element of the range to sort
  473. /// @param last : iterator after the last element to the range to sort
  474. /// @param comp : object for to compare two elements pointed by Iter_t
  475. /// iterators
  476. //-----------------------------------------------------------------------------
  477. template<class Iter_t, class Compare, bscu::enable_if_not_integral<Compare> * =
  478. nullptr>
  479. void sample_sort(Iter_t first, Iter_t last, Compare comp)
  480. {
  481. sample_detail::sample_sort<Iter_t, Compare>(first, last, comp);
  482. };
  483. //
  484. //-----------------------------------------------------------------------------
  485. // function : sample_sort
  486. /// @brief parallel sample sort algorithm (stable sort)
  487. ///
  488. /// @param first : iterator to the first element of the range to sort
  489. /// @param last : iterator after the last element to the range to sort
  490. /// @param comp : object for to compare two elements pointed by Iter_t
  491. /// iterators
  492. /// @param nthread : Number of threads to use in the process. When this value
  493. /// is lower than 2, the sorting is done with 1 thread
  494. //-----------------------------------------------------------------------------
  495. template<class Iter_t, class Compare>
  496. void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
  497. {
  498. sample_detail::sample_sort<Iter_t, Compare>(first, last, comp, nthread);
  499. };
  500. //
  501. //****************************************************************************
  502. };// End namespace sort
  503. };// End namespace boost
  504. //****************************************************************************
  505. //
  506. #endif