bench_merge.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. //////////////////////////////////////////////////////////////////////////////
  2. //
  3. // (C) Copyright Ion Gaztanaga 2015-2016.
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // See http://www.boost.org/libs/move for documentation.
  9. //
  10. //////////////////////////////////////////////////////////////////////////////
  11. //#define BOOST_MOVE_ADAPTIVE_SORT_STATS
  12. //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2
  13. #include <algorithm> //std::inplace_merge
  14. #include <cstdio> //std::printf
  15. #include <iostream> //std::cout
  16. #include <boost/container/vector.hpp> //boost::container::vector
  17. #include <boost/config.hpp>
  18. #include <boost/move/unique_ptr.hpp>
  19. #include <boost/timer/timer.hpp>
  20. #include "order_type.hpp"
  21. #include "random_shuffle.hpp"
  22. using boost::timer::cpu_timer;
  23. using boost::timer::cpu_times;
  24. using boost::timer::nanosecond_type;
  25. void print_stats(const char *str, boost::ulong_long_type element_count)
  26. {
  27. std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
  28. }
  29. #include <boost/move/algo/adaptive_merge.hpp>
  30. #include <boost/move/algo/detail/merge.hpp>
  31. #include <boost/move/core.hpp>
  32. template<class T, class Compare>
  33. std::size_t generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK, Compare comp)
  34. {
  35. elements.resize(L);
  36. boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]);
  37. std::srand(0);
  38. for (std::size_t i = 0; i < (NK ? NK : L); ++i) {
  39. key_reps[i] = 0;
  40. }
  41. for (std::size_t i = 0; i < L; ++i) {
  42. std::size_t key = NK ? (i % NK) : i;
  43. elements[i].key = key;
  44. }
  45. ::random_shuffle(elements.data(), elements.data() + L);
  46. ::random_shuffle(elements.data(), elements.data() + L);
  47. for (std::size_t i = 0; i < L; ++i) {
  48. elements[i].val = key_reps[elements[i].key]++;
  49. }
  50. std::size_t split_count = L / 2;
  51. std::stable_sort(elements.data(), elements.data() + split_count, comp);
  52. std::stable_sort(elements.data() + split_count, elements.data() + L, comp);
  53. return split_count;
  54. }
  55. template<class T, class Compare>
  56. void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen)
  57. {
  58. boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
  59. boost::movelib::adaptive_merge(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen);
  60. }
  61. template<class T, class Compare>
  62. void std_like_adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen)
  63. {
  64. boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
  65. boost::movelib::merge_adaptive_ONlogN(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen);
  66. }
  67. enum AlgoType
  68. {
  69. StdMerge,
  70. AdaptMerge,
  71. SqrtHAdaptMerge,
  72. SqrtAdaptMerge,
  73. Sqrt2AdaptMerge,
  74. QuartAdaptMerge,
  75. StdInplaceMerge,
  76. StdSqrtHAdaptMerge,
  77. StdSqrtAdaptMerge,
  78. StdSqrt2AdaptMerge,
  79. StdQuartAdaptMerge,
  80. MaxMerge
  81. };
  82. const char *AlgoNames [] = { "StdMerge "
  83. , "AdaptMerge "
  84. , "SqrtHAdaptMerge "
  85. , "SqrtAdaptMerge "
  86. , "Sqrt2AdaptMerge "
  87. , "QuartAdaptMerge "
  88. , "StdInplaceMerge "
  89. , "StdSqrtHAdaptMerge "
  90. , "StdSqrtAdaptMerge "
  91. , "StdSqrt2AdaptMerge "
  92. , "StdQuartAdaptMerge "
  93. };
  94. BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge);
  95. template<class T>
  96. bool measure_algo(T *elements, std::size_t element_count, std::size_t split_pos, std::size_t alg, nanosecond_type &prev_clock)
  97. {
  98. std::printf("%s ", AlgoNames[alg]);
  99. order_perf_type::num_compare=0;
  100. order_perf_type::num_copy=0;
  101. order_perf_type::num_elements = element_count;
  102. cpu_timer timer;
  103. timer.resume();
  104. switch(alg)
  105. {
  106. case StdMerge:
  107. std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less());
  108. break;
  109. case AdaptMerge:
  110. boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less());
  111. break;
  112. case SqrtHAdaptMerge:
  113. adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  114. , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
  115. break;
  116. case SqrtAdaptMerge:
  117. adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  118. , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
  119. break;
  120. case Sqrt2AdaptMerge:
  121. adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  122. , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
  123. break;
  124. case QuartAdaptMerge:
  125. adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  126. , (element_count)/4+1);
  127. break;
  128. case StdInplaceMerge:
  129. boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less());
  130. break;
  131. case StdSqrtHAdaptMerge:
  132. std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  133. , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
  134. break;
  135. case StdSqrtAdaptMerge:
  136. std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  137. , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
  138. break;
  139. case StdSqrt2AdaptMerge:
  140. std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  141. , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
  142. break;
  143. case StdQuartAdaptMerge:
  144. std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
  145. , (element_count)/4+1);
  146. break;
  147. }
  148. timer.stop();
  149. if(order_perf_type::num_elements == element_count){
  150. std::printf(" Tmp Ok ");
  151. } else{
  152. std::printf(" Tmp KO ");
  153. }
  154. nanosecond_type new_clock = timer.elapsed().wall;
  155. //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument
  156. std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
  157. double time = double(new_clock);
  158. const char *units = "ns";
  159. if(time >= 1000000000.0){
  160. time /= 1000000000.0;
  161. units = " s";
  162. }
  163. else if(time >= 1000000.0){
  164. time /= 1000000.0;
  165. units = "ms";
  166. }
  167. else if(time >= 1000.0){
  168. time /= 1000.0;
  169. units = "us";
  170. }
  171. std::printf(" %6.02f%s (%6.02f)\n"
  172. , time
  173. , units
  174. , prev_clock ? double(new_clock)/double(prev_clock): 1.0);
  175. prev_clock = new_clock;
  176. bool res = is_order_type_ordered(elements, element_count, true);
  177. return res;
  178. }
  179. template<class T>
  180. bool measure_all(std::size_t L, std::size_t NK)
  181. {
  182. boost::container::vector<T> original_elements, elements;
  183. std::size_t split_pos = generate_elements(original_elements, L, NK, order_type_less());
  184. std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK);
  185. nanosecond_type prev_clock = 0;
  186. nanosecond_type back_clock;
  187. bool res = true;
  188. elements = original_elements;
  189. res = res && measure_algo(elements.data(), L, split_pos, StdMerge, prev_clock);
  190. back_clock = prev_clock;
  191. //
  192. prev_clock = back_clock;
  193. elements = original_elements;
  194. res = res && measure_algo(elements.data(), L, split_pos, QuartAdaptMerge, prev_clock);
  195. //
  196. prev_clock = back_clock;
  197. elements = original_elements;
  198. res = res && measure_algo(elements.data(), L, split_pos, StdQuartAdaptMerge, prev_clock);
  199. //
  200. prev_clock = back_clock;
  201. elements = original_elements;
  202. res = res && measure_algo(elements.data(), L, split_pos, Sqrt2AdaptMerge, prev_clock);
  203. //
  204. prev_clock = back_clock;
  205. elements = original_elements;
  206. res = res && measure_algo(elements.data(), L, split_pos, StdSqrt2AdaptMerge, prev_clock);
  207. //
  208. prev_clock = back_clock;
  209. elements = original_elements;
  210. res = res && measure_algo(elements.data(), L, split_pos, SqrtAdaptMerge, prev_clock);
  211. //
  212. prev_clock = back_clock;
  213. elements = original_elements;
  214. res = res && measure_algo(elements.data(), L, split_pos, StdSqrtAdaptMerge, prev_clock);
  215. //
  216. prev_clock = back_clock;
  217. elements = original_elements;
  218. res = res && measure_algo(elements.data(), L, split_pos, SqrtHAdaptMerge, prev_clock);
  219. //
  220. prev_clock = back_clock;
  221. elements = original_elements;
  222. res = res && measure_algo(elements.data(), L, split_pos, StdSqrtHAdaptMerge, prev_clock);
  223. //
  224. prev_clock = back_clock;
  225. elements = original_elements;
  226. res = res && measure_algo(elements.data(), L, split_pos, AdaptMerge, prev_clock);
  227. //
  228. prev_clock = back_clock;
  229. elements = original_elements;
  230. res = res && measure_algo(elements.data(), L, split_pos,StdInplaceMerge, prev_clock);
  231. //
  232. if(!res)
  233. throw int(0);
  234. return res;
  235. }
  236. //Undef it to run the long test
  237. #define BENCH_MERGE_SHORT
  238. #define BENCH_SORT_UNIQUE_VALUES
  239. int main()
  240. {
  241. try{
  242. #ifndef BENCH_SORT_UNIQUE_VALUES
  243. measure_all<order_perf_type>(101,1);
  244. measure_all<order_perf_type>(101,5);
  245. measure_all<order_perf_type>(101,7);
  246. measure_all<order_perf_type>(101,31);
  247. #endif
  248. measure_all<order_perf_type>(101,0);
  249. //
  250. #ifndef BENCH_SORT_UNIQUE_VALUES
  251. measure_all<order_perf_type>(1101,1);
  252. measure_all<order_perf_type>(1001,7);
  253. measure_all<order_perf_type>(1001,31);
  254. measure_all<order_perf_type>(1001,127);
  255. measure_all<order_perf_type>(1001,511);
  256. #endif
  257. measure_all<order_perf_type>(1001,0);
  258. //
  259. #ifndef BENCH_SORT_UNIQUE_VALUES
  260. measure_all<order_perf_type>(10001,65);
  261. measure_all<order_perf_type>(10001,255);
  262. measure_all<order_perf_type>(10001,1023);
  263. measure_all<order_perf_type>(10001,4095);
  264. #endif
  265. measure_all<order_perf_type>(10001,0);
  266. //
  267. #if defined(NDEBUG)
  268. #ifndef BENCH_SORT_UNIQUE_VALUES
  269. measure_all<order_perf_type>(100001,511);
  270. measure_all<order_perf_type>(100001,2047);
  271. measure_all<order_perf_type>(100001,8191);
  272. measure_all<order_perf_type>(100001,32767);
  273. #endif
  274. measure_all<order_perf_type>(100001,0);
  275. //
  276. #if !defined(BENCH_MERGE_SHORT)
  277. #ifndef BENCH_SORT_UNIQUE_VALUES
  278. measure_all<order_perf_type>(1000001, 8192);
  279. measure_all<order_perf_type>(1000001, 32768);
  280. measure_all<order_perf_type>(1000001, 131072);
  281. measure_all<order_perf_type>(1000001, 524288);
  282. #endif
  283. measure_all<order_perf_type>(1000001,0);
  284. #ifndef BENCH_SORT_UNIQUE_VALUES
  285. measure_all<order_perf_type>(10000001, 65536);
  286. measure_all<order_perf_type>(10000001, 262144);
  287. measure_all<order_perf_type>(10000001, 1048576);
  288. measure_all<order_perf_type>(10000001, 4194304);
  289. #endif
  290. measure_all<order_perf_type>(10000001,0);
  291. #endif //#ifndef BENCH_MERGE_SHORT
  292. #endif //#ifdef NDEBUG
  293. }
  294. catch(...)
  295. {
  296. return 1;
  297. }
  298. return 0;
  299. }