accumulate.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Joel de Guzman
  3. Copyright (c) 2005-2006 Dan Marsden
  4. Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. ==============================================================================*/
  7. #include <boost/array.hpp>
  8. #include <boost/timer.hpp>
  9. #include <boost/fusion/algorithm/iteration/accumulate.hpp>
  10. #include <boost/fusion/algorithm/transformation/transform.hpp>
  11. #include <boost/fusion/container/vector.hpp>
  12. #include <boost/fusion/algorithm/transformation/zip.hpp>
  13. #include <boost/fusion/sequence/intrinsic/at.hpp>
  14. #include <boost/fusion/adapted/array.hpp>
  15. #include <boost/type_traits/remove_reference.hpp>
  16. #include <algorithm>
  17. #include <numeric>
  18. #include <functional>
  19. #include <iostream>
  20. #include <cmath>
  21. #include <limits>
  22. #ifdef _MSC_VER
  23. // inline aggressively
  24. # pragma inline_recursion(on) // turn on inline recursion
  25. # pragma inline_depth(255) // max inline depth
  26. #endif
  27. int const REPEAT_COUNT = 10;
  28. double const duration = 0.5;
  29. namespace
  30. {
  31. template<int N>
  32. double time_for_std_accumulate(int& j)
  33. {
  34. boost::timer tim;
  35. int i = 0;
  36. long long iter = 65536;
  37. long long counter, repeats;
  38. double result = (std::numeric_limits<double>::max)();
  39. double runtime = 0;
  40. double run;
  41. boost::array<int, N> arr;
  42. std::generate(arr.begin(), arr.end(), rand);
  43. do
  44. {
  45. tim.restart();
  46. for(counter = 0; counter < iter; ++counter)
  47. {
  48. i = std::accumulate(arr.begin(), arr.end(), 0);
  49. static_cast<void>(i);
  50. }
  51. runtime = tim.elapsed();
  52. iter *= 2;
  53. } while(runtime < duration);
  54. iter /= 2;
  55. // repeat test and report least value for consistency:
  56. for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
  57. {
  58. tim.restart();
  59. for(counter = 0; counter < iter; ++counter)
  60. {
  61. i = std::accumulate(arr.begin(), arr.end(), 0);
  62. j += i;
  63. }
  64. run = tim.elapsed();
  65. result = (std::min)(run, result);
  66. }
  67. std::cout << i << std::endl;
  68. return result / iter;
  69. }
  70. struct poly_add
  71. {
  72. template<typename Sig>
  73. struct result;
  74. template<typename Lhs, typename Rhs>
  75. struct result<poly_add(Lhs,Rhs)>
  76. : boost::remove_reference<Lhs>
  77. {};
  78. template<typename Lhs, typename Rhs>
  79. Lhs operator()(const Lhs& lhs, const Rhs& rhs) const
  80. {
  81. return lhs + rhs;
  82. }
  83. };
  84. struct poly_mult
  85. {
  86. template<typename Sig>
  87. struct result;
  88. template<typename Lhs, typename Rhs>
  89. struct result<poly_mult(Lhs, Rhs)>
  90. : boost::remove_reference<Lhs>
  91. {};
  92. template<typename Lhs, typename Rhs>
  93. Lhs operator()(const Lhs& lhs, const Rhs& rhs) const
  94. {
  95. return lhs * rhs;
  96. }
  97. };
  98. template<int N>
  99. double time_for_fusion_accumulate(int& j)
  100. {
  101. boost::timer tim;
  102. int i = 0;
  103. long long iter = 65536;
  104. long long counter, repeats;
  105. double result = (std::numeric_limits<double>::max)();
  106. double runtime = 0;
  107. double run;
  108. boost::array<int, N> arr;
  109. std::generate(arr.begin(), arr.end(), rand);
  110. do
  111. {
  112. tim.restart();
  113. for(counter = 0; counter < iter; ++counter)
  114. {
  115. i = boost::fusion::accumulate(arr, 0, poly_add());
  116. static_cast<void>(i);
  117. }
  118. runtime = tim.elapsed();
  119. iter *= 2;
  120. } while(runtime < duration);
  121. iter /= 2;
  122. std::cout << iter << " iterations" << std::endl;
  123. // repeat test and report least value for consistency:
  124. for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
  125. {
  126. tim.restart();
  127. for(counter = 0; counter < iter; ++counter)
  128. {
  129. i = boost::fusion::accumulate(arr, 0, poly_add());
  130. j += i;
  131. }
  132. run = tim.elapsed();
  133. result = (std::min)(run, result);
  134. std::cout << ".";
  135. std::cout.flush();
  136. }
  137. std::cout << i << std::endl;
  138. return result / iter;
  139. }
  140. #if 0
  141. template<int N>
  142. double time_for_std_inner_product(int& j)
  143. {
  144. boost::timer tim;
  145. int i = 0;
  146. long long iter = 65536;
  147. long long counter, repeats;
  148. double result = (std::numeric_limits<double>::max)();
  149. double runtime = 0;
  150. double run;
  151. boost::array<int, N> arr1;
  152. boost::array<int, N> arr2;
  153. std::generate(arr1.begin(), arr1.end(), rand);
  154. std::generate(arr2.begin(), arr2.end(), rand);
  155. do
  156. {
  157. tim.restart();
  158. for(counter = 0; counter < iter; ++counter)
  159. {
  160. i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
  161. static_cast<void>(i);
  162. }
  163. runtime = tim.elapsed();
  164. iter *= 2;
  165. } while(runtime < duration);
  166. iter /= 2;
  167. // repeat test and report least value for consistency:
  168. for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
  169. {
  170. tim.restart();
  171. for(counter = 0; counter < iter; ++counter)
  172. {
  173. i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
  174. j += i;
  175. }
  176. run = tim.elapsed();
  177. result = (std::min)(run, result);
  178. }
  179. std::cout << i << std::endl;
  180. return result / iter;
  181. }
  182. template<int N>
  183. double time_for_fusion_inner_product(int& j)
  184. {
  185. boost::timer tim;
  186. int i = 0;
  187. long long iter = 65536;
  188. long long counter, repeats;
  189. double result = (std::numeric_limits<double>::max)();
  190. double runtime = 0;
  191. double run;
  192. boost::array<int, N> arr1;
  193. boost::array<int, N> arr2;
  194. std::generate(arr1.begin(), arr1.end(), rand);
  195. std::generate(arr2.begin(), arr2.end(), rand);
  196. do
  197. {
  198. tim.restart();
  199. for(counter = 0; counter < iter; ++counter)
  200. {
  201. i = boost::fusion::accumulate(
  202. boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add());
  203. static_cast<void>(i);
  204. }
  205. runtime = tim.elapsed();
  206. iter *= 2;
  207. } while(runtime < duration);
  208. iter /= 2;
  209. // repeat test and report least value for consistency:
  210. for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
  211. {
  212. tim.restart();
  213. for(counter = 0; counter < iter; ++counter)
  214. {
  215. i = boost::fusion::accumulate(
  216. boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add());
  217. j += i;
  218. }
  219. run = tim.elapsed();
  220. result = (std::min)(run, result);
  221. }
  222. std::cout << i << std::endl;
  223. return result / iter;
  224. }
  225. struct poly_combine
  226. {
  227. template<typename Lhs, typename Rhs>
  228. struct result
  229. {
  230. typedef Lhs type;
  231. };
  232. template<typename Lhs, typename Rhs>
  233. typename result<Lhs,Rhs>::type
  234. operator()(const Lhs& lhs, const Rhs& rhs) const
  235. {
  236. return lhs + boost::fusion::at_c<0>(rhs) * boost::fusion::at_c<1>(rhs);
  237. }
  238. };
  239. template<int N>
  240. double time_for_fusion_inner_product2(int& j)
  241. {
  242. boost::timer tim;
  243. int i = 0;
  244. long long iter = 65536;
  245. long long counter, repeats;
  246. double result = (std::numeric_limits<double>::max)();
  247. double runtime = 0;
  248. double run;
  249. boost::array<int, N> arr1;
  250. boost::array<int, N> arr2;
  251. std::generate(arr1.begin(), arr1.end(), rand);
  252. std::generate(arr2.begin(), arr2.end(), rand);
  253. do
  254. {
  255. tim.restart();
  256. for(counter = 0; counter < iter; ++counter)
  257. {
  258. i = boost::fusion::accumulate(
  259. boost::fusion::zip(arr1, arr2), 0, poly_combine());
  260. static_cast<void>(i);
  261. }
  262. runtime = tim.elapsed();
  263. iter *= 2;
  264. } while(runtime < duration);
  265. iter /= 2;
  266. std::cout << iter << " iterations" << std::endl;
  267. // repeat test and report least value for consistency:
  268. for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
  269. {
  270. tim.restart();
  271. for(counter = 0; counter < iter; ++counter)
  272. {
  273. i = boost::fusion::accumulate(
  274. boost::fusion::zip(arr1, arr2), 0, poly_combine());
  275. j += i;
  276. }
  277. run = tim.elapsed();
  278. result = (std::min)(run, result);
  279. }
  280. std::cout << i << std::endl;
  281. return result / iter;
  282. }
  283. #endif
  284. }
  285. int main()
  286. {
  287. int total = 0;
  288. int res;
  289. std::cout << "short accumulate std test " << time_for_std_accumulate<8>(res) << std::endl;
  290. total += res;
  291. std::cout << "short accumulate fusion test " << time_for_fusion_accumulate<8>(res) << std::endl;
  292. total += res;
  293. std::cout << "medium accumulate std test " << time_for_std_accumulate<64>(res) << std::endl;
  294. total += res;
  295. std::cout << "medium accumulate fusion test " << time_for_fusion_accumulate<64>(res) << std::endl;
  296. total += res;
  297. std::cout << "long accumulate std test " << time_for_std_accumulate<128>(res) << std::endl;
  298. total += res;
  299. std::cout << "long accumulate fusion test " << time_for_fusion_accumulate<128>(res) << std::endl;
  300. total += res;
  301. #if 0
  302. std::cout << "short inner_product std test " << time_for_std_inner_product<8>(res) << std::endl;
  303. total += res;
  304. std::cout << "short inner_product fusion test " << time_for_fusion_inner_product<8>(res) << std::endl;
  305. total += res;
  306. std::cout << "short inner_product fusion 2 test " << time_for_fusion_inner_product2<8>(res) << std::endl;
  307. total += res;
  308. std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl;
  309. total += res;
  310. std::cout << "medium inner_product fusion test " << time_for_fusion_inner_product<64>(res) << std::endl;
  311. total += res;
  312. std::cout << "medium inner_product fusion 2 test " << time_for_fusion_inner_product2<64>(res) << std::endl;
  313. total += res;
  314. std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl;
  315. total += res;
  316. std::cout << "long inner_product fusion test " << time_for_fusion_inner_product<128>(res) << std::endl;
  317. total += res;
  318. std::cout << "long inner_product fusion 2 test " << time_for_fusion_inner_product2<128>(res) << std::endl;
  319. total += res;
  320. #endif
  321. return total;
  322. }