efficiency.cpp 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. // Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2005.
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. #include <boost/parameter/name.hpp>
  6. #include <boost/config/workaround.hpp>
  7. #include <boost/timer.hpp>
  8. #include <iostream>
  9. namespace test {
  10. //
  11. // This test measures the abstraction overhead of using the named
  12. // parameter interface. Some actual test results have been recorded
  13. // in timings.txt in this source file's directory, or
  14. // http://www.boost.org/libs/parameter/test/timings.txt.
  15. //
  16. // Caveats:
  17. //
  18. // 1. This test penalizes the named parameter library slightly, by
  19. // passing two arguments through the named interface, while
  20. // only passing one through the plain C++ interface.
  21. //
  22. // 2. This test does not measure the case where an ArgumentPack is
  23. // so large that it doesn't fit in the L1 cache.
  24. //
  25. // 3. Although we've tried to make this test as general as possible,
  26. // we are targeting it at a specific application. Where that
  27. // affects design decisions, we've noted it below in ***...***.
  28. //
  29. // 4. The first time you run this program, the time may not be
  30. // representative because of disk and memory cache effects, so
  31. // always run it multiple times and ignore the first
  32. // measurement. This approach will also allow you to estimate
  33. // the statistical error of your test by observing the
  34. // variation in the valid times.
  35. //
  36. // 5. Try to run this program on a machine that's otherwise idle,
  37. // or other processes and even device hardware interrupts may
  38. // interfere by causing caches to be flushed.
  39. // Accumulator function object with plain C++ interface
  40. template <typename T>
  41. struct plain_weight_running_total
  42. {
  43. plain_weight_running_total()
  44. #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
  45. : sum(T())
  46. #else
  47. : sum()
  48. #endif
  49. {
  50. }
  51. void operator()(T w)
  52. {
  53. this->sum += w;
  54. }
  55. T sum;
  56. };
  57. BOOST_PARAMETER_NAME(weight)
  58. BOOST_PARAMETER_NAME(value)
  59. // Accumulator function object with named parameter interface
  60. template <typename T>
  61. struct named_param_weight_running_total
  62. {
  63. named_param_weight_running_total()
  64. #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
  65. : sum(T())
  66. #else
  67. : sum()
  68. #endif
  69. {
  70. }
  71. template <typename ArgumentPack>
  72. void operator()(ArgumentPack const& variates)
  73. {
  74. this->sum += variates[test::_weight];
  75. }
  76. T sum;
  77. };
  78. // This value is required to ensure that a smart compiler's dead code
  79. // elimination doesn't optimize away anything we're testing. We'll use it
  80. // to compute the return code of the executable to make sure it's needed.
  81. double live_code;
  82. // Call objects of the given Accumulator type repeatedly
  83. // with x an argument.
  84. template <typename Accumulator, typename Arg>
  85. void hammer(Arg const& x, long const repeats)
  86. {
  87. // Strategy: because the sum in an accumulator after each call
  88. // depends on the previous value of the sum, the CPU's pipeline
  89. // might be stalled while waiting for the previous addition to
  90. // complete. Therefore, we allocate an array of accumulators,
  91. // and update them in sequence, so that there's no dependency
  92. // between adjacent addition operations.
  93. //
  94. // Additionally, if there were only one accumulator, the compiler or
  95. // CPU might decide to update the value in a register rather than
  96. // writing it back to memory. We want each operation to at least
  97. // update the L1 cache. *** Note: This concern is specific to the
  98. // particular application at which we're targeting the test. ***
  99. // This has to be at least as large as the number of simultaneous
  100. // accumulations that can be executing in the compiler pipeline. A
  101. // safe number here is larger than the machine's maximum pipeline
  102. // depth. If you want to test the L2 or L3 cache, or main memory,
  103. // you can increase the size of this array. 1024 is an upper limit
  104. // on the pipeline depth of current vector machines.
  105. std::size_t const number_of_accumulators = 1024;
  106. Accumulator a[number_of_accumulators];
  107. for (long iteration = 0; iteration < repeats; ++iteration)
  108. {
  109. for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
  110. {
  111. (*ap)(x);
  112. }
  113. }
  114. // Accumulate all the partial sums to avoid dead code elimination.
  115. for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
  116. {
  117. test::live_code += ap->sum;
  118. }
  119. }
  120. // Measure the time required to hammer accumulators of the given
  121. // type with the argument x.
  122. template <typename Accumulator, typename T>
  123. double measure(T const& x, long const repeats)
  124. {
  125. // Hammer accumulators a couple of times to ensure the instruction
  126. // cache is full of our test code, and that we don't measure the cost
  127. // of a page fault for accessing the data page containing the memory
  128. // where the accumulators will be allocated.
  129. test::hammer<Accumulator>(x, repeats);
  130. test::hammer<Accumulator>(x, repeats);
  131. // Now start a timer.
  132. boost::timer time;
  133. test::hammer<Accumulator>(x, repeats); // This time, we'll measure.
  134. return time.elapsed();
  135. }
  136. }
  137. int main()
  138. {
  139. // First decide how many repetitions to measure.
  140. long repeats = 100;
  141. double measured = 0;
  142. while (measured < 1.0 && repeats <= 10000000)
  143. {
  144. repeats *= 10;
  145. boost::timer time;
  146. test::hammer<test::plain_weight_running_total<double> >(.1, repeats);
  147. test::hammer<test::named_param_weight_running_total<double> >(
  148. (test::_weight = .1, test::_value = .2), repeats
  149. );
  150. measured = time.elapsed();
  151. }
  152. std::cout
  153. << "plain time: "
  154. << test::measure<test::plain_weight_running_total<double> >(
  155. .1, repeats
  156. )
  157. << std::endl;
  158. std::cout
  159. << "named parameter time: "
  160. << test::measure<test::named_param_weight_running_total<double> >(
  161. (test::_weight = .1, test::_value = .2), repeats
  162. )
  163. << std::endl;
  164. // This is ultimately responsible for preventing all the test code
  165. // from being optimized away. Change this to return 0 and you
  166. // unplug the whole test's life support system.
  167. return test::live_code < 0.;
  168. }