// measure.hpp
  1. // Copyright David Abrahams, Matthias Troyer, Michael Gauckler
  2. // 2005. Distributed under the Boost Software License, Version
  3. // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(LIVE_CODE_TYPE)
  6. # define LIVE_CODE_TYPE int
  7. #endif
  8. #include <boost/timer.hpp>
  9. namespace test
  10. {
  // This value is required to ensure that a smart compiler's dead
  // code elimination doesn't optimize away anything we're testing.
  // We'll use it to compute the return code of the executable to make
  // sure it's needed.
  //
  // Its type is LIVE_CODE_TYPE, which defaults to int unless the macro
  // is defined before this point.  hammer() resets it to zero and then
  // adds every accumulator's sum to it.
  //
  // NOTE(review): this is a definition, not an extern declaration, in
  // what appears to be a header; including the file from more than one
  // translation unit would produce duplicate definitions -- confirm it
  // is only ever included once per program.
  LIVE_CODE_TYPE live_code;
  16. // Call objects of the given Accumulator type repeatedly with x as
  17. // an argument.
  18. template <class Accumulator, class Arg>
  19. void hammer(Arg const& x, long const repeats)
  20. {
  21. // Strategy: because the sum in an accumulator after each call
  22. // depends on the previous value of the sum, the CPU's pipeline
  23. // might be stalled while waiting for the previous addition to
  24. // complete. Therefore, we allocate an array of accumulators,
  25. // and update them in sequence, so that there's no dependency
  26. // between adjacent addition operations.
  27. //
  28. // Additionally, if there were only one accumulator, the
  29. // compiler or CPU might decide to update the value in a
  30. // register rather that writing it back to memory. we want each
  31. // operation to at least update the L1 cache. *** Note: This
  32. // concern is specific to the particular application at which
  33. // we're targeting the test. ***
  34. // This has to be at least as large as the number of
  35. // simultaneous accumulations that can be executing in the
  36. // compiler pipeline. A safe number here is larger than the
  37. // machine's maximum pipeline depth. If you want to test the L2
  38. // or L3 cache, or main memory, you can increase the size of
  39. // this array. 1024 is an upper limit on the pipeline depth of
  40. // current vector machines.
  41. const std::size_t number_of_accumulators = 1024;
  42. live_code = 0; // reset to zero
  43. Accumulator a[number_of_accumulators];
  44. for (long iteration = 0; iteration < repeats; ++iteration)
  45. {
  46. for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
  47. {
  48. (*ap)(x);
  49. }
  50. }
  51. // Accumulate all the partial sums to avoid dead code
  52. // elimination.
  53. for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
  54. {
  55. live_code += ap->sum;
  56. }
  57. }
  58. // Measure the time required to hammer accumulators of the given
  59. // type with the argument x.
  60. template <class Accumulator, class T>
  61. double measure(T const& x, long const repeats)
  62. {
  63. // Hammer accumulators a couple of times to ensure the
  64. // instruction cache is full of our test code, and that we don't
  65. // measure the cost of a page fault for accessing the data page
  66. // containing the memory where the accumulators will be
  67. // allocated
  68. hammer<Accumulator>(x, repeats);
  69. hammer<Accumulator>(x, repeats);
  70. // Now start a timer
  71. boost::timer time;
  72. hammer<Accumulator>(x, repeats); // This time, we'll measure
  73. return time.elapsed() / repeats; // return the time of one iteration
  74. }
  75. }