measure.hpp 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. // Copyright David Abrahams, Matthias Troyer, Michael Gauckler
  2. // 2005. Distributed under the Boost Software License, Version
  3. // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_TEST_BENCHMARK_HPP)
  6. #define BOOST_SPIRIT_TEST_BENCHMARK_HPP
  7. #ifdef _MSC_VER
  8. // inline aggressively
  9. # pragma inline_recursion(on) // turn on inline recursion
  10. # pragma inline_depth(255) // max inline depth
  11. # define _SECURE_SCL 0
  12. #endif
  13. #include "high_resolution_timer.hpp"
  14. #include <iostream>
  15. #include <cstring>
  16. #include <boost/preprocessor/seq/for_each.hpp>
  17. #include <boost/preprocessor/stringize.hpp>
  18. namespace test
  19. {
  // This value is required to ensure that a smart compiler's dead
  // code elimination doesn't optimize away anything we're testing.
  // We'll use it to compute the return code of the executable to make
  // sure it's needed.
  //
  // NOTE: this is a definition (not just a declaration) at namespace
  // scope in a header, so including this header from more than one
  // translation unit of the same program yields multiple definitions.
  int live_code;

  // Call objects of the given Accumulator type repeatedly.
  //
  // Accumulator must be default-constructible, provide a benchmark()
  // member function and expose a member `val` that can be added to an
  // int. Every call first resets live_code to zero and finally stores
  // the sum of all accumulators' `val` members in it.
  //
  // repeats: number of passes over the whole array of accumulators.
  template <class Accumulator>
  void hammer(long const repeats)
  {
      // Strategy: because the sum in an accumulator after each call
      // depends on the previous value of the sum, the CPU's pipeline
      // might be stalled while waiting for the previous addition to
      // complete. Therefore, we allocate an array of accumulators,
      // and update them in sequence, so that there's no dependency
      // between adjacent addition operations.
      //
      // Additionally, if there were only one accumulator, the
      // compiler or CPU might decide to update the value in a
      // register rather than writing it back to memory. We want each
      // operation to at least update the L1 cache. *** Note: This
      // concern is specific to the particular application at which
      // we're targeting the test. ***

      // This has to be at least as large as the number of
      // simultaneous accumulations that can be executing in the
      // CPU pipeline. A safe number here is larger than the
      // machine's maximum pipeline depth. If you want to test the L2
      // or L3 cache, or main memory, you can increase the size of
      // this array. 1024 is an upper limit on the pipeline depth of
      // current vector machines.
      const std::size_t number_of_accumulators = 1024;
      live_code = 0; // reset to zero

      Accumulator a[number_of_accumulators];

      for (long iteration = 0; iteration < repeats; ++iteration)
      {
          for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
          {
              ap->benchmark();
          }
      }

      // Accumulate all the partial sums to avoid dead code
      // elimination.
      for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
      {
          live_code += ap->val;
      }
  }
  66. // Measure the time required to hammer accumulators of the given type
  67. template <class Accumulator>
  68. double measure(long const repeats)
  69. {
  70. // Hammer accumulators a couple of times to ensure the
  71. // instruction cache is full of our test code, and that we don't
  72. // measure the cost of a page fault for accessing the data page
  73. // containing the memory where the accumulators will be
  74. // allocated
  75. hammer<Accumulator>(repeats);
  76. hammer<Accumulator>(repeats);
  77. // Now start a timer
  78. util::high_resolution_timer time;
  79. hammer<Accumulator>(repeats); // This time, we'll measure
  80. return time.elapsed(); // return the elapsed time
  81. }
  82. template <class Accumulator>
  83. void report(char const* name, long const repeats)
  84. {
  85. std::cout.precision(10);
  86. std::cout << name << ": ";
  87. for (int i = 0; i < (20-int(strlen(name))); ++i)
  88. std::cout << ' ';
  89. std::cout << std::fixed << test::measure<Accumulator>(repeats) << " [s] ";
  90. Accumulator acc;
  91. acc.benchmark();
  92. std::cout << std::hex << "{checksum: " << acc.val << "}";
  93. std::cout << std::flush << std::endl;
  94. }
  // Small helper type holding an int `val` that starts at zero.
  // Presumably meant as a base class for accumulator types, since
  // hammer() and report() read a member named `val` -- confirm against
  // the benchmarks that include this header.
  struct base
  {
      base() : val(0) {}
      int val; // This is needed to avoid dead-code elimination
  };
  // Per-element callback for BOOST_PP_SEQ_FOR_EACH: expands to one
  // test::hammer pass for the accumulator type `elem`. A variable named
  // `repeats` must be in scope where the macro is expanded; the `r` and
  // `data` arguments are not used.
  #define BOOST_SPIRIT_TEST_HAMMER(r, data, elem) \
      test::hammer<elem>(repeats);
  /***/
  // Per-element callback for BOOST_PP_SEQ_FOR_EACH: expands to a
  // test::report call for the accumulator type `elem`, using the
  // stringized type name as the label. A variable named `repeats` must
  // be in scope where the macro is expanded; the `r` and `data`
  // arguments are not used.
  #define BOOST_SPIRIT_TEST_MEASURE(r, data, elem) \
      test::report<elem>(BOOST_PP_STRINGIZE(elem), repeats); \
      /***/
  // Calibrate a repeat count and then report every accumulator type in
  // the Boost.Preprocessor sequence FSeq.
  //
  // Declares `long repeats` (starting at 100) and `double measured` in
  // the scope where the macro is expanded. While the last calibration
  // pass took less than 2.0 (in the units of
  // util::high_resolution_timer::elapsed()) and repeats <= max_repeats,
  // repeats is multiplied by 10 and all types in FSeq are hammered once
  // under a timer. Because the multiplication happens after the
  // comparison, the final repeats may be up to 10 times max_repeats.
  // Afterwards test::report is invoked for each type with that count.
  //
  // max_repeats is parenthesised in the expansion so that arbitrary
  // expressions can be passed safely.
  #define BOOST_SPIRIT_TEST_BENCHMARK(max_repeats, FSeq) \
      long repeats = 100; \
      double measured = 0; \
      while (measured < 2.0 && repeats <= (max_repeats)) \
      { \
          repeats *= 10; \
          util::high_resolution_timer time; \
          BOOST_PP_SEQ_FOR_EACH(BOOST_SPIRIT_TEST_HAMMER, _, FSeq) \
          measured = time.elapsed(); \
      } \
      BOOST_PP_SEQ_FOR_EACH(BOOST_SPIRIT_TEST_MEASURE, _, FSeq) \
      /***/
  118. }
  119. #endif