// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
// 2005. Distributed under the Boost Software License, Version
// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
// Type of the test::live_code sink.  Defaults to int; define
// LIVE_CODE_TYPE before this point to substitute another type.
#ifndef LIVE_CODE_TYPE
# define LIVE_CODE_TYPE int
#endif
#include <cstddef>

#include <boost/timer.hpp>
- namespace test
- {
- // This value is required to ensure that a smart compiler's dead
- // code elimination doesn't optimize away anything we're testing.
- // We'll use it to compute the return code of the executable to make
- // sure it's needed.
- LIVE_CODE_TYPE live_code;
- // Call objects of the given Accumulator type repeatedly with x as
- // an argument.
- template <class Accumulator, class Arg>
- void hammer(Arg const& x, long const repeats)
- {
- // Strategy: because the sum in an accumulator after each call
- // depends on the previous value of the sum, the CPU's pipeline
- // might be stalled while waiting for the previous addition to
- // complete. Therefore, we allocate an array of accumulators,
- // and update them in sequence, so that there's no dependency
- // between adjacent addition operations.
- //
- // Additionally, if there were only one accumulator, the
- // compiler or CPU might decide to update the value in a
- // register rather that writing it back to memory. we want each
- // operation to at least update the L1 cache. *** Note: This
- // concern is specific to the particular application at which
- // we're targeting the test. ***
- // This has to be at least as large as the number of
- // simultaneous accumulations that can be executing in the
- // compiler pipeline. A safe number here is larger than the
- // machine's maximum pipeline depth. If you want to test the L2
- // or L3 cache, or main memory, you can increase the size of
- // this array. 1024 is an upper limit on the pipeline depth of
- // current vector machines.
- const std::size_t number_of_accumulators = 1024;
- live_code = 0; // reset to zero
- Accumulator a[number_of_accumulators];
-
- for (long iteration = 0; iteration < repeats; ++iteration)
- {
- for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
- {
- (*ap)(x);
- }
- }
- // Accumulate all the partial sums to avoid dead code
- // elimination.
- for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
- {
- live_code += ap->sum;
- }
- }
- // Measure the time required to hammer accumulators of the given
- // type with the argument x.
- template <class Accumulator, class T>
- double measure(T const& x, long const repeats)
- {
- // Hammer accumulators a couple of times to ensure the
- // instruction cache is full of our test code, and that we don't
- // measure the cost of a page fault for accessing the data page
- // containing the memory where the accumulators will be
- // allocated
- hammer<Accumulator>(x, repeats);
- hammer<Accumulator>(x, repeats);
- // Now start a timer
- boost::timer time;
- hammer<Accumulator>(x, repeats); // This time, we'll measure
- return time.elapsed() / repeats; // return the time of one iteration
- }
- }
|