// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
// 2005. Distributed under the Boost Software License, Version
// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#if !defined(LIVE_CODE_TYPE)
# define LIVE_CODE_TYPE int
#endif

#include <cstddef>  // std::size_t
#include <ctime>    // std::clock, std::clock_t, CLOCKS_PER_SEC

namespace test
{
  // This value is required to ensure that a smart compiler's dead
  // code elimination doesn't optimize away anything we're testing.
  // We'll use it to compute the return code of the executable to make
  // sure it's needed.
  //
  // NOTE(review): this is a non-inline namespace-scope definition, so
  // including this file from more than one translation unit of the
  // same program would define it more than once.
  LIVE_CODE_TYPE live_code;

  // Call objects of the given Accumulator type repeatedly with x as
  // an argument.
  //
  // Requirements on Accumulator, as used below:
  //   - default constructible (an array of them is created), and the
  //     default constructor is expected to initialise `sum`, since the
  //     array is not otherwise initialised here;
  //   - callable as `acc(x)`;
  //   - has a member `sum` that can be added to live_code.
  //
  // Accumulator must be given explicitly; Arg is deduced from x.
  //
  // On return, live_code holds the total of the `sum` members of all
  // the accumulators (it is reset to zero on entry).  If repeats is
  // not positive no accumulator is ever called.
  template <class Accumulator, class Arg>
  void hammer(Arg const& x, long const repeats)
  {
      // Strategy: because the sum in an accumulator after each call
      // depends on the previous value of the sum, the CPU's pipeline
      // might be stalled while waiting for the previous addition to
      // complete.  Therefore, we allocate an array of accumulators,
      // and update them in sequence, so that there's no dependency
      // between adjacent addition operations.
      //
      // Additionally, if there were only one accumulator, the
      // compiler or CPU might decide to update the value in a
      // register rather than writing it back to memory.  We want each
      // operation to at least update the L1 cache.  *** Note: This
      // concern is specific to the particular application at which
      // we're targeting the test. ***

      // This has to be at least as large as the number of
      // simultaneous accumulations that can be executing in the
      // pipeline.  A safe number here is larger than the machine's
      // maximum pipeline depth.  If you want to test the L2 or L3
      // cache, or main memory, you can increase the size of this
      // array.  1024 is an upper limit on the pipeline depth of
      // current vector machines.
      const std::size_t number_of_accumulators = 1024;

      live_code = 0; // reset to zero

      Accumulator a[number_of_accumulators];

      for (long iteration = 0; iteration < repeats; ++iteration)
      {
          for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
          {
              (*ap)(x);
          }
      }

      // Accumulate all the partial sums to avoid dead code
      // elimination.
      for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
      {
          live_code += ap->sum;
      }
  }

  // Measure the time required to hammer accumulators of the given
  // type with the argument x.
  //
  // Accumulator must be given explicitly; T is deduced from x.
  //
  // Returns the processor time, in seconds, consumed by one timed
  // call of hammer<Accumulator>(x, repeats), divided by repeats.
  // repeats must be positive: the result is divided by it.
  template <class Accumulator, class T>
  double measure(T const& x, long const repeats)
  {
      // Hammer accumulators a couple of times to ensure the
      // instruction cache is full of our test code, and that we don't
      // measure the cost of a page fault for accessing the data page
      // containing the memory where the accumulators will be
      // allocated.
      hammer<Accumulator>(x, repeats);
      hammer<Accumulator>(x, repeats);

      // Now start a timer.  std::clock() is used directly so that no
      // library beyond the standard one is needed.
      std::clock_t const start = std::clock();
      hammer<Accumulator>(x, repeats);  // This time, we'll measure
      std::clock_t const stop = std::clock();

      double const elapsed_seconds =
          static_cast<double>(stop - start) / CLOCKS_PER_SEC;

      return elapsed_seconds / repeats; // return the time of one iteration
  }
}