// Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2005. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include namespace test { // // This test measures the abstraction overhead of using the named // parameter interface. Some actual test results have been recorded // in timings.txt in this source file's directory, or // http://www.boost.org/libs/parameter/test/timings.txt. // // Caveats: // // 1. This test penalizes the named parameter library slightly, by // passing two arguments through the named interface, while // only passing one through the plain C++ interface. // // 2. This test does not measure the case where an ArgumentPack is // so large that it doesn't fit in the L1 cache. // // 3. Although we've tried to make this test as general as possible, // we are targeting it at a specific application. Where that // affects design decisions, we've noted it below in ***...***. // // 4. The first time you run this program, the time may not be // representative because of disk and memory cache effects, so // always run it multiple times and ignore the first // measurement. This approach will also allow you to estimate // the statistical error of your test by observing the // variation in the valid times. // // 5. Try to run this program on a machine that's otherwise idle, // or other processes and even device hardware interrupts may // interfere by causing caches to be flushed. // Accumulator function object with plain C++ interface template struct plain_weight_running_total { plain_weight_running_total() #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) : sum(T()) #else : sum() #endif { } void operator()(T w) { this->sum += w; } T sum; }; BOOST_PARAMETER_NAME(weight) BOOST_PARAMETER_NAME(value) // Accumulator function object with named parameter interface template struct named_param_weight_running_total { named_param_weight_running_total() #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) : sum(T()) #else : sum() #endif { } template void operator()(ArgumentPack const& variates) { this->sum += variates[test::_weight]; } T sum; }; // This value is required to ensure that a smart compiler's dead code // elimination doesn't optimize away anything we're testing. We'll use it // to compute the return code of the executable to make sure it's needed. double live_code; // Call objects of the given Accumulator type repeatedly // with x an argument. template void hammer(Arg const& x, long const repeats) { // Strategy: because the sum in an accumulator after each call // depends on the previous value of the sum, the CPU's pipeline // might be stalled while waiting for the previous addition to // complete. Therefore, we allocate an array of accumulators, // and update them in sequence, so that there's no dependency // between adjacent addition operations. // // Additionally, if there were only one accumulator, the compiler or // CPU might decide to update the value in a register rather than // writing it back to memory. We want each operation to at least // update the L1 cache. *** Note: This concern is specific to the // particular application at which we're targeting the test. *** // This has to be at least as large as the number of simultaneous // accumulations that can be executing in the compiler pipeline. A // safe number here is larger than the machine's maximum pipeline // depth. If you want to test the L2 or L3 cache, or main memory, // you can increase the size of this array. 1024 is an upper limit // on the pipeline depth of current vector machines. std::size_t const number_of_accumulators = 1024; Accumulator a[number_of_accumulators]; for (long iteration = 0; iteration < repeats; ++iteration) { for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) { (*ap)(x); } } // Accumulate all the partial sums to avoid dead code elimination. for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) { test::live_code += ap->sum; } } // Measure the time required to hammer accumulators of the given // type with the argument x. template double measure(T const& x, long const repeats) { // Hammer accumulators a couple of times to ensure the instruction // cache is full of our test code, and that we don't measure the cost // of a page fault for accessing the data page containing the memory // where the accumulators will be allocated. test::hammer(x, repeats); test::hammer(x, repeats); // Now start a timer. boost::timer time; test::hammer(x, repeats); // This time, we'll measure. return time.elapsed(); } } int main() { // First decide how many repetitions to measure. long repeats = 100; double measured = 0; while (measured < 1.0 && repeats <= 10000000) { repeats *= 10; boost::timer time; test::hammer >(.1, repeats); test::hammer >( (test::_weight = .1, test::_value = .2), repeats ); measured = time.elapsed(); } std::cout << "plain time: " << test::measure >( .1, repeats ) << std::endl; std::cout << "named parameter time: " << test::measure >( (test::_weight = .1, test::_value = .2), repeats ) << std::endl; // This is ultimately responsible for preventing all the test code // from being optimized away. Change this to return 0 and you // unplug the whole test's life support system. return test::live_code < 0.; }