ordering.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. // Copyright (c) 2011 Helge Bahmann
  2. // Copyright (c) 2012 Tim Blechmann
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. // Attempt to determine whether the memory ordering/ fence operations
  8. // work as expected:
  9. // Let two threads race accessing multiple shared variables and
  10. // verify that "observable" order of operations matches with the
  11. // ordering constraints specified.
  12. //
  13. // We assume that "memory ordering violation" events are exponentially
  14. // distributed, with unknown "average time between violations"
  15. // (which is just the reciprocal of exp distribution parameter lambda).
  16. // Use a "relaxed ordering" implementation that intentionally exhibits
  17. // a (hopefully observable) such violation to compute the maximum-likelihood
  18. // estimate for this time. From this, compute an estimate that covers the
  19. // unknown value with 0.995 confidence (using chi square quantile).
  20. //
  21. // Use this estimate to pick a timeout for the race tests of the
  22. // atomic implementations such that under the assumed distribution
  23. // we get 0.995 probability to detect a race (if there is one).
  24. //
  25. // Overall this yields 0.995 * 0.995 > 0.99 confidence that the
  26. // fences work as expected if this test program does not
  27. // report an error.
  28. #include <boost/atomic.hpp>
  29. #include <boost/bind.hpp>
  30. #include <boost/date_time/posix_time/posix_time_types.hpp>
  31. #include <boost/thread/thread.hpp>
  32. #include <boost/thread/thread_time.hpp>
  33. #include <boost/thread/locks.hpp>
  34. #include <boost/thread/mutex.hpp>
  35. #include <boost/thread/condition_variable.hpp>
  36. #include <boost/thread/barrier.hpp>
  37. #include <boost/core/lightweight_test.hpp>
  38. // Two threads perform the following operations:
  39. //
  40. // thread # 1 thread # 2
  41. // store(a, 1) store(b, 1)
  42. // x = read(b) y = read(a)
  43. //
  44. // Under relaxed memory ordering, the case (x, y) == (0, 0) is
  45. // possible. Under sequential consistency, this case is impossible.
  46. //
  47. // This "problem" is reproducible on all platforms, even x86.
  48. template<boost::memory_order store_order, boost::memory_order load_order>
  49. class total_store_order_test {
  50. public:
  51. total_store_order_test(void);
  52. void run(boost::posix_time::time_duration & timeout);
  53. bool detected_conflict(void) const { return detected_conflict_; }
  54. private:
  55. void thread1fn(void);
  56. void thread2fn(void);
  57. void check_conflict(void);
  58. boost::atomic<int> a_;
  59. /* insert a bit of padding to push the two variables into
  60. different cache lines and increase the likelihood of detecting
  61. a conflict */
  62. char pad1_[512];
  63. boost::atomic<int> b_;
  64. char pad2_[512];
  65. boost::barrier barrier_;
  66. int vrfyb1_, vrfya2_;
  67. boost::atomic<bool> terminate_threads_;
  68. boost::atomic<int> termination_consensus_;
  69. bool detected_conflict_;
  70. boost::mutex m_;
  71. boost::condition_variable c_;
  72. };
  73. template<boost::memory_order store_order, boost::memory_order load_order>
  74. total_store_order_test<store_order, load_order>::total_store_order_test(void)
  75. : a_(0), b_(0), barrier_(2),
  76. vrfyb1_(0), vrfya2_(0),
  77. terminate_threads_(false), termination_consensus_(0),
  78. detected_conflict_(false)
  79. {
  80. }
  81. template<boost::memory_order store_order, boost::memory_order load_order>
  82. void
  83. total_store_order_test<store_order, load_order>::run(boost::posix_time::time_duration & timeout)
  84. {
  85. boost::system_time start = boost::get_system_time();
  86. boost::system_time end = start + timeout;
  87. boost::thread t1(boost::bind(&total_store_order_test::thread1fn, this));
  88. boost::thread t2(boost::bind(&total_store_order_test::thread2fn, this));
  89. {
  90. boost::mutex::scoped_lock guard(m_);
  91. while (boost::get_system_time() < end && !detected_conflict_)
  92. c_.timed_wait(guard, end);
  93. }
  94. terminate_threads_.store(true, boost::memory_order_relaxed);
  95. t2.join();
  96. t1.join();
  97. boost::posix_time::time_duration duration = boost::get_system_time() - start;
  98. if (duration < timeout)
  99. timeout = duration;
  100. }
  101. volatile int backoff_dummy;
  102. template<boost::memory_order store_order, boost::memory_order load_order>
  103. void
  104. total_store_order_test<store_order, load_order>::thread1fn(void)
  105. {
  106. for (;;) {
  107. a_.store(1, store_order);
  108. int b = b_.load(load_order);
  109. barrier_.wait();
  110. vrfyb1_ = b;
  111. barrier_.wait();
  112. check_conflict();
  113. /* both threads synchronize via barriers, so either
  114. both threads must exit here, or they must both do
  115. another round, otherwise one of them will wait forever */
  116. if (terminate_threads_.load(boost::memory_order_relaxed)) for (;;) {
  117. int tmp = termination_consensus_.fetch_or(1, boost::memory_order_relaxed);
  118. if (tmp == 3)
  119. return;
  120. if (tmp & 4)
  121. break;
  122. }
  123. termination_consensus_.fetch_xor(4, boost::memory_order_relaxed);
  124. unsigned int delay = rand() % 10000;
  125. a_.store(0, boost::memory_order_relaxed);
  126. barrier_.wait();
  127. while(delay--) { backoff_dummy = delay; }
  128. }
  129. }
  130. template<boost::memory_order store_order, boost::memory_order load_order>
  131. void
  132. total_store_order_test<store_order, load_order>::thread2fn(void)
  133. {
  134. for (;;) {
  135. b_.store(1, store_order);
  136. int a = a_.load(load_order);
  137. barrier_.wait();
  138. vrfya2_ = a;
  139. barrier_.wait();
  140. check_conflict();
  141. /* both threads synchronize via barriers, so either
  142. both threads must exit here, or they must both do
  143. another round, otherwise one of them will wait forever */
  144. if (terminate_threads_.load(boost::memory_order_relaxed)) for (;;) {
  145. int tmp = termination_consensus_.fetch_or(2, boost::memory_order_relaxed);
  146. if (tmp == 3)
  147. return;
  148. if (tmp & 4)
  149. break;
  150. }
  151. termination_consensus_.fetch_xor(4, boost::memory_order_relaxed);
  152. unsigned int delay = rand() % 10000;
  153. b_.store(0, boost::memory_order_relaxed);
  154. barrier_.wait();
  155. while(delay--) { backoff_dummy = delay; }
  156. }
  157. }
  158. template<boost::memory_order store_order, boost::memory_order load_order>
  159. void
  160. total_store_order_test<store_order, load_order>::check_conflict(void)
  161. {
  162. if (vrfyb1_ == 0 && vrfya2_ == 0) {
  163. boost::mutex::scoped_lock guard(m_);
  164. detected_conflict_ = true;
  165. terminate_threads_.store(true, boost::memory_order_relaxed);
  166. c_.notify_all();
  167. }
  168. }
  169. void
  170. test_seq_cst(void)
  171. {
  172. double sum = 0.0;
  173. /* take 10 samples */
  174. for (size_t n = 0; n < 10; n++) {
  175. boost::posix_time::time_duration timeout(0, 0, 10);
  176. total_store_order_test<boost::memory_order_relaxed, boost::memory_order_relaxed> test;
  177. test.run(timeout);
  178. if (!test.detected_conflict()) {
  179. std::cout << "Failed to detect order=seq_cst violation while ith order=relaxed -- intrinsic ordering too strong for this test\n";
  180. return;
  181. }
  182. std::cout << "seq_cst violation with order=relaxed after " << timeout.total_microseconds() << " us\n";
  183. sum = sum + timeout.total_microseconds();
  184. }
  185. /* determine maximum likelihood estimate for average time between
  186. race observations */
  187. double avg_race_time_mle = (sum / 10);
  188. /* pick 0.995 confidence (7.44 = chi square 0.995 confidence) */
  189. double avg_race_time_995 = avg_race_time_mle * 2 * 10 / 7.44;
  190. /* 5.298 = 0.995 quantile of exponential distribution */
  191. boost::posix_time::time_duration timeout = boost::posix_time::microseconds((long)(5.298 * avg_race_time_995));
  192. std::cout << "run seq_cst for " << timeout.total_microseconds() << " us\n";
  193. total_store_order_test<boost::memory_order_seq_cst, boost::memory_order_seq_cst> test;
  194. test.run(timeout);
  195. BOOST_TEST(!test.detected_conflict()); // sequential consistency error
  196. }
  197. int main(int, char *[])
  198. {
  199. test_seq_cst();
  200. return boost::report_errors();
  201. }