  1. /*
  2. * Distributed under the Boost Software License, Version 1.0.
  3. * (See accompanying file LICENSE_1_0.txt or copy at
  4. * http://www.boost.org/LICENSE_1_0.txt)
  5. *
  6. * Copyright (c) 2009 Helge Bahmann
  7. * Copyright (c) 2012 Tim Blechmann
  8. * Copyright (c) 2014 - 2018 Andrey Semashev
  9. */
  10. /*!
  11. * \file atomic/detail/ops_gcc_x86_dcas.hpp
  12. *
  13. * This header contains implementation of the double-width CAS primitive for x86.
  14. */
  15. #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
  16. #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
  17. #include <boost/cstdint.hpp>
  18. #include <boost/memory_order.hpp>
  19. #include <boost/atomic/detail/config.hpp>
  20. #include <boost/atomic/detail/storage_type.hpp>
  21. #include <boost/atomic/detail/string_ops.hpp>
  22. #include <boost/atomic/capabilities.hpp>
  23. #ifdef BOOST_HAS_PRAGMA_ONCE
  24. #pragma once
  25. #endif
  26. namespace boost {
  27. namespace atomics {
  28. namespace detail {
  29. // Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory
  30. // operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang
  31. // is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason
  32. // we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards.
  33. // Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster.
  34. // The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer
  35. // with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it.
  36. #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
//! 64-bit atomic operations for 32-bit x86, implemented via the `lock cmpxchg8b` instruction.
//! All operations are CAS-based (full_cas_based == true); even plain loads and stores of
//! misaligned storage are implemented as cmpxchg8b loops. Aligned 8-byte storage is handled
//! with single SSE/x87 64-bit memory moves, which avoids the locked instruction.
template< bool Signed >
struct gcc_dcas_x86
{
    typedef typename make_storage_type< 8u >::type storage_type;
    typedef typename make_storage_type< 8u >::aligned aligned_storage_type;
    // uint32_t alias type used to address the high half of the 64-bit storage without
    // violating strict aliasing (BOOST_ATOMIC_DETAIL_MAY_ALIAS expands to a may_alias attribute
    // where supported).
    typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t;

    static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
    static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;

    //! Atomically stores \a v into \a storage. The memory_order argument is ignored here;
    //! ordering is provided by the surrounding framework and the implicit fencing of the
    //! locked instruction / the "memory" clobbers below.
    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        // Fast path: if the storage is 8-byte aligned, a single 64-bit memory move (SSE or x87)
        // stores the value in one access. This relies on aligned 8-byte SSE/x87 accesses being
        // performed atomically on x86 — the same assumption the load() fast path makes.
        if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
        {
#if defined(__SSE__)
            typedef float xmm_t __attribute__((__vector_size__(16)));
            xmm_t xmm_scratch;
            __asm__ __volatile__
            (
#if defined(__AVX__)
                "vmovq %[value], %[xmm_scratch]\n\t"
                "vmovq %[xmm_scratch], %[storage]\n\t"
#elif defined(__SSE2__)
                "movq %[value], %[xmm_scratch]\n\t"
                "movq %[xmm_scratch], %[storage]\n\t"
#else
                // SSE1 has no 64-bit integer move; movlps moves the low 64 bits of the xmm
                // register. The register is zeroed first to avoid a false dependency on its
                // previous contents.
                "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
                "movlps %[value], %[xmm_scratch]\n\t"
                "movlps %[xmm_scratch], %[storage]\n\t"
#endif
                : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch)
                : [value] "m" (v)
                : "memory"
            );
#else
            // No SSE available: bounce the value through the x87 FPU. fildll/fistpll perform
            // 64-bit integer memory load/store, which is a single 8-byte access for the aligned case.
            __asm__ __volatile__
            (
                "fildll %[value]\n\t"
                "fistpll %[storage]\n\t"
                : [storage] "=m" (storage)
                : [value] "m" (v)
                : "memory"
            );
#endif
        }
        else
        {
            // Slow path: (potentially) misaligned storage. Loop on lock cmpxchg8b until it
            // succeeds; on failure cmpxchg8b reloads eax:edx with the current value, so the
            // loop body needs no extra reload.
#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
            // PIC mode: ebx must be preserved, and memory operands must be avoided (see the
            // note at the top of this file). The storage address is passed in edi, the low
            // half of the value arrives in eax and is moved to ebx after saving ebx in esi.
            __asm__ __volatile__
            (
                "xchgl %%ebx, %%esi\n\t"
                "movl %%eax, %%ebx\n\t"
                "movl (%[dest]), %%eax\n\t"
                "movl 4(%[dest]), %%edx\n\t"
                ".align 16\n\t"
                "1: lock; cmpxchg8b (%[dest])\n\t"
                "jne 1b\n\t"
                "xchgl %%ebx, %%esi\n\t"
                :
                : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory"
            );
#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
            // Non-PIC mode: ebx is freely usable; the new value goes directly into ebx:ecx
            // and the storage is addressed via memory operands (low and high halves separately).
            __asm__ __volatile__
            (
                "movl %[dest_lo], %%eax\n\t"
                "movl %[dest_hi], %%edx\n\t"
                ".align 16\n\t"
                "1: lock; cmpxchg8b %[dest_lo]\n\t"
                "jne 1b\n\t"
                : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
                : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory"
            );
#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
        }
    }

    //! Atomically loads and returns the current value of \a storage. The memory_order
    //! argument is ignored (see store()).
    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
    {
        storage_type value;

        // Fast path: aligned 8-byte storage is read with a single SSE/x87 64-bit move,
        // mirroring the store() fast path.
        if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
        {
#if defined(__SSE__)
            typedef float xmm_t __attribute__((__vector_size__(16)));
            xmm_t xmm_scratch;
            __asm__ __volatile__
            (
#if defined(__AVX__)
                "vmovq %[storage], %[xmm_scratch]\n\t"
                "vmovq %[xmm_scratch], %[value]\n\t"
#elif defined(__SSE2__)
                "movq %[storage], %[xmm_scratch]\n\t"
                "movq %[xmm_scratch], %[value]\n\t"
#else
                "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
                "movlps %[storage], %[xmm_scratch]\n\t"
                "movlps %[xmm_scratch], %[value]\n\t"
#endif
                : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch)
                : [storage] "m" (storage)
                : "memory"
            );
#else
            // No SSE: 64-bit load/store through the x87 FPU, as in store().
            __asm__ __volatile__
            (
                "fildll %[storage]\n\t"
                "fistpll %[value]\n\t"
                : [value] "=m" (value)
                : [storage] "m" (storage)
                : "memory"
            );
#endif
        }
        else
        {
            // Slow path: read the value with a single cmpxchg8b. Whether the comparison
            // succeeds or fails, eax:edx ends up holding the current storage value.
            // Note that despite const qualification cmpxchg8b below may issue a store to the storage. The storage value
            // will not change, but this prevents the storage to reside in read-only memory.
#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
            // The compiler cannot allocate the eax:edx pair as one 64-bit operand ("A"
            // constraint), so the two halves are produced separately and recombined via memcpy.
            uint32_t value_bits[2];

            // We don't care for comparison result here; the previous value will be stored into value anyway.
            // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
            __asm__ __volatile__
            (
                "movl %%ebx, %%eax\n\t"
                "movl %%ecx, %%edx\n\t"
                "lock; cmpxchg8b %[storage]\n\t"
                : "=&a" (value_bits[0]), "=&d" (value_bits[1])
                : [storage] "m" (storage)
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
            );
            BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
            // We don't care for comparison result here; the previous value will be stored into value anyway.
            // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
            __asm__ __volatile__
            (
                "movl %%ebx, %%eax\n\t"
                "movl %%ecx, %%edx\n\t"
                "lock; cmpxchg8b %[storage]\n\t"
                : "=&A" (value)
                : [storage] "m" (storage)
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
            );
#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        }

        return value;
    }

    //! Atomically compares \a storage with \a expected and, if equal, replaces it with
    //! \a desired. On failure \a expected receives the observed value (cmpxchg8b leaves the
    //! current value in eax:edx, which is tied to \a expected via the "+A" constraint).
    //! Returns true on success. Memory order arguments are ignored (see store()).
    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
#if defined(__clang__)
        // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
        storage_type old_expected = expected;
        expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
        return expected == old_expected;
#elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
        // PIC mode: the low half of desired travels in esi and is swapped into ebx around the
        // cmpxchg8b, so that ebx is preserved across the asm block (see file-top note).
        bool success;
#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        // The ZF result of cmpxchg8b is extracted directly via the "=@ccz" flag output.
        __asm__ __volatile__
        (
            "xchgl %%ebx, %%esi\n\t"
            "lock; cmpxchg8b (%[dest])\n\t"
            "xchgl %%ebx, %%esi\n\t"
            : "+A" (expected), [success] "=@ccz" (success)
            : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        // No flag outputs support: materialize ZF into the success flag with sete.
        __asm__ __volatile__
        (
            "xchgl %%ebx, %%esi\n\t"
            "lock; cmpxchg8b (%[dest])\n\t"
            "xchgl %%ebx, %%esi\n\t"
            "sete %[success]\n\t"
            : "+A" (expected), [success] "=qm" (success)
            : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        return success;
#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
        // Non-PIC mode: desired goes directly into ebx:ecx and the storage is a memory operand.
        bool success;
#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        __asm__ __volatile__
        (
            "lock; cmpxchg8b %[dest]\n\t"
            : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success)
            : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        __asm__ __volatile__
        (
            "lock; cmpxchg8b %[dest]\n\t"
            "sete %[success]\n\t"
            : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
            : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        return success;
#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
    }

    //! Weak CAS. cmpxchg8b cannot fail spuriously, so this simply forwards to the strong version.
    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }

    //! Atomically replaces the value in \a storage with \a v and returns the previous value.
    //! Implemented as a cmpxchg8b loop: on failure the instruction reloads eax:edx with the
    //! current value, so the loop retries until the exchange succeeds.
    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
        // PIC mode: low half of v is passed in esi and swapped into ebx around the loop
        // to preserve ebx (see file-top note); the storage address is passed in edi.
#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // eax:edx cannot be expressed as one 64-bit operand; collect the halves separately.
        uint32_t old_bits[2];
        __asm__ __volatile__
        (
            "xchgl %%ebx, %%esi\n\t"
            "movl (%[dest]), %%eax\n\t"
            "movl 4(%[dest]), %%edx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg8b (%[dest])\n\t"
            "jne 1b\n\t"
            "xchgl %%ebx, %%esi\n\t"
            : "=a" (old_bits[0]), "=d" (old_bits[1])
            : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        storage_type old_value;
        BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
        return old_value;
#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        storage_type old_value;
        __asm__ __volatile__
        (
            "xchgl %%ebx, %%esi\n\t"
            "movl (%[dest]), %%eax\n\t"
            "movl 4(%[dest]), %%edx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg8b (%[dest])\n\t"
            "jne 1b\n\t"
            "xchgl %%ebx, %%esi\n\t"
            : "=A" (old_value)
            : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return old_value;
#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
#if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407
        // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below
        uint32_t old_bits[2];
        __asm__ __volatile__
        (
            "movl (%[dest]), %%eax\n\t"
            "movl 4(%[dest]), %%edx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg8b (%[dest])\n\t"
            "jne 1b\n\t"
            : "=&a" (old_bits[0]), "=&d" (old_bits[1])
            : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        storage_type old_value;
        BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
        return old_value;
#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // No eax:edx pair constraint available; collect the halves separately.
        uint32_t old_bits[2];
        __asm__ __volatile__
        (
            "movl %[dest_lo], %%eax\n\t"
            "movl %[dest_hi], %%edx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg8b %[dest_lo]\n\t"
            "jne 1b\n\t"
            : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
            : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        storage_type old_value;
        BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
        return old_value;
#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // Common non-PIC case: the previous value is returned directly in the eax:edx pair.
        storage_type old_value;
        __asm__ __volatile__
        (
            "movl %[dest_lo], %%eax\n\t"
            "movl %[dest_hi], %%edx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg8b %[dest_lo]\n\t"
            "jne 1b\n\t"
            : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
            : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return old_value;
#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
    }
};
  333. #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
  334. #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
//! 128-bit atomic operations for x86-64, implemented via the `lock cmpxchg16b` instruction.
//! All operations are CAS-based (full_cas_based == true); there is no aligned fast path here —
//! loads and stores are also implemented with cmpxchg16b. The 128-bit storage is accessed as
//! two 64-bit halves through the may_alias aliasing_uint64_t type.
template< bool Signed >
struct gcc_dcas_x86_64
{
    typedef typename make_storage_type< 16u >::type storage_type;
    typedef typename make_storage_type< 16u >::aligned aligned_storage_type;
    // uint64_t alias type used to address the halves of the 128-bit storage/value without
    // violating strict aliasing.
    typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t;

    static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
    static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;

    //! Atomically stores \a v into \a storage via a cmpxchg16b loop. On failure cmpxchg16b
    //! reloads rax:rdx with the current value, so the loop retries until the store takes
    //! effect. The memory_order argument is ignored here (ordering comes from the locked
    //! instruction and the "memory" clobber).
    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        __asm__ __volatile__
        (
            "movq %[dest_lo], %%rax\n\t"
            "movq %[dest_hi], %%rdx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg16b %[dest_lo]\n\t"
            "jne 1b\n\t"
            : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
            : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory"
        );
    }

    //! Atomically loads and returns the current value of \a storage using a single cmpxchg16b.
    //! Whether the comparison succeeds or fails, rax:rdx ends up holding the current value.
    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
    {
        // Note that despite const qualification cmpxchg16b below may issue a store to the storage. The storage value
        // will not change, but this prevents the storage to reside in read-only memory.
#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // The compiler cannot allocate the rax:rdx pair as one operand ("A" constraint), so
        // the two halves are produced separately and recombined via memcpy.
        uint64_t value_bits[2];

        // We don't care for comparison result here; the previous value will be stored into value anyway.
        // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
        __asm__ __volatile__
        (
            "movq %%rbx, %%rax\n\t"
            "movq %%rcx, %%rdx\n\t"
            "lock; cmpxchg16b %[storage]\n\t"
            : "=&a" (value_bits[0]), "=&d" (value_bits[1])
            : [storage] "m" (storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        storage_type value;
        BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
        return value;
#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        storage_type value;

        // We don't care for comparison result here; the previous value will be stored into value anyway.
        // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
        __asm__ __volatile__
        (
            "movq %%rbx, %%rax\n\t"
            "movq %%rcx, %%rdx\n\t"
            "lock; cmpxchg16b %[storage]\n\t"
            : "=&A" (value)
            : [storage] "m" (storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        return value;
#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
    }

    //! Atomically compares \a storage with \a expected and, if equal, replaces it with
    //! \a desired. On failure \a expected receives the observed value (cmpxchg16b leaves the
    //! current value in rax:rdx, which is tied to \a expected). Returns true on success.
    //! Memory order arguments are ignored (see store()).
    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
#if defined(__clang__)
        // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics
        storage_type old_expected = expected;
        expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
        return expected == old_expected;
#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap
        bool success;
        __asm__ __volatile__
        (
            "lock; cmpxchg16b %[dest]\n\t"
            "sete %[success]\n\t"
            : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success)
            : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return success;
#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        bool success;
#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        // The ZF result of cmpxchg16b is extracted directly via the "=@ccz" flag output.
        __asm__ __volatile__
        (
            "lock; cmpxchg16b %[dest]\n\t"
            : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success)
            : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        // No flag outputs support: materialize ZF into the success flag with sete.
        __asm__ __volatile__
        (
            "lock; cmpxchg16b %[dest]\n\t"
            "sete %[success]\n\t"
            : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
            : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
        return success;
#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
    }

    //! Weak CAS. cmpxchg16b cannot fail spuriously, so this simply forwards to the strong version.
    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }

    //! Atomically replaces the value in \a storage with \a v and returns the previous value,
    //! implemented as a cmpxchg16b loop (on failure the instruction reloads rax:rdx with the
    //! current value, so the loop retries until it succeeds).
    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // No rax:rdx pair constraint available; collect the halves separately and recombine
        // via memcpy.
        uint64_t old_bits[2];
        __asm__ __volatile__
        (
            "movq %[dest_lo], %%rax\n\t"
            "movq %[dest_hi], %%rdx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg16b %[dest_lo]\n\t"
            "jne 1b\n\t"
            : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1])
            : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        storage_type old_value;
        BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
        return old_value;
#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
        // Common case: the previous value is returned directly in the rax:rdx pair.
        storage_type old_value;
        __asm__ __volatile__
        (
            "movq %[dest_lo], %%rax\n\t"
            "movq %[dest_hi], %%rdx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg16b %[dest_lo]\n\t"
            "jne 1b\n\t"
            : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
            : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return old_value;
#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
    }
};
  476. #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
  477. } // namespace detail
  478. } // namespace atomics
  479. } // namespace boost
  480. #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_