threefry_engine.hpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
  11. #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP
  12. #include <algorithm>
  13. #include <boost/compute/types.hpp>
  14. #include <boost/compute/buffer.hpp>
  15. #include <boost/compute/kernel.hpp>
  16. #include <boost/compute/context.hpp>
  17. #include <boost/compute/program.hpp>
  18. #include <boost/compute/command_queue.hpp>
  19. #include <boost/compute/algorithm/transform.hpp>
  20. #include <boost/compute/detail/iterator_range_size.hpp>
  21. #include <boost/compute/utility/program_cache.hpp>
  22. #include <boost/compute/container/vector.hpp>
  23. #include <boost/compute/iterator/discard_iterator.hpp>
  24. namespace boost {
  25. namespace compute {
  26. /// \class threefry_engine
  27. /// \brief Threefry pseudorandom number generator.
  28. template<class T = uint_>
  29. class threefry_engine
  30. {
  31. public:
  32. typedef T result_type;
  33. static const ulong_ default_seed = 0UL;
  34. /// Creates a new threefry_engine and seeds it with \p value.
  35. explicit threefry_engine(command_queue &queue,
  36. ulong_ value = default_seed)
  37. : m_key(value),
  38. m_counter(0),
  39. m_context(queue.get_context())
  40. {
  41. // Load program
  42. load_program();
  43. }
  44. /// Creates a new threefry_engine object as a copy of \p other.
  45. threefry_engine(const threefry_engine<T> &other)
  46. : m_key(other.m_key),
  47. m_counter(other.m_counter),
  48. m_context(other.m_context),
  49. m_program(other.m_program)
  50. {
  51. }
  52. /// Copies \p other to \c *this.
  53. threefry_engine<T>& operator=(const threefry_engine<T> &other)
  54. {
  55. if(this != &other){
  56. m_key = other.m_key;
  57. m_counter = other.m_counter;
  58. m_context = other.m_context;
  59. m_program = other.m_program;
  60. }
  61. return *this;
  62. }
  63. /// Destroys the threefry_engine object.
  64. ~threefry_engine()
  65. {
  66. }
  67. /// Seeds the random number generator with \p value.
  68. ///
  69. /// \param value seed value for the random-number generator
  70. /// \param queue command queue to perform the operation
  71. ///
  72. /// If no seed value is provided, \c default_seed is used.
  73. void seed(ulong_ value, command_queue &queue)
  74. {
  75. (void) queue;
  76. m_key = value;
  77. // Reset counter
  78. m_counter = 0;
  79. }
  80. /// \overload
  81. void seed(command_queue &queue)
  82. {
  83. seed(default_seed, queue);
  84. }
  85. /// Generates random numbers and stores them to the range [\p first, \p last).
  86. template<class OutputIterator>
  87. void generate(OutputIterator first, OutputIterator last, command_queue &queue)
  88. {
  89. const size_t size = detail::iterator_range_size(first, last);
  90. kernel fill_kernel(m_program, "fill");
  91. fill_kernel.set_arg(0, first.get_buffer());
  92. fill_kernel.set_arg(1, static_cast<const uint_>(size));
  93. fill_kernel.set_arg(2, m_key);
  94. fill_kernel.set_arg(3, m_counter);
  95. queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0);
  96. discard(size, queue);
  97. }
  98. /// \internal_
  99. void generate(discard_iterator first, discard_iterator last, command_queue &queue)
  100. {
  101. (void) queue;
  102. ulong_ offset = std::distance(first, last);
  103. m_counter += offset;
  104. }
  105. /// Generates random numbers, transforms them with \p op, and then stores
  106. /// them to the range [\p first, \p last).
  107. template<class OutputIterator, class Function>
  108. void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
  109. {
  110. vector<T> tmp(std::distance(first, last), queue.get_context());
  111. generate(tmp.begin(), tmp.end(), queue);
  112. ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue);
  113. }
  114. /// Generates \p z random numbers and discards them.
  115. void discard(size_t z, command_queue &queue)
  116. {
  117. generate(discard_iterator(0), discard_iterator(z), queue);
  118. }
  119. private:
  120. void load_program()
  121. {
  122. boost::shared_ptr<program_cache> cache =
  123. program_cache::get_global_cache(m_context);
  124. std::string cache_key =
  125. std::string("__boost_threefry_engine_32x2");
  126. // Copyright 2010-2012, D. E. Shaw Research.
  127. // All rights reserved.
  128. // Redistribution and use in source and binary forms, with or without
  129. // modification, are permitted provided that the following conditions are
  130. // met:
  131. // * Redistributions of source code must retain the above copyright
  132. // notice, this list of conditions, and the following disclaimer.
  133. // * Redistributions in binary form must reproduce the above copyright
  134. // notice, this list of conditions, and the following disclaimer in the
  135. // documentation and/or other materials provided with the distribution.
  136. // * Neither the name of D. E. Shaw Research nor the names of its
  137. // contributors may be used to endorse or promote products derived from
  138. // this software without specific prior written permission.
  139. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  140. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  141. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  142. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  143. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  144. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  145. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  146. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  147. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  148. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  149. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  150. const char source[] =
  151. "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
  152. "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
  153. "enum r123_enum_threefry32x2 {\n"
  154. " R_32x2_0_0=13,\n"
  155. " R_32x2_1_0=15,\n"
  156. " R_32x2_2_0=26,\n"
  157. " R_32x2_3_0= 6,\n"
  158. " R_32x2_4_0=17,\n"
  159. " R_32x2_5_0=29,\n"
  160. " R_32x2_6_0=16,\n"
  161. " R_32x2_7_0=24\n"
  162. "};\n"
  163. "static uint RotL_32(uint x, uint N)\n"
  164. "{\n"
  165. " return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
  166. "}\n"
  167. "struct r123array2x32 {\n"
  168. " uint v[2];\n"
  169. "};\n"
  170. "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
  171. "typedef struct r123array2x32 threefry2x32_key_t;\n"
  172. "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
  173. "{\n"
  174. " threefry2x32_ctr_t X;\n"
  175. " uint ks[3];\n"
  176. " uint i; \n"
  177. " ks[2] = SKEIN_KS_PARITY_32;\n"
  178. " for (i=0;i < 2; i++) {\n"
  179. " ks[i] = k.v[i];\n"
  180. " X.v[i] = in.v[i];\n"
  181. " ks[2] ^= k.v[i];\n"
  182. " }\n"
  183. " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
  184. " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
  185. " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
  186. " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
  187. " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
  188. " if(Nrounds>3){\n"
  189. " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
  190. " X.v[1] += 1;\n"
  191. " }\n"
  192. " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
  193. " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
  194. " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
  195. " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
  196. " if(Nrounds>7){\n"
  197. " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
  198. " X.v[1] += 2;\n"
  199. " }\n"
  200. " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
  201. " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
  202. " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
  203. " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
  204. " if(Nrounds>11){\n"
  205. " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
  206. " X.v[1] += 3;\n"
  207. " }\n"
  208. " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
  209. " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
  210. " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
  211. " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
  212. " if(Nrounds>15){\n"
  213. " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
  214. " X.v[1] += 4;\n"
  215. " }\n"
  216. " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
  217. " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
  218. " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
  219. " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
  220. " if(Nrounds>19){\n"
  221. " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
  222. " X.v[1] += 5;\n"
  223. " }\n"
  224. " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
  225. " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
  226. " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
  227. " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
  228. " if(Nrounds>23){\n"
  229. " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
  230. " X.v[1] += 6;\n"
  231. " }\n"
  232. " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
  233. " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
  234. " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
  235. " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
  236. " if(Nrounds>27){\n"
  237. " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
  238. " X.v[1] += 7;\n"
  239. " }\n"
  240. " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
  241. " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
  242. " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
  243. " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
  244. " if(Nrounds>31){\n"
  245. " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
  246. " X.v[1] += 8;\n"
  247. " }\n"
  248. " return X;\n"
  249. "}\n"
  250. "__kernel void fill(__global uint * output,\n"
  251. " const uint output_size,\n"
  252. " const uint2 key,\n"
  253. " const uint2 counter)\n"
  254. "{\n"
  255. " uint gid = get_global_id(0);\n"
  256. " threefry2x32_ctr_t c;\n"
  257. " c.v[0] = counter.x + gid;\n"
  258. " c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n"
  259. "\n"
  260. " threefry2x32_key_t k = { {key.x, key.y} };\n"
  261. "\n"
  262. " threefry2x32_ctr_t result;\n"
  263. " result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n"
  264. "\n"
  265. " if(gid < output_size/2)\n"
  266. " {\n"
  267. " output[2 * gid] = result.v[0];\n"
  268. " output[2 * gid + 1] = result.v[1];\n"
  269. " }\n"
  270. " else if(gid < (output_size+1)/2)\n"
  271. " output[2 * gid] = result.v[0];\n"
  272. "}\n";
  273. m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
  274. }
  275. // Engine state
  276. ulong_ m_key; // 2 x 32bit
  277. ulong_ m_counter;
  278. // OpenCL
  279. context m_context;
  280. program m_program;
  281. };
  282. } // end compute namespace
  283. } // end boost namespace
  284. #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP