fill.hpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
  11. #define BOOST_COMPUTE_ALGORITHM_FILL_HPP
  12. #include <iterator>
  13. #include <boost/static_assert.hpp>
  14. #include <boost/mpl/int.hpp>
  15. #include <boost/mpl/vector.hpp>
  16. #include <boost/mpl/contains.hpp>
  17. #include <boost/utility/enable_if.hpp>
  18. #include <boost/compute/cl.hpp>
  19. #include <boost/compute/system.hpp>
  20. #include <boost/compute/command_queue.hpp>
  21. #include <boost/compute/algorithm/copy.hpp>
  22. #include <boost/compute/async/future.hpp>
  23. #include <boost/compute/iterator/constant_iterator.hpp>
  24. #include <boost/compute/iterator/discard_iterator.hpp>
  25. #include <boost/compute/detail/is_buffer_iterator.hpp>
  26. #include <boost/compute/detail/iterator_range_size.hpp>
  27. #include <boost/compute/type_traits/is_device_iterator.hpp>
  28. namespace boost {
  29. namespace compute {
  30. namespace detail {
  31. namespace mpl = boost::mpl;
  32. // fills the range [first, first + count) with value using copy()
  33. template<class BufferIterator, class T>
  34. inline void fill_with_copy(BufferIterator first,
  35. size_t count,
  36. const T &value,
  37. command_queue &queue)
  38. {
  39. ::boost::compute::copy(
  40. ::boost::compute::make_constant_iterator(value, 0),
  41. ::boost::compute::make_constant_iterator(value, count),
  42. first,
  43. queue
  44. );
  45. }
  46. // fills the range [first, first + count) with value using copy_async()
  47. template<class BufferIterator, class T>
  48. inline future<void> fill_async_with_copy(BufferIterator first,
  49. size_t count,
  50. const T &value,
  51. command_queue &queue)
  52. {
  53. return ::boost::compute::copy_async(
  54. ::boost::compute::make_constant_iterator(value, 0),
  55. ::boost::compute::make_constant_iterator(value, count),
  56. first,
  57. queue
  58. );
  59. }
  60. #if defined(BOOST_COMPUTE_CL_VERSION_1_2)
  61. // meta-function returing true if Iterator points to a range of values
  62. // that can be filled using clEnqueueFillBuffer(). to meet this criteria
  63. // it must have a buffer accessible through iter.get_buffer() and the
  64. // size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}.
  65. template<class Iterator>
  66. struct is_valid_fill_buffer_iterator :
  67. public mpl::and_<
  68. is_buffer_iterator<Iterator>,
  69. mpl::contains<
  70. mpl::vector<
  71. mpl::int_<1>,
  72. mpl::int_<2>,
  73. mpl::int_<4>,
  74. mpl::int_<8>,
  75. mpl::int_<16>,
  76. mpl::int_<32>,
  77. mpl::int_<64>,
  78. mpl::int_<128>
  79. >,
  80. mpl::int_<
  81. sizeof(typename std::iterator_traits<Iterator>::value_type)
  82. >
  83. >
  84. >::type { };
  85. template<>
  86. struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
  87. // specialization which uses clEnqueueFillBuffer for buffer iterators
  88. template<class BufferIterator, class T>
  89. inline void
  90. dispatch_fill(BufferIterator first,
  91. size_t count,
  92. const T &value,
  93. command_queue &queue,
  94. typename boost::enable_if<
  95. is_valid_fill_buffer_iterator<BufferIterator>
  96. >::type* = 0)
  97. {
  98. typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
  99. if(count == 0){
  100. // nothing to do
  101. return;
  102. }
  103. // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
  104. if(!queue.check_device_version(1, 2)){
  105. return fill_with_copy(first, count, value, queue);
  106. }
  107. value_type pattern = static_cast<value_type>(value);
  108. size_t offset = static_cast<size_t>(first.get_index());
  109. if(count == 1){
  110. // use clEnqueueWriteBuffer() directly when writing a single value
  111. // to the device buffer. this is potentially more efficient and also
  112. // works around a bug in the intel opencl driver.
  113. queue.enqueue_write_buffer(
  114. first.get_buffer(),
  115. offset * sizeof(value_type),
  116. sizeof(value_type),
  117. &pattern
  118. );
  119. }
  120. else {
  121. queue.enqueue_fill_buffer(
  122. first.get_buffer(),
  123. &pattern,
  124. sizeof(value_type),
  125. offset * sizeof(value_type),
  126. count * sizeof(value_type)
  127. );
  128. }
  129. }
  130. template<class BufferIterator, class T>
  131. inline future<void>
  132. dispatch_fill_async(BufferIterator first,
  133. size_t count,
  134. const T &value,
  135. command_queue &queue,
  136. typename boost::enable_if<
  137. is_valid_fill_buffer_iterator<BufferIterator>
  138. >::type* = 0)
  139. {
  140. typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
  141. // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
  142. if(!queue.check_device_version(1, 2)){
  143. return fill_async_with_copy(first, count, value, queue);
  144. }
  145. value_type pattern = static_cast<value_type>(value);
  146. size_t offset = static_cast<size_t>(first.get_index());
  147. event event_ =
  148. queue.enqueue_fill_buffer(first.get_buffer(),
  149. &pattern,
  150. sizeof(value_type),
  151. offset * sizeof(value_type),
  152. count * sizeof(value_type));
  153. return future<void>(event_);
  154. }
  155. #ifdef BOOST_COMPUTE_CL_VERSION_2_0
  156. // specializations for svm_ptr<T>
  157. template<class T>
  158. inline void dispatch_fill(svm_ptr<T> first,
  159. size_t count,
  160. const T &value,
  161. command_queue &queue)
  162. {
  163. if(count == 0){
  164. return;
  165. }
  166. queue.enqueue_svm_fill(
  167. first.get(), &value, sizeof(T), count * sizeof(T)
  168. );
  169. }
  170. template<class T>
  171. inline future<void> dispatch_fill_async(svm_ptr<T> first,
  172. size_t count,
  173. const T &value,
  174. command_queue &queue)
  175. {
  176. if(count == 0){
  177. return future<void>();
  178. }
  179. event event_ = queue.enqueue_svm_fill(
  180. first.get(), &value, sizeof(T), count * sizeof(T)
  181. );
  182. return future<void>(event_);
  183. }
  184. #endif // BOOST_COMPUTE_CL_VERSION_2_0
  185. // default implementations
  186. template<class BufferIterator, class T>
  187. inline void
  188. dispatch_fill(BufferIterator first,
  189. size_t count,
  190. const T &value,
  191. command_queue &queue,
  192. typename boost::disable_if<
  193. is_valid_fill_buffer_iterator<BufferIterator>
  194. >::type* = 0)
  195. {
  196. fill_with_copy(first, count, value, queue);
  197. }
  198. template<class BufferIterator, class T>
  199. inline future<void>
  200. dispatch_fill_async(BufferIterator first,
  201. size_t count,
  202. const T &value,
  203. command_queue &queue,
  204. typename boost::disable_if<
  205. is_valid_fill_buffer_iterator<BufferIterator>
  206. >::type* = 0)
  207. {
  208. return fill_async_with_copy(first, count, value, queue);
  209. }
  210. #else
  211. template<class BufferIterator, class T>
  212. inline void dispatch_fill(BufferIterator first,
  213. size_t count,
  214. const T &value,
  215. command_queue &queue)
  216. {
  217. fill_with_copy(first, count, value, queue);
  218. }
  219. template<class BufferIterator, class T>
  220. inline future<void> dispatch_fill_async(BufferIterator first,
  221. size_t count,
  222. const T &value,
  223. command_queue &queue)
  224. {
  225. return fill_async_with_copy(first, count, value, queue);
  226. }
  227. #endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2)
  228. } // end detail namespace
  229. /// Fills the range [\p first, \p last) with \p value.
  230. ///
  231. /// \param first first element in the range to fill
  232. /// \param last last element in the range to fill
  233. /// \param value value to copy to each element
  234. /// \param queue command queue to perform the operation
  235. ///
  236. /// For example, to fill a vector on the device with sevens:
  237. /// \code
  238. /// // vector on the device
  239. /// boost::compute::vector<int> vec(10, context);
  240. ///
  241. /// // fill vector with sevens
  242. /// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
  243. /// \endcode
  244. ///
  245. /// Space complexity: \Omega(1)
  246. ///
  247. /// \see boost::compute::fill_n()
  248. template<class BufferIterator, class T>
  249. inline void fill(BufferIterator first,
  250. BufferIterator last,
  251. const T &value,
  252. command_queue &queue = system::default_queue())
  253. {
  254. BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value);
  255. size_t count = detail::iterator_range_size(first, last);
  256. if(count == 0){
  257. return;
  258. }
  259. detail::dispatch_fill(first, count, value, queue);
  260. }
  261. template<class BufferIterator, class T>
  262. inline future<void> fill_async(BufferIterator first,
  263. BufferIterator last,
  264. const T &value,
  265. command_queue &queue = system::default_queue())
  266. {
  267. BOOST_STATIC_ASSERT(detail::is_buffer_iterator<BufferIterator>::value);
  268. size_t count = detail::iterator_range_size(first, last);
  269. if(count == 0){
  270. return future<void>();
  271. }
  272. return detail::dispatch_fill_async(first, count, value, queue);
  273. }
  274. } // end compute namespace
  275. } // end boost namespace
  276. #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP