reduce.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
  2. // Copyright (C) 2004 The Trustees of Indiana University
  3. // Use, modification and distribution is subject to the Boost Software
  4. // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6. // Authors: Douglas Gregor
  7. // Andrew Lumsdaine
  8. // Message Passing Interface 1.1 -- Section 4.9.1. Reduce
  9. #ifndef BOOST_MPI_REDUCE_HPP
  10. #define BOOST_MPI_REDUCE_HPP
  11. #include <boost/mpi/exception.hpp>
  12. #include <boost/mpi/datatype.hpp>
  13. // For (de-)serializing sends and receives
  14. #include <boost/mpi/packed_oarchive.hpp>
  15. #include <boost/mpi/packed_iarchive.hpp>
  16. // For packed_[io]archive sends and receives
  17. #include <boost/mpi/detail/point_to_point.hpp>
  18. #include <boost/mpi/communicator.hpp>
  19. #include <boost/mpi/environment.hpp>
  20. #include <boost/mpi/detail/computation_tree.hpp>
  21. #include <boost/mpi/operations.hpp>
  22. #include <algorithm>
  23. #include <exception>
  24. #include <boost/assert.hpp>
  25. #include <boost/scoped_array.hpp>
  26. namespace boost { namespace mpi {
  27. /************************************************************************
  28. * Implementation details *
  29. ************************************************************************/
  30. namespace detail {
  31. /**********************************************************************
  32. * Simple reduction with MPI_Reduce *
  33. **********************************************************************/
  34. // We are reducing at the root for a type that has an associated MPI
  35. // datatype and operation, so we'll use MPI_Reduce directly.
  36. template<typename T, typename Op>
  37. void
  38. reduce_impl(const communicator& comm, const T* in_values, int n,
  39. T* out_values, Op /*op*/, int root, mpl::true_ /*is_mpi_op*/,
  40. mpl::true_/*is_mpi_datatype*/)
  41. {
  42. BOOST_MPI_CHECK_RESULT(MPI_Reduce,
  43. (const_cast<T*>(in_values), out_values, n,
  44. boost::mpi::get_mpi_datatype<T>(*in_values),
  45. (is_mpi_op<Op, T>::op()), root, comm));
  46. }
  47. // We are reducing to the root for a type that has an associated MPI
  48. // datatype and operation, so we'll use MPI_Reduce directly.
  49. template<typename T, typename Op>
  50. void
  51. reduce_impl(const communicator& comm, const T* in_values, int n, Op /*op*/,
  52. int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
  53. {
  54. BOOST_MPI_CHECK_RESULT(MPI_Reduce,
  55. (const_cast<T*>(in_values), 0, n,
  56. boost::mpi::get_mpi_datatype<T>(*in_values),
  57. (is_mpi_op<Op, T>::op()), root, comm));
  58. }
  59. /**********************************************************************
  60. * User-defined reduction with MPI_Reduce *
  61. **********************************************************************/
  62. // We are reducing at the root for a type that has an associated MPI
  63. // datatype but with a custom operation. We'll use MPI_Reduce
  64. // directly, but we'll need to create an MPI_Op manually.
  65. template<typename T, typename Op>
  66. void
  67. reduce_impl(const communicator& comm, const T* in_values, int n,
  68. T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
  69. mpl::true_/*is_mpi_datatype*/)
  70. {
  71. user_op<Op, T> mpi_op;
  72. BOOST_MPI_CHECK_RESULT(MPI_Reduce,
  73. (const_cast<T*>(in_values), out_values, n,
  74. boost::mpi::get_mpi_datatype<T>(*in_values),
  75. mpi_op.get_mpi_op(), root, comm));
  76. }
  77. // We are reducing to the root for a type that has an associated MPI
  78. // datatype but with a custom operation. We'll use MPI_Reduce
  79. // directly, but we'll need to create an MPI_Op manually.
  80. template<typename T, typename Op>
  81. void
  82. reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
  83. int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
  84. {
  85. user_op<Op, T> mpi_op;
  86. BOOST_MPI_CHECK_RESULT(MPI_Reduce,
  87. (const_cast<T*>(in_values), 0, n,
  88. boost::mpi::get_mpi_datatype<T>(*in_values),
  89. mpi_op.get_mpi_op(), root, comm));
  90. }
  91. /**********************************************************************
  92. * User-defined, tree-based reduction for non-MPI data types *
  93. **********************************************************************/
  94. // Commutative reduction
  95. template<typename T, typename Op>
  96. void
  97. tree_reduce_impl(const communicator& comm, const T* in_values, int n,
  98. T* out_values, Op op, int root,
  99. mpl::true_ /*is_commutative*/)
  100. {
  101. std::copy(in_values, in_values + n, out_values);
  102. int size = comm.size();
  103. int rank = comm.rank();
  104. // The computation tree we will use.
  105. detail::computation_tree tree(rank, size, root);
  106. int tag = environment::collectives_tag();
  107. MPI_Status status;
  108. int children = 0;
  109. for (int child = tree.child_begin();
  110. children < tree.branching_factor() && child != root;
  111. ++children, child = (child + 1) % size) {
  112. // Receive archive
  113. packed_iarchive ia(comm);
  114. detail::packed_archive_recv(comm, child, tag, ia, status);
  115. T incoming;
  116. for (int i = 0; i < n; ++i) {
  117. ia >> incoming;
  118. out_values[i] = op(out_values[i], incoming);
  119. }
  120. }
  121. // For non-roots, send the result to the parent.
  122. if (tree.parent() != rank) {
  123. packed_oarchive oa(comm);
  124. for (int i = 0; i < n; ++i)
  125. oa << out_values[i];
  126. detail::packed_archive_send(comm, tree.parent(), tag, oa);
  127. }
  128. }
  129. // Commutative reduction from a non-root.
  130. template<typename T, typename Op>
  131. void
  132. tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
  133. int root, mpl::true_ /*is_commutative*/)
  134. {
  135. scoped_array<T> results(new T[n]);
  136. detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root,
  137. mpl::true_());
  138. }
  139. // Non-commutative reduction
  140. template<typename T, typename Op>
  141. void
  142. tree_reduce_impl(const communicator& comm, const T* in_values, int n,
  143. T* out_values, Op op, int root,
  144. mpl::false_ /*is_commutative*/)
  145. {
  146. int tag = environment::collectives_tag();
  147. int left_child = root / 2;
  148. int right_child = (root + comm.size()) / 2;
  149. MPI_Status status;
  150. if (left_child != root) {
  151. // Receive value from the left child and merge it with the value
  152. // we had incoming.
  153. packed_iarchive ia(comm);
  154. detail::packed_archive_recv(comm, left_child, tag, ia, status);
  155. T incoming;
  156. for (int i = 0; i < n; ++i) {
  157. ia >> incoming;
  158. out_values[i] = op(incoming, in_values[i]);
  159. }
  160. } else {
  161. // There was no left value, so copy our incoming value.
  162. std::copy(in_values, in_values + n, out_values);
  163. }
  164. if (right_child != root) {
  165. // Receive value from the right child and merge it with the
  166. // value we had incoming.
  167. packed_iarchive ia(comm);
  168. detail::packed_archive_recv(comm, right_child, tag, ia, status);
  169. T incoming;
  170. for (int i = 0; i < n; ++i) {
  171. ia >> incoming;
  172. out_values[i] = op(out_values[i], incoming);
  173. }
  174. }
  175. }
  176. // Non-commutative reduction from a non-root.
  177. template<typename T, typename Op>
  178. void
  179. tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
  180. int root, mpl::false_ /*is_commutative*/)
  181. {
  182. int size = comm.size();
  183. int rank = comm.rank();
  184. int tag = environment::collectives_tag();
  185. // Determine our parents and children in the commutative binary
  186. // computation tree.
  187. int grandparent = root;
  188. int parent = root;
  189. int left_bound = 0;
  190. int right_bound = size;
  191. int left_child, right_child;
  192. do {
  193. left_child = (left_bound + parent) / 2;
  194. right_child = (parent + right_bound) / 2;
  195. if (rank < parent) {
  196. // Go left.
  197. grandparent = parent;
  198. right_bound = parent;
  199. parent = left_child;
  200. } else if (rank > parent) {
  201. // Go right.
  202. grandparent = parent;
  203. left_bound = parent + 1;
  204. parent = right_child;
  205. } else {
  206. // We've found the parent
  207. break;
  208. }
  209. } while (true);
  210. // Our parent is the grandparent of our children. This is a slight
  211. // abuse of notation, but it makes the send-to-parent below make
  212. // more sense.
  213. parent = grandparent;
  214. MPI_Status status;
  215. scoped_array<T> out_values(new T[n]);
  216. if (left_child != rank) {
  217. // Receive value from the left child and merge it with the value
  218. // we had incoming.
  219. packed_iarchive ia(comm);
  220. detail::packed_archive_recv(comm, left_child, tag, ia, status);
  221. T incoming;
  222. for (int i = 0; i < n; ++i) {
  223. ia >> incoming;
  224. out_values[i] = op(incoming, in_values[i]);
  225. }
  226. } else {
  227. // There was no left value, so copy our incoming value.
  228. std::copy(in_values, in_values + n, out_values.get());
  229. }
  230. if (right_child != rank) {
  231. // Receive value from the right child and merge it with the
  232. // value we had incoming.
  233. packed_iarchive ia(comm);
  234. detail::packed_archive_recv(comm, right_child, tag, ia, status);
  235. T incoming;
  236. for (int i = 0; i < n; ++i) {
  237. ia >> incoming;
  238. out_values[i] = op(out_values[i], incoming);
  239. }
  240. }
  241. // Send the combined value to our parent.
  242. packed_oarchive oa(comm);
  243. for (int i = 0; i < n; ++i)
  244. oa << out_values[i];
  245. detail::packed_archive_send(comm, parent, tag, oa);
  246. }
  247. // We are reducing at the root for a type that has no associated MPI
  248. // datatype and operation, so we'll use a simple tree-based
  249. // algorithm.
  250. template<typename T, typename Op>
  251. void
  252. reduce_impl(const communicator& comm, const T* in_values, int n,
  253. T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
  254. mpl::false_ /*is_mpi_datatype*/)
  255. {
  256. detail::tree_reduce_impl(comm, in_values, n, out_values, op, root,
  257. is_commutative<Op, T>());
  258. }
  259. // We are reducing to the root for a type that has no associated MPI
  260. // datatype and operation, so we'll use a simple tree-based
  261. // algorithm.
  262. template<typename T, typename Op>
  263. void
  264. reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
  265. int root, mpl::false_ /*is_mpi_op*/,
  266. mpl::false_ /*is_mpi_datatype*/)
  267. {
  268. detail::tree_reduce_impl(comm, in_values, n, op, root,
  269. is_commutative<Op, T>());
  270. }
  271. } // end namespace detail
  272. template<typename T, typename Op>
  273. void
  274. reduce(const communicator& comm, const T* in_values, int n, T* out_values,
  275. Op op, int root)
  276. {
  277. if (comm.rank() == root)
  278. detail::reduce_impl(comm, in_values, n, out_values, op, root,
  279. is_mpi_op<Op, T>(), is_mpi_datatype<T>());
  280. else
  281. detail::reduce_impl(comm, in_values, n, op, root,
  282. is_mpi_op<Op, T>(), is_mpi_datatype<T>());
  283. }
  284. template<typename T, typename Op>
  285. void
  286. reduce(const communicator& comm, const T* in_values, int n, Op op, int root)
  287. {
  288. BOOST_ASSERT(comm.rank() != root);
  289. detail::reduce_impl(comm, in_values, n, op, root,
  290. is_mpi_op<Op, T>(), is_mpi_datatype<T>());
  291. }
  292. template<typename T, typename Op>
  293. void
  294. reduce(const communicator & comm, std::vector<T> const & in_values, Op op,
  295. int root)
  296. {
  297. reduce(comm, &in_values.front(), in_values.size(), op, root);
  298. }
  299. template<typename T, typename Op>
  300. void
  301. reduce(const communicator & comm, std::vector<T> const & in_values,
  302. std::vector<T> & out_values, Op op, int root)
  303. {
  304. if (root == comm.rank()) out_values.resize(in_values.size());
  305. reduce(comm, &in_values.front(), in_values.size(), &out_values.front(), op,
  306. root);
  307. }
  308. template<typename T, typename Op>
  309. void
  310. reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
  311. int root)
  312. {
  313. if (comm.rank() == root)
  314. detail::reduce_impl(comm, &in_value, 1, &out_value, op, root,
  315. is_mpi_op<Op, T>(), is_mpi_datatype<T>());
  316. else
  317. detail::reduce_impl(comm, &in_value, 1, op, root,
  318. is_mpi_op<Op, T>(), is_mpi_datatype<T>());
  319. }
  320. template<typename T, typename Op>
  321. void reduce(const communicator& comm, const T& in_value, Op op, int root)
  322. {
  323. BOOST_ASSERT(comm.rank() != root);
  324. detail::reduce_impl(comm, &in_value, 1, op, root,
  325. is_mpi_op<Op, T>(), is_mpi_datatype<T>());
  326. }
  327. } } // end namespace boost::mpi
  328. #endif // BOOST_MPI_REDUCE_HPP