connected_components_parallel_search.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. // Copyright (C) 2004-2006 The Trustees of Indiana University.
  2. // Use, modification and distribution is subject to the Boost Software
  3. // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. // Authors: Brian Barrett
  6. // Douglas Gregor
  7. // Andrew Lumsdaine
  8. #ifndef BOOST_GRAPH_PARALLEL_CC_PS_HPP
  9. #define BOOST_GRAPH_PARALLEL_CC_PS_HPP
  10. #ifndef BOOST_GRAPH_USE_MPI
  11. #error "Parallel BGL files should not be included unless <boost/graph/use_mpi.hpp> has been included"
  12. #endif
  13. #include <boost/assert.hpp>
  14. #include <boost/property_map/property_map.hpp>
  15. #include <boost/graph/parallel/algorithm.hpp>
  16. #include <boost/pending/indirect_cmp.hpp>
  17. #include <boost/graph/graph_traits.hpp>
  18. #include <boost/graph/overloading.hpp>
  19. #include <boost/graph/distributed/concepts.hpp>
  20. #include <boost/graph/parallel/properties.hpp>
  21. #include <boost/graph/parallel/process_group.hpp>
  22. #include <boost/optional.hpp>
  23. #include <algorithm>
  24. #include <vector>
  25. #include <queue>
  26. #include <limits>
  27. #include <map>
  28. #include <boost/graph/parallel/container_traits.hpp>
  29. #include <boost/graph/iteration_macros.hpp>
  30. // Connected components algorithm based on a parallel search.
  31. //
  32. // Every N nodes starts a parallel search from the first vertex in
  33. // their local vertex list during the first superstep (the other nodes
  34. // remain idle during the first superstep to reduce the number of
  35. // conflicts in numbering the components). At each superstep, all new
  36. // component mappings from remote nodes are handled. If there is no
  37. // work from remote updates, a new vertex is removed from the local
  38. // list and added to the work queue.
  39. //
  40. // Components are allocated from the component_value_allocator object,
  41. // which ensures that a given component number is unique in the
  42. // system, currently by using the rank and number of processes to
  43. // stride allocations.
  44. //
  45. // When two components are discovered to actually be the same
  46. // component, a mapping is created in the collisions object. The
  47. // lower component number is prefered in the resolution, so component
  48. // numbering resolution is consistent. After the search has exhausted
  49. // all vertices in the graph, the mapping is shared with all
  50. // processes, and they independently resolve the comonent mapping (so
  51. // O((N * NP) + (V * NP)) work, in O(N + V) time, where N is the
  52. // number of mappings and V is the number of local vertices). This
  53. // phase can likely be significantly sped up if a clever algorithm for
  54. // the reduction can be found.
  55. namespace boost { namespace graph { namespace distributed {
  56. namespace cc_ps_detail {
  57. // Local object for allocating component numbers. There are two
  58. // places this happens in the code, and I was getting sick of them
  59. // getting out of sync. Components are not tightly packed in
  60. // numbering, but are numbered to ensure each rank has its own
  61. // independent sets of numberings.
  62. template<typename component_value_type>
  63. class component_value_allocator {
  64. public:
  65. component_value_allocator(int num, int size) :
  66. last(0), num(num), size(size)
  67. {
  68. }
  69. component_value_type allocate(void)
  70. {
  71. component_value_type ret = num + (last * size);
  72. last++;
  73. return ret;
  74. }
  75. private:
  76. component_value_type last;
  77. int num;
  78. int size;
  79. };
  80. // Map of the "collisions" between component names in the global
  81. // component mapping. TO make cleanup easier, component numbers
  82. // are added, pointing to themselves, when a new component is
  83. // found. In order to make the results deterministic, the lower
  84. // component number is always taken. The resolver will drill
  85. // through the map until it finds a component entry that points to
  86. // itself as the next value, allowing some cleanup to happen at
  87. // update() time. Attempts are also made to update the mapping
  88. // when new entries are created.
  89. //
  90. // Note that there's an assumption that the entire mapping is
  91. // shared during the end of the algorithm, but before component
  92. // name resolution.
  93. template<typename component_value_type>
  94. class collision_map {
  95. public:
  96. collision_map() : num_unique(0)
  97. {
  98. }
  99. // add new component mapping first time component is used. Own
  100. // function only so that we can sanity check there isn't already
  101. // a mapping for that component number (which would be bad)
  102. void add(const component_value_type &a)
  103. {
  104. BOOST_ASSERT(collisions.count(a) == 0);
  105. collisions[a] = a;
  106. }
  107. // add a mapping between component values saying they're the
  108. // same component
  109. void add(const component_value_type &a, const component_value_type &b)
  110. {
  111. component_value_type high, low, tmp;
  112. if (a > b) {
  113. high = a;
  114. low = b;
  115. } else {
  116. high = b;
  117. low = a;
  118. }
  119. if (collisions.count(high) != 0 && collisions[high] != low) {
  120. tmp = collisions[high];
  121. if (tmp > low) {
  122. collisions[tmp] = low;
  123. collisions[high] = low;
  124. } else {
  125. collisions[low] = tmp;
  126. collisions[high] = tmp;
  127. }
  128. } else {
  129. collisions[high] = low;
  130. }
  131. }
  132. // get the "real" component number for the given component.
  133. // Used to resolve mapping at end of run.
  134. component_value_type update(component_value_type a)
  135. {
  136. BOOST_ASSERT(num_unique > 0);
  137. BOOST_ASSERT(collisions.count(a) != 0);
  138. return collisions[a];
  139. }
  140. // collapse the collisions tree, so that update is a one lookup
  141. // operation. Count unique components at the same time.
  142. void uniqify(void)
  143. {
  144. typename std::map<component_value_type, component_value_type>::iterator i, end;
  145. end = collisions.end();
  146. for (i = collisions.begin() ; i != end ; ++i) {
  147. if (i->first == i->second) {
  148. num_unique++;
  149. } else {
  150. i->second = collisions[i->second];
  151. }
  152. }
  153. }
  154. // get the number of component entries that have an associated
  155. // component number of themselves, which are the real components
  156. // used in the final mapping. This is the number of unique
  157. // components in the graph.
  158. int unique(void)
  159. {
  160. BOOST_ASSERT(num_unique > 0);
  161. return num_unique;
  162. }
  163. // "serialize" into a vector for communication.
  164. std::vector<component_value_type> serialize(void)
  165. {
  166. std::vector<component_value_type> ret;
  167. typename std::map<component_value_type, component_value_type>::iterator i, end;
  168. end = collisions.end();
  169. for (i = collisions.begin() ; i != end ; ++i) {
  170. ret.push_back(i->first);
  171. ret.push_back(i->second);
  172. }
  173. return ret;
  174. }
  175. private:
  176. std::map<component_value_type, component_value_type> collisions;
  177. int num_unique;
  178. };
  179. // resolver to handle remote updates. The resolver will add
  180. // entries into the collisions map if required, and if it is the
  181. // first time the vertex has been touched, it will add the vertex
  182. // to the remote queue. Note that local updates are handled
  183. // differently, in the main loop (below).
  184. // BWB - FIX ME - don't need graph anymore - can pull from key value of Component Map.
  185. template<typename ComponentMap, typename work_queue>
  186. struct update_reducer {
  187. BOOST_STATIC_CONSTANT(bool, non_default_resolver = false);
  188. typedef typename property_traits<ComponentMap>::value_type component_value_type;
  189. typedef typename property_traits<ComponentMap>::key_type vertex_descriptor;
  190. update_reducer(work_queue *q,
  191. cc_ps_detail::collision_map<component_value_type> *collisions,
  192. processor_id_type pg_id) :
  193. q(q), collisions(collisions), pg_id(pg_id)
  194. {
  195. }
  196. // ghost cell initialization routine. This should never be
  197. // called in this imlementation.
  198. template<typename K>
  199. component_value_type operator()(const K&) const
  200. {
  201. return component_value_type(0);
  202. }
  203. // resolver for remote updates. I'm not entirely sure why, but
  204. // I decided to not change the value of the vertex if it's
  205. // already non-infinite. It doesn't matter in the end, as we'll
  206. // touch every vertex in the cleanup phase anyway. If the
  207. // component is currently infinite, set to the new component
  208. // number and add the vertex to the work queue. If it's not
  209. // infinite, we've touched it already so don't add it to the
  210. // work queue. Do add a collision entry so that we know the two
  211. // components are the same.
  212. component_value_type operator()(const vertex_descriptor &v,
  213. const component_value_type& current,
  214. const component_value_type& update) const
  215. {
  216. const component_value_type max = (std::numeric_limits<component_value_type>::max)();
  217. component_value_type ret = current;
  218. if (max == current) {
  219. q->push(v);
  220. ret = update;
  221. } else if (current != update) {
  222. collisions->add(current, update);
  223. }
  224. return ret;
  225. }
  226. // So for whatever reason, the property map can in theory call
  227. // the resolver with a local descriptor in addition to the
  228. // standard global descriptor. As far as I can tell, this code
  229. // path is never taken in this implementation, but I need to
  230. // have this code here to make it compile. We just make a
  231. // global descriptor and call the "real" operator().
  232. template<typename K>
  233. component_value_type operator()(const K& v,
  234. const component_value_type& current,
  235. const component_value_type& update) const
  236. {
  237. return (*this)(vertex_descriptor(pg_id, v), current, update);
  238. }
  239. private:
  240. work_queue *q;
  241. collision_map<component_value_type> *collisions;
  242. boost::processor_id_type pg_id;
  243. };
  244. } // namespace cc_ps_detail
  245. template<typename Graph, typename ComponentMap>
  246. typename property_traits<ComponentMap>::value_type
  247. connected_components_ps(const Graph& g, ComponentMap c)
  248. {
  249. using boost::graph::parallel::process_group;
  250. typedef typename property_traits<ComponentMap>::value_type component_value_type;
  251. typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator;
  252. typedef typename graph_traits<Graph>::vertex_descriptor vertex_descriptor;
  253. typedef typename boost::graph::parallel::process_group_type<Graph>
  254. ::type process_group_type;
  255. typedef typename process_group_type::process_id_type process_id_type;
  256. typedef std::queue<vertex_descriptor> work_queue;
  257. static const component_value_type max_component =
  258. (std::numeric_limits<component_value_type>::max)();
  259. typename property_map<Graph, vertex_owner_t>::const_type
  260. owner = get(vertex_owner, g);
  261. // standard who am i? stuff
  262. process_group_type pg = process_group(g);
  263. process_id_type id = process_id(pg);
  264. // Initialize every vertex to have infinite component number
  265. BGL_FORALL_VERTICES_T(v, g, Graph) put(c, v, max_component);
  266. vertex_iterator current, end;
  267. boost::tie(current, end) = vertices(g);
  268. cc_ps_detail::component_value_allocator<component_value_type> cva(process_id(pg), num_processes(pg));
  269. cc_ps_detail::collision_map<component_value_type> collisions;
  270. work_queue q; // this is intentionally a local data structure
  271. c.set_reduce(cc_ps_detail::update_reducer<ComponentMap, work_queue>(&q, &collisions, id));
  272. // add starting work
  273. while (true) {
  274. bool useful_found = false;
  275. component_value_type val = cva.allocate();
  276. put(c, *current, val);
  277. collisions.add(val);
  278. q.push(*current);
  279. if (0 != out_degree(*current, g)) useful_found = true;
  280. ++current;
  281. if (useful_found) break;
  282. }
  283. // Run the loop until everyone in the system is done
  284. bool global_done = false;
  285. while (!global_done) {
  286. // drain queue of work for this superstep
  287. while (!q.empty()) {
  288. vertex_descriptor v = q.front();
  289. q.pop();
  290. // iterate through outedges of the vertex currently being
  291. // examined, setting their component to our component. There
  292. // is no way to end up in the queue without having a component
  293. // number already.
  294. BGL_FORALL_ADJ_T(v, peer, g, Graph) {
  295. component_value_type my_component = get(c, v);
  296. // update other vertex with our component information.
  297. // Resolver will handle remote collisions as well as whether
  298. // to put the vertex on the work queue or not. We have to
  299. // handle local collisions and work queue management
  300. if (id == get(owner, peer)) {
  301. if (max_component == get(c, peer)) {
  302. put(c, peer, my_component);
  303. q.push(peer);
  304. } else if (my_component != get(c, peer)) {
  305. collisions.add(my_component, get(c, peer));
  306. }
  307. } else {
  308. put(c, peer, my_component);
  309. }
  310. }
  311. }
  312. // synchronize / start a new superstep.
  313. synchronize(pg);
  314. global_done = all_reduce(pg, (q.empty() && (current == end)), boost::parallel::minimum<bool>());
  315. // If the queue is currently empty, add something to do to start
  316. // the current superstep (supersteps start at the sync, not at
  317. // the top of the while loop as one might expect). Down at the
  318. // bottom of the while loop so that not everyone starts the
  319. // algorithm with something to do, to try to reduce component
  320. // name conflicts
  321. if (q.empty()) {
  322. bool useful_found = false;
  323. for ( ; current != end && !useful_found ; ++current) {
  324. if (max_component == get(c, *current)) {
  325. component_value_type val = cva.allocate();
  326. put(c, *current, val);
  327. collisions.add(val);
  328. q.push(*current);
  329. if (0 != out_degree(*current, g)) useful_found = true;
  330. }
  331. }
  332. }
  333. }
  334. // share component mappings
  335. std::vector<component_value_type> global;
  336. std::vector<component_value_type> mine = collisions.serialize();
  337. all_gather(pg, mine.begin(), mine.end(), global);
  338. for (size_t i = 0 ; i < global.size() ; i += 2) {
  339. collisions.add(global[i], global[i + 1]);
  340. }
  341. collisions.uniqify();
  342. // update the component mappings
  343. BGL_FORALL_VERTICES_T(v, g, Graph) {
  344. put(c, v, collisions.update(get(c, v)));
  345. }
  346. return collisions.unique();
  347. }
  348. } // end namespace distributed
  349. } // end namespace graph
  350. } // end namespace boost
  351. #endif // BOOST_GRAPH_PARALLEL_CC_HPP