rmat_graph_generator.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. // Copyright 2004, 2005 The Trustees of Indiana University.
  2. // Use, modification and distribution is subject to the Boost Software
  3. // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. // Authors: Nick Edmonds
  6. // Andrew Lumsdaine
  7. #ifndef BOOST_GRAPH_RMAT_GENERATOR_HPP
  8. #define BOOST_GRAPH_RMAT_GENERATOR_HPP
  9. #include <math.h>
  10. #include <iterator>
  11. #include <utility>
  12. #include <vector>
  13. #include <queue>
  14. #include <map>
  15. #include <boost/shared_ptr.hpp>
  16. #include <boost/assert.hpp>
  17. #include <boost/random/uniform_int.hpp>
  18. #include <boost/random/uniform_01.hpp>
  19. #include <boost/graph/graph_traits.hpp>
  20. #include <boost/graph/detail/mpi_include.hpp>
  21. #include <boost/type_traits/is_base_and_derived.hpp>
  22. #include <boost/type_traits/is_same.hpp>
  23. // #include <boost/test/floating_point_comparison.hpp>
  24. using boost::shared_ptr;
  25. using boost::uniform_01;
  26. // Returns floor(log_2(n)), and -1 when n is 0
  27. template <typename IntegerType>
  28. inline int int_log2(IntegerType n) {
  29. int l = 0;
  30. while (n > 0) {++l; n >>= 1;}
  31. return l - 1;
  32. }
  33. struct keep_all_edges {
  34. template <typename T>
  35. bool operator()(const T&, const T&) { return true; }
  36. };
  37. template <typename Distribution, typename ProcessId>
  38. struct keep_local_edges {
  39. keep_local_edges(const Distribution& distrib, const ProcessId& id)
  40. : distrib(distrib), id(id)
  41. { }
  42. template <typename T>
  43. bool operator()(const T& x, const T& y)
  44. { return distrib(x) == id || distrib(y) == id; }
  45. private:
  46. const Distribution& distrib;
  47. const ProcessId& id;
  48. };
  49. template <typename RandomGenerator, typename T>
  50. void
  51. generate_permutation_vector(RandomGenerator& gen, std::vector<T>& vertexPermutation, T n)
  52. {
  53. using boost::uniform_int;
  54. vertexPermutation.resize(n);
  55. // Generate permutation map of vertex numbers
  56. uniform_int<T> rand_vertex(0, n-1);
  57. for (T i = 0; i < n; ++i)
  58. vertexPermutation[i] = i;
  59. // Can't use std::random_shuffle unless we create another (synchronized) PRNG
  60. for (T i = 0; i < n; ++i)
  61. std::swap(vertexPermutation[i], vertexPermutation[rand_vertex(gen)]);
  62. }
  63. template <typename RandomGenerator, typename T>
  64. std::pair<T,T>
  65. generate_edge(shared_ptr<uniform_01<RandomGenerator> > prob, T n,
  66. unsigned int SCALE, double a, double b, double c, double d)
  67. {
  68. T u = 0, v = 0;
  69. T step = n/2;
  70. for (unsigned int j = 0; j < SCALE; ++j) {
  71. double p = (*prob)();
  72. if (p < a)
  73. ;
  74. else if (p >= a && p < a + b)
  75. v += step;
  76. else if (p >= a + b && p < a + b + c)
  77. u += step;
  78. else { // p > a + b + c && p < a + b + c + d
  79. u += step;
  80. v += step;
  81. }
  82. step /= 2;
  83. // 0.2 and 0.9 are hardcoded in the reference SSCA implementation.
  84. // The maximum change in any given value should be less than 10%
  85. a *= 0.9 + 0.2 * (*prob)();
  86. b *= 0.9 + 0.2 * (*prob)();
  87. c *= 0.9 + 0.2 * (*prob)();
  88. d *= 0.9 + 0.2 * (*prob)();
  89. double S = a + b + c + d;
  90. a /= S; b /= S; c /= S;
  91. // d /= S;
  92. // Ensure all values add up to 1, regardless of floating point errors
  93. d = 1. - a - b - c;
  94. }
  95. return std::make_pair(u, v);
  96. }
  97. namespace boost {
  98. /*
  99. Chakrabarti's R-MAT scale free generator.
  100. For all flavors of the R-MAT iterator a+b+c+d must equal 1 and for the
  101. unique_rmat_iterator 'm' << 'n^2'. If 'm' is too close to 'n^2' the
  102. generator may be unable to generate sufficient unique edges
  103. To get a true scale free distribution {a, b, c, d : a > b, a > c, a > d}
  104. */
  105. template<typename RandomGenerator, typename Graph>
  106. class rmat_iterator
  107. {
  108. typedef typename graph_traits<Graph>::directed_category directed_category;
  109. typedef typename graph_traits<Graph>::vertices_size_type vertices_size_type;
  110. typedef typename graph_traits<Graph>::edges_size_type edges_size_type;
  111. public:
  112. typedef std::input_iterator_tag iterator_category;
  113. typedef std::pair<vertices_size_type, vertices_size_type> value_type;
  114. typedef const value_type& reference;
  115. typedef const value_type* pointer;
  116. typedef std::ptrdiff_t difference_type; // Not used
  117. // No argument constructor, set to terminating condition
  118. rmat_iterator()
  119. : gen(), edge(0) { }
  120. // Initialize for edge generation
  121. rmat_iterator(RandomGenerator& gen, vertices_size_type n,
  122. edges_size_type m, double a, double b, double c,
  123. double d, bool permute_vertices = true)
  124. : gen(), n(n), a(a), b(b), c(c), d(d), edge(m),
  125. permute_vertices(permute_vertices),
  126. SCALE(int_log2(n))
  127. {
  128. this->gen.reset(new uniform_01<RandomGenerator>(gen));
  129. // BOOST_ASSERT(boost::test_tools::check_is_close(a + b + c + d, 1., 1.e-5));
  130. if (permute_vertices)
  131. generate_permutation_vector(gen, vertexPermutation, n);
  132. // TODO: Generate the entire adjacency matrix then "Clip and flip" if undirected graph
  133. // Generate the first edge
  134. vertices_size_type u, v;
  135. boost::tie(u, v) = generate_edge(this->gen, n, SCALE, a, b, c, d);
  136. if (permute_vertices)
  137. current = std::make_pair(vertexPermutation[u],
  138. vertexPermutation[v]);
  139. else
  140. current = std::make_pair(u, v);
  141. --edge;
  142. }
  143. reference operator*() const { return current; }
  144. pointer operator->() const { return &current; }
  145. rmat_iterator& operator++()
  146. {
  147. vertices_size_type u, v;
  148. boost::tie(u, v) = generate_edge(this->gen, n, SCALE, a, b, c, d);
  149. if (permute_vertices)
  150. current = std::make_pair(vertexPermutation[u],
  151. vertexPermutation[v]);
  152. else
  153. current = std::make_pair(u, v);
  154. --edge;
  155. return *this;
  156. }
  157. rmat_iterator operator++(int)
  158. {
  159. rmat_iterator temp(*this);
  160. ++(*this);
  161. return temp;
  162. }
  163. bool operator==(const rmat_iterator& other) const
  164. {
  165. return edge == other.edge;
  166. }
  167. bool operator!=(const rmat_iterator& other) const
  168. { return !(*this == other); }
  169. private:
  170. // Parameters
  171. shared_ptr<uniform_01<RandomGenerator> > gen;
  172. vertices_size_type n;
  173. double a, b, c, d;
  174. int edge;
  175. bool permute_vertices;
  176. int SCALE;
  177. // Internal data structures
  178. std::vector<vertices_size_type> vertexPermutation;
  179. value_type current;
  180. };
  181. // Sorted version for CSR
  182. template <typename T>
  183. struct sort_pair {
  184. bool operator() (const std::pair<T,T>& x, const std::pair<T,T>& y)
  185. {
  186. if (x.first == y.first)
  187. return x.second > y.second;
  188. else
  189. return x.first > y.first;
  190. }
  191. };
  192. template<typename RandomGenerator, typename Graph,
  193. typename EdgePredicate = keep_all_edges>
  194. class sorted_rmat_iterator
  195. {
  196. typedef typename graph_traits<Graph>::directed_category directed_category;
  197. typedef typename graph_traits<Graph>::vertices_size_type vertices_size_type;
  198. typedef typename graph_traits<Graph>::edges_size_type edges_size_type;
  199. public:
  200. typedef std::input_iterator_tag iterator_category;
  201. typedef std::pair<vertices_size_type, vertices_size_type> value_type;
  202. typedef const value_type& reference;
  203. typedef const value_type* pointer;
  204. typedef std::ptrdiff_t difference_type; // Not used
  205. // No argument constructor, set to terminating condition
  206. sorted_rmat_iterator()
  207. : gen(), values(sort_pair<vertices_size_type>()), done(true)
  208. { }
  209. // Initialize for edge generation
  210. sorted_rmat_iterator(RandomGenerator& gen, vertices_size_type n,
  211. edges_size_type m, double a, double b, double c,
  212. double d, bool permute_vertices = true,
  213. EdgePredicate ep = keep_all_edges())
  214. : gen(), permute_vertices(permute_vertices),
  215. values(sort_pair<vertices_size_type>()), done(false)
  216. {
  217. // BOOST_ASSERT(boost::test_tools::check_is_close(a + b + c + d, 1., 1.e-5));
  218. this->gen.reset(new uniform_01<RandomGenerator>(gen));
  219. std::vector<vertices_size_type> vertexPermutation;
  220. if (permute_vertices)
  221. generate_permutation_vector(gen, vertexPermutation, n);
  222. // TODO: "Clip and flip" if undirected graph
  223. int SCALE = int_log2(n);
  224. for (edges_size_type i = 0; i < m; ++i) {
  225. vertices_size_type u, v;
  226. boost::tie(u, v) = generate_edge(this->gen, n, SCALE, a, b, c, d);
  227. if (permute_vertices) {
  228. if (ep(vertexPermutation[u], vertexPermutation[v]))
  229. values.push(std::make_pair(vertexPermutation[u], vertexPermutation[v]));
  230. } else {
  231. if (ep(u, v))
  232. values.push(std::make_pair(u, v));
  233. }
  234. }
  235. current = values.top();
  236. values.pop();
  237. }
  238. reference operator*() const { return current; }
  239. pointer operator->() const { return &current; }
  240. sorted_rmat_iterator& operator++()
  241. {
  242. if (!values.empty()) {
  243. current = values.top();
  244. values.pop();
  245. } else
  246. done = true;
  247. return *this;
  248. }
  249. sorted_rmat_iterator operator++(int)
  250. {
  251. sorted_rmat_iterator temp(*this);
  252. ++(*this);
  253. return temp;
  254. }
  255. bool operator==(const sorted_rmat_iterator& other) const
  256. {
  257. return values.empty() && other.values.empty() && done && other.done;
  258. }
  259. bool operator!=(const sorted_rmat_iterator& other) const
  260. { return !(*this == other); }
  261. private:
  262. // Parameters
  263. shared_ptr<uniform_01<RandomGenerator> > gen;
  264. bool permute_vertices;
  265. // Internal data structures
  266. std::priority_queue<value_type, std::vector<value_type>, sort_pair<vertices_size_type> > values;
  267. value_type current;
  268. bool done;
  269. };
  270. // This version is slow but guarantees unique edges
  271. template<typename RandomGenerator, typename Graph,
  272. typename EdgePredicate = keep_all_edges>
  273. class unique_rmat_iterator
  274. {
  275. typedef typename graph_traits<Graph>::directed_category directed_category;
  276. typedef typename graph_traits<Graph>::vertices_size_type vertices_size_type;
  277. typedef typename graph_traits<Graph>::edges_size_type edges_size_type;
  278. public:
  279. typedef std::input_iterator_tag iterator_category;
  280. typedef std::pair<vertices_size_type, vertices_size_type> value_type;
  281. typedef const value_type& reference;
  282. typedef const value_type* pointer;
  283. typedef std::ptrdiff_t difference_type; // Not used
  284. // No argument constructor, set to terminating condition
  285. unique_rmat_iterator()
  286. : gen(), done(true)
  287. { }
  288. // Initialize for edge generation
  289. unique_rmat_iterator(RandomGenerator& gen, vertices_size_type n,
  290. edges_size_type m, double a, double b, double c,
  291. double d, bool permute_vertices = true,
  292. EdgePredicate ep = keep_all_edges())
  293. : gen(), done(false)
  294. {
  295. // BOOST_ASSERT(boost::test_tools::check_is_close(a + b + c + d, 1., 1.e-5));
  296. this->gen.reset(new uniform_01<RandomGenerator>(gen));
  297. std::vector<vertices_size_type> vertexPermutation;
  298. if (permute_vertices)
  299. generate_permutation_vector(gen, vertexPermutation, n);
  300. int SCALE = int_log2(n);
  301. std::map<value_type, bool> edge_map;
  302. edges_size_type edges = 0;
  303. do {
  304. vertices_size_type u, v;
  305. boost::tie(u, v) = generate_edge(this->gen, n, SCALE, a, b, c, d);
  306. // Lowest vertex number always comes first
  307. // (this means we don't have to worry about i->j and j->i being in the edge list)
  308. if (u > v && is_same<directed_category, undirected_tag>::value)
  309. std::swap(u, v);
  310. if (edge_map.find(std::make_pair(u, v)) == edge_map.end()) {
  311. edge_map[std::make_pair(u, v)] = true;
  312. if (permute_vertices) {
  313. if (ep(vertexPermutation[u], vertexPermutation[v]))
  314. values.push_back(std::make_pair(vertexPermutation[u], vertexPermutation[v]));
  315. } else {
  316. if (ep(u, v))
  317. values.push_back(std::make_pair(u, v));
  318. }
  319. edges++;
  320. }
  321. } while (edges < m);
  322. // NGE - Asking for more than n^2 edges will result in an infinite loop here
  323. // Asking for a value too close to n^2 edges may as well
  324. current = values.back();
  325. values.pop_back();
  326. }
  327. reference operator*() const { return current; }
  328. pointer operator->() const { return &current; }
  329. unique_rmat_iterator& operator++()
  330. {
  331. if (!values.empty()) {
  332. current = values.back();
  333. values.pop_back();
  334. } else
  335. done = true;
  336. return *this;
  337. }
  338. unique_rmat_iterator operator++(int)
  339. {
  340. unique_rmat_iterator temp(*this);
  341. ++(*this);
  342. return temp;
  343. }
  344. bool operator==(const unique_rmat_iterator& other) const
  345. {
  346. return values.empty() && other.values.empty() && done && other.done;
  347. }
  348. bool operator!=(const unique_rmat_iterator& other) const
  349. { return !(*this == other); }
  350. private:
  351. // Parameters
  352. shared_ptr<uniform_01<RandomGenerator> > gen;
  353. // Internal data structures
  354. std::vector<value_type> values;
  355. value_type current;
  356. bool done;
  357. };
  358. // This version is slow but guarantees unique edges
  359. template<typename RandomGenerator, typename Graph,
  360. typename EdgePredicate = keep_all_edges>
  361. class sorted_unique_rmat_iterator
  362. {
  363. typedef typename graph_traits<Graph>::directed_category directed_category;
  364. typedef typename graph_traits<Graph>::vertices_size_type vertices_size_type;
  365. typedef typename graph_traits<Graph>::edges_size_type edges_size_type;
  366. public:
  367. typedef std::input_iterator_tag iterator_category;
  368. typedef std::pair<vertices_size_type, vertices_size_type> value_type;
  369. typedef const value_type& reference;
  370. typedef const value_type* pointer;
  371. typedef std::ptrdiff_t difference_type; // Not used
  372. // No argument constructor, set to terminating condition
  373. sorted_unique_rmat_iterator()
  374. : gen(), values(sort_pair<vertices_size_type>()), done(true) { }
  375. // Initialize for edge generation
  376. sorted_unique_rmat_iterator(RandomGenerator& gen, vertices_size_type n,
  377. edges_size_type m, double a, double b, double c,
  378. double d, bool bidirectional = false,
  379. bool permute_vertices = true,
  380. EdgePredicate ep = keep_all_edges())
  381. : gen(), bidirectional(bidirectional),
  382. values(sort_pair<vertices_size_type>()), done(false)
  383. {
  384. // BOOST_ASSERT(boost::test_tools::check_is_close(a + b + c + d, 1., 1.e-5));
  385. this->gen.reset(new uniform_01<RandomGenerator>(gen));
  386. std::vector<vertices_size_type> vertexPermutation;
  387. if (permute_vertices)
  388. generate_permutation_vector(gen, vertexPermutation, n);
  389. int SCALE = int_log2(n);
  390. std::map<value_type, bool> edge_map;
  391. edges_size_type edges = 0;
  392. do {
  393. vertices_size_type u, v;
  394. boost::tie(u, v) = generate_edge(this->gen, n, SCALE, a, b, c, d);
  395. if (bidirectional) {
  396. if (edge_map.find(std::make_pair(u, v)) == edge_map.end()) {
  397. edge_map[std::make_pair(u, v)] = true;
  398. edge_map[std::make_pair(v, u)] = true;
  399. if (ep(u, v)) {
  400. if (permute_vertices) {
  401. values.push(std::make_pair(vertexPermutation[u], vertexPermutation[v]));
  402. values.push(std::make_pair(vertexPermutation[v], vertexPermutation[u]));
  403. } else {
  404. values.push(std::make_pair(u, v));
  405. values.push(std::make_pair(v, u));
  406. }
  407. }
  408. ++edges;
  409. }
  410. } else {
  411. // Lowest vertex number always comes first
  412. // (this means we don't have to worry about i->j and j->i being in the edge list)
  413. if (u > v && is_same<directed_category, undirected_tag>::value)
  414. std::swap(u, v);
  415. if (edge_map.find(std::make_pair(u, v)) == edge_map.end()) {
  416. edge_map[std::make_pair(u, v)] = true;
  417. if (permute_vertices) {
  418. if (ep(vertexPermutation[u], vertexPermutation[v]))
  419. values.push(std::make_pair(vertexPermutation[u], vertexPermutation[v]));
  420. } else {
  421. if (ep(u, v))
  422. values.push(std::make_pair(u, v));
  423. }
  424. ++edges;
  425. }
  426. }
  427. } while (edges < m);
  428. // NGE - Asking for more than n^2 edges will result in an infinite loop here
  429. // Asking for a value too close to n^2 edges may as well
  430. current = values.top();
  431. values.pop();
  432. }
  433. reference operator*() const { return current; }
  434. pointer operator->() const { return &current; }
  435. sorted_unique_rmat_iterator& operator++()
  436. {
  437. if (!values.empty()) {
  438. current = values.top();
  439. values.pop();
  440. } else
  441. done = true;
  442. return *this;
  443. }
  444. sorted_unique_rmat_iterator operator++(int)
  445. {
  446. sorted_unique_rmat_iterator temp(*this);
  447. ++(*this);
  448. return temp;
  449. }
  450. bool operator==(const sorted_unique_rmat_iterator& other) const
  451. {
  452. return values.empty() && other.values.empty() && done && other.done;
  453. }
  454. bool operator!=(const sorted_unique_rmat_iterator& other) const
  455. { return !(*this == other); }
  456. private:
  457. // Parameters
  458. shared_ptr<uniform_01<RandomGenerator> > gen;
  459. bool bidirectional;
  460. // Internal data structures
  461. std::priority_queue<value_type, std::vector<value_type>,
  462. sort_pair<vertices_size_type> > values;
  463. value_type current;
  464. bool done;
  465. };
  466. } // end namespace boost
  467. #include BOOST_GRAPH_MPI_INCLUDE(<boost/graph/distributed/rmat_graph_generator.hpp>)
  468. #endif // BOOST_GRAPH_RMAT_GENERATOR_HPP