actor_clustering.cpp 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. // Copyright 2004 The Trustees of Indiana University.
  2. // Use, modification and distribution is subject to the Boost Software
  3. // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. // Authors: Douglas Gregor
  6. // Andrew Lumsdaine
  7. // This program performs betweenness centrality (BC) clustering on the
  8. // actor collaboration graph available at
  9. // http://www.nd.edu/~networks/resources/actor/actor.dat.gz and outputs the
  10. // result of clustering in Pajek format.
  11. //
  12. // This program mimics the BC clustering algorithm program implemented
  13. // by Shashikant Penumarthy for JUNG, so that we may compare results
  14. // and timings.
  15. #include <boost/graph/bc_clustering.hpp>
  16. #include <boost/graph/adjacency_list.hpp>
  17. #include <boost/graph/graph_traits.hpp>
  18. #include <fstream>
  19. #include <iostream>
  20. #include <string>
  21. #include <boost/tokenizer.hpp>
  22. #include <boost/lexical_cast.hpp>
  23. #include <map>
  24. using namespace boost;
  25. struct Actor
  26. {
  27. Actor(int id = -1) : id(id) {}
  28. int id;
  29. };
  30. typedef adjacency_list<vecS, vecS, undirectedS, Actor,
  31. property<edge_centrality_t, double> > ActorGraph;
  32. typedef graph_traits<ActorGraph>::vertex_descriptor Vertex;
  33. typedef graph_traits<ActorGraph>::edge_descriptor Edge;
  34. void load_actor_graph(std::istream& in, ActorGraph& g)
  35. {
  36. std::map<int, Vertex> actors;
  37. std::string line;
  38. while (getline(in, line)) {
  39. std::vector<Vertex> actors_in_movie;
  40. // Map from the actor numbers on this line to the actor vertices
  41. typedef tokenizer<char_separator<char> > Tok;
  42. Tok tok(line, char_separator<char>(" "));
  43. for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) {
  44. int actor_id = lexical_cast<int>(*id);
  45. std::map<int, Vertex>::iterator v = actors.find(actor_id);
  46. if (v == actors.end()) {
  47. Vertex new_vertex = add_vertex(Actor(actor_id), g);
  48. actors[actor_id] = new_vertex;
  49. actors_in_movie.push_back(new_vertex);
  50. } else {
  51. actors_in_movie.push_back(v->second);
  52. }
  53. }
  54. for (std::vector<Vertex>::iterator i = actors_in_movie.begin();
  55. i != actors_in_movie.end(); ++i) {
  56. for (std::vector<Vertex>::iterator j = i + 1;
  57. j != actors_in_movie.end(); ++j) {
  58. if (!edge(*i, *j, g).second) add_edge(*i, *j, g);
  59. }
  60. }
  61. }
  62. }
  63. template<typename Graph, typename VertexIndexMap, typename VertexNameMap>
  64. std::ostream&
  65. write_pajek_graph(std::ostream& out, const Graph& g,
  66. VertexIndexMap vertex_index, VertexNameMap vertex_name)
  67. {
  68. out << "*Vertices " << num_vertices(g) << '\n';
  69. typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator;
  70. for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v) {
  71. out << get(vertex_index, *v)+1 << " \"" << get(vertex_name, *v) << "\"\n";
  72. }
  73. out << "*Edges\n";
  74. typedef typename graph_traits<Graph>::edge_iterator edge_iterator;
  75. for (edge_iterator e = edges(g).first; e != edges(g).second; ++e) {
  76. out << get(vertex_index, source(*e, g))+1 << ' '
  77. << get(vertex_index, target(*e, g))+1 << " 1.0\n"; // HACK!
  78. }
  79. return out;
  80. }
  81. class actor_clustering_threshold : public bc_clustering_threshold<double>
  82. {
  83. typedef bc_clustering_threshold<double> inherited;
  84. public:
  85. actor_clustering_threshold(double threshold, const ActorGraph& g,
  86. bool normalize)
  87. : inherited(threshold, g, normalize), iter(1) { }
  88. bool operator()(double max_centrality, Edge e, const ActorGraph& g)
  89. {
  90. std::cout << "Iter: " << iter << " Max Centrality: "
  91. << (max_centrality / dividend) << std::endl;
  92. ++iter;
  93. return inherited::operator()(max_centrality, e, g);
  94. }
  95. private:
  96. unsigned int iter;
  97. };
  98. int main(int argc, char* argv[])
  99. {
  100. std::string in_file;
  101. std::string out_file;
  102. double threshold = -1.0;
  103. bool normalize = false;
  104. // Parse command-line options
  105. {
  106. int on_arg = 1;
  107. while (on_arg < argc) {
  108. std::string arg(argv[on_arg]);
  109. if (arg == "-in") {
  110. ++on_arg; assert(on_arg < argc);
  111. in_file = argv[on_arg];
  112. } else if (arg == "-out") {
  113. ++on_arg; assert(on_arg < argc);
  114. out_file = argv[on_arg];
  115. } else if (arg == "-threshold") {
  116. ++on_arg; assert(on_arg < argc);
  117. threshold = lexical_cast<double>(argv[on_arg]);
  118. } else if (arg == "-normalize") {
  119. normalize = true;
  120. } else {
  121. std::cerr << "Unrecognized parameter \"" << arg << "\".\n";
  122. return -1;
  123. }
  124. ++on_arg;
  125. }
  126. if (in_file.empty() || out_file.empty() || threshold < 0) {
  127. std::cerr << "error: syntax is actor_clustering [options]\n\n"
  128. << "options are:\n"
  129. << "\t-in <infile>\tInput file\n"
  130. << "\t-out <outfile>\tOutput file\n"
  131. << "\t-threshold <value>\tA threshold value\n"
  132. << "\t-normalize\tNormalize edge centrality scores\n";
  133. return -1;
  134. }
  135. }
  136. ActorGraph g;
  137. // Load the actor graph
  138. {
  139. std::cout << "Building graph." << std::endl;
  140. std::ifstream in(in_file.c_str());
  141. if (!in) {
  142. std::cerr << "Unable to open file \"" << in_file << "\" for input.\n";
  143. return -2;
  144. }
  145. load_actor_graph(in, g);
  146. }
  147. // Run the algorithm
  148. std::cout << "Clusting..." << std::endl;
  149. betweenness_centrality_clustering(g,
  150. actor_clustering_threshold(threshold, g, normalize),
  151. get(edge_centrality, g));
  152. // Output the graph
  153. {
  154. std::cout << "Writing graph to file: " << out_file << std::endl;
  155. std::ofstream out(out_file.c_str());
  156. if (!out) {
  157. std::cerr << "Unable to open file \"" << out_file << "\" for output.\n";
  158. return -3;
  159. }
  160. write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g));
  161. }
  162. return 0;
  163. }