// Copyright 2004 The Trustees of Indiana University. // Use, modification and distribution is subject to the Boost Software // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Authors: Douglas Gregor // Andrew Lumsdaine // This program performs betweenness centrality (BC) clustering on the // actor collaboration graph available at // http://www.nd.edu/~networks/resources/actor/actor.dat.gz and outputs the // result of clustering in Pajek format. // // This program mimics the BC clustering algorithm program implemented // by Shashikant Penumarthy for JUNG, so that we may compare results // and timings. #include #include #include #include #include #include #include #include #include using namespace boost; struct Actor { Actor(int id = -1) : id(id) {} int id; }; typedef adjacency_list > ActorGraph; typedef graph_traits::vertex_descriptor Vertex; typedef graph_traits::edge_descriptor Edge; void load_actor_graph(std::istream& in, ActorGraph& g) { std::map actors; std::string line; while (getline(in, line)) { std::vector actors_in_movie; // Map from the actor numbers on this line to the actor vertices typedef tokenizer > Tok; Tok tok(line, char_separator(" ")); for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) { int actor_id = lexical_cast(*id); std::map::iterator v = actors.find(actor_id); if (v == actors.end()) { Vertex new_vertex = add_vertex(Actor(actor_id), g); actors[actor_id] = new_vertex; actors_in_movie.push_back(new_vertex); } else { actors_in_movie.push_back(v->second); } } for (std::vector::iterator i = actors_in_movie.begin(); i != actors_in_movie.end(); ++i) { for (std::vector::iterator j = i + 1; j != actors_in_movie.end(); ++j) { if (!edge(*i, *j, g).second) add_edge(*i, *j, g); } } } } template std::ostream& write_pajek_graph(std::ostream& out, const Graph& g, VertexIndexMap vertex_index, VertexNameMap vertex_name) { out << "*Vertices " << num_vertices(g) << '\n'; typedef typename graph_traits::vertex_iterator vertex_iterator; for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v) { out << get(vertex_index, *v)+1 << " \"" << get(vertex_name, *v) << "\"\n"; } out << "*Edges\n"; typedef typename graph_traits::edge_iterator edge_iterator; for (edge_iterator e = edges(g).first; e != edges(g).second; ++e) { out << get(vertex_index, source(*e, g))+1 << ' ' << get(vertex_index, target(*e, g))+1 << " 1.0\n"; // HACK! } return out; } class actor_clustering_threshold : public bc_clustering_threshold { typedef bc_clustering_threshold inherited; public: actor_clustering_threshold(double threshold, const ActorGraph& g, bool normalize) : inherited(threshold, g, normalize), iter(1) { } bool operator()(double max_centrality, Edge e, const ActorGraph& g) { std::cout << "Iter: " << iter << " Max Centrality: " << (max_centrality / dividend) << std::endl; ++iter; return inherited::operator()(max_centrality, e, g); } private: unsigned int iter; }; int main(int argc, char* argv[]) { std::string in_file; std::string out_file; double threshold = -1.0; bool normalize = false; // Parse command-line options { int on_arg = 1; while (on_arg < argc) { std::string arg(argv[on_arg]); if (arg == "-in") { ++on_arg; assert(on_arg < argc); in_file = argv[on_arg]; } else if (arg == "-out") { ++on_arg; assert(on_arg < argc); out_file = argv[on_arg]; } else if (arg == "-threshold") { ++on_arg; assert(on_arg < argc); threshold = lexical_cast(argv[on_arg]); } else if (arg == "-normalize") { normalize = true; } else { std::cerr << "Unrecognized parameter \"" << arg << "\".\n"; return -1; } ++on_arg; } if (in_file.empty() || out_file.empty() || threshold < 0) { std::cerr << "error: syntax is actor_clustering [options]\n\n" << "options are:\n" << "\t-in \tInput file\n" << "\t-out \tOutput file\n" << "\t-threshold \tA threshold value\n" << "\t-normalize\tNormalize edge centrality scores\n"; return -1; } } ActorGraph g; // Load the actor graph { std::cout << "Building graph." << std::endl; std::ifstream in(in_file.c_str()); if (!in) { std::cerr << "Unable to open file \"" << in_file << "\" for input.\n"; return -2; } load_actor_graph(in, g); } // Run the algorithm std::cout << "Clusting..." << std::endl; betweenness_centrality_clustering(g, actor_clustering_threshold(threshold, g, normalize), get(edge_centrality, g)); // Output the graph { std::cout << "Writing graph to file: " << out_file << std::endl; std::ofstream out(out_file.c_str()); if (!out) { std::cerr << "Unable to open file \"" << out_file << "\" for output.\n"; return -3; } write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g)); } return 0; }