123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187 |
- // Copyright 2004 The Trustees of Indiana University.
- // Use, modification and distribution is subject to the Boost Software
- // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- // Authors: Douglas Gregor
- // Andrew Lumsdaine
- // This program performs betweenness centrality (BC) clustering on the
- // actor collaboration graph available at
- // http://www.nd.edu/~networks/resources/actor/actor.dat.gz and outputs the
- // result of clustering in Pajek format.
- //
- // This program mimics the BC clustering algorithm program implemented
- // by Shashikant Penumarthy for JUNG, so that we may compare results
- // and timings.
- #include <boost/graph/bc_clustering.hpp>
- #include <boost/graph/adjacency_list.hpp>
- #include <boost/graph/graph_traits.hpp>
- #include <fstream>
- #include <iostream>
- #include <string>
- #include <boost/tokenizer.hpp>
- #include <boost/lexical_cast.hpp>
- #include <map>
- using namespace boost;
- struct Actor
- {
- Actor(int id = -1) : id(id) {}
- int id;
- };
- typedef adjacency_list<vecS, vecS, undirectedS, Actor,
- property<edge_centrality_t, double> > ActorGraph;
- typedef graph_traits<ActorGraph>::vertex_descriptor Vertex;
- typedef graph_traits<ActorGraph>::edge_descriptor Edge;
- void load_actor_graph(std::istream& in, ActorGraph& g)
- {
- std::map<int, Vertex> actors;
- std::string line;
- while (getline(in, line)) {
- std::vector<Vertex> actors_in_movie;
- // Map from the actor numbers on this line to the actor vertices
- typedef tokenizer<char_separator<char> > Tok;
- Tok tok(line, char_separator<char>(" "));
- for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) {
- int actor_id = lexical_cast<int>(*id);
- std::map<int, Vertex>::iterator v = actors.find(actor_id);
- if (v == actors.end()) {
- Vertex new_vertex = add_vertex(Actor(actor_id), g);
- actors[actor_id] = new_vertex;
- actors_in_movie.push_back(new_vertex);
- } else {
- actors_in_movie.push_back(v->second);
- }
- }
- for (std::vector<Vertex>::iterator i = actors_in_movie.begin();
- i != actors_in_movie.end(); ++i) {
- for (std::vector<Vertex>::iterator j = i + 1;
- j != actors_in_movie.end(); ++j) {
- if (!edge(*i, *j, g).second) add_edge(*i, *j, g);
- }
- }
- }
- }
- template<typename Graph, typename VertexIndexMap, typename VertexNameMap>
- std::ostream&
- write_pajek_graph(std::ostream& out, const Graph& g,
- VertexIndexMap vertex_index, VertexNameMap vertex_name)
- {
- out << "*Vertices " << num_vertices(g) << '\n';
- typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator;
- for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v) {
- out << get(vertex_index, *v)+1 << " \"" << get(vertex_name, *v) << "\"\n";
- }
- out << "*Edges\n";
- typedef typename graph_traits<Graph>::edge_iterator edge_iterator;
- for (edge_iterator e = edges(g).first; e != edges(g).second; ++e) {
- out << get(vertex_index, source(*e, g))+1 << ' '
- << get(vertex_index, target(*e, g))+1 << " 1.0\n"; // HACK!
- }
- return out;
- }
- class actor_clustering_threshold : public bc_clustering_threshold<double>
- {
- typedef bc_clustering_threshold<double> inherited;
- public:
- actor_clustering_threshold(double threshold, const ActorGraph& g,
- bool normalize)
- : inherited(threshold, g, normalize), iter(1) { }
- bool operator()(double max_centrality, Edge e, const ActorGraph& g)
- {
- std::cout << "Iter: " << iter << " Max Centrality: "
- << (max_centrality / dividend) << std::endl;
- ++iter;
- return inherited::operator()(max_centrality, e, g);
- }
- private:
- unsigned int iter;
- };
- int main(int argc, char* argv[])
- {
- std::string in_file;
- std::string out_file;
- double threshold = -1.0;
- bool normalize = false;
- // Parse command-line options
- {
- int on_arg = 1;
- while (on_arg < argc) {
- std::string arg(argv[on_arg]);
- if (arg == "-in") {
- ++on_arg; assert(on_arg < argc);
- in_file = argv[on_arg];
- } else if (arg == "-out") {
- ++on_arg; assert(on_arg < argc);
- out_file = argv[on_arg];
- } else if (arg == "-threshold") {
- ++on_arg; assert(on_arg < argc);
- threshold = lexical_cast<double>(argv[on_arg]);
- } else if (arg == "-normalize") {
- normalize = true;
- } else {
- std::cerr << "Unrecognized parameter \"" << arg << "\".\n";
- return -1;
- }
- ++on_arg;
- }
- if (in_file.empty() || out_file.empty() || threshold < 0) {
- std::cerr << "error: syntax is actor_clustering [options]\n\n"
- << "options are:\n"
- << "\t-in <infile>\tInput file\n"
- << "\t-out <outfile>\tOutput file\n"
- << "\t-threshold <value>\tA threshold value\n"
- << "\t-normalize\tNormalize edge centrality scores\n";
- return -1;
- }
- }
- ActorGraph g;
- // Load the actor graph
- {
- std::cout << "Building graph." << std::endl;
- std::ifstream in(in_file.c_str());
- if (!in) {
- std::cerr << "Unable to open file \"" << in_file << "\" for input.\n";
- return -2;
- }
- load_actor_graph(in, g);
- }
- // Run the algorithm
- std::cout << "Clusting..." << std::endl;
- betweenness_centrality_clustering(g,
- actor_clustering_threshold(threshold, g, normalize),
- get(edge_centrality, g));
- // Output the graph
- {
- std::cout << "Writing graph to file: " << out_file << std::endl;
- std::ofstream out(out_file.c_str());
- if (!out) {
- std::cerr << "Unable to open file \"" << out_file << "\" for output.\n";
- return -3;
- }
- write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g));
- }
- return 0;
- }
|