graph_community.hh 17.4 KB
Newer Older
1 2
// graph-tool -- a general graph modification and manipulation thingy
//
Tiago Peixoto's avatar
Tiago Peixoto committed
3
// Copyright (C) 2007-2012 Tiago de Paula Peixoto <tiago@skewed.de>
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#ifndef GRAPH_COMMUNITY_HH
#define GRAPH_COMMUNITY_HH

21 22 23 24 25 26 27 28 29
#if (GCC_VERSION >= 40400)
#   include <tr1/unordered_set>
#   include <tr1/random>
#   include <tr1/tuple>
#else
#   include <boost/tr1/unordered_set.hpp>
#   include <boost/tr1/random.hpp>
#   include <boost/tr1/tuple.hpp>
#endif
30
#include <iostream>
31 32 33
#include <fstream>
#include <iomanip>

34 35 36
#define BOOST_DISABLE_ASSERTS
#include "boost/multi_array.hpp"

37 38 39 40 41 42 43 44 45 46 47 48
#include "graph_util.hh"
#include "graph_properties.hh"

namespace graph_tool
{

using namespace std;
using namespace boost;

using std::tr1::unordered_map;
using std::tr1::unordered_set;

49
typedef tr1::mt19937 rng_t;
50 51 52 53 54 55 56

// computes the community structure through a spin glass system with
// simulated annealing

template <template <class G, class CommunityMap> class NNKS>
struct get_communities
{
57 58 59
    template <class Graph, class VertexIndex, class WeightMap,
              class CommunityMap>
    void operator()(const Graph& g, VertexIndex vertex_index, WeightMap weights,
60 61 62
                    CommunityMap s, double gamma, size_t n_iter,
                    pair<double, double> Tinterval, size_t n_spins, size_t seed,
                    pair<bool, string> verbose) const
63 64 65 66
    {
        typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
        typedef typename graph_traits<Graph>::edge_descriptor edge_t;
        typedef typename property_traits<WeightMap>::key_type weight_key_t;
67

68 69
        rng_t rng(static_cast<rng_t::result_type>(seed));

70
        tr1::variate_generator<rng_t&, tr1::uniform_real<> >
71
            random(rng, tr1::uniform_real<>());
72 73 74 75 76 77 78

        stringstream out_str;
        ofstream out_file;
        if (verbose.second != "")
        {
            out_file.open(verbose.second.c_str());
            if (!out_file.is_open())
Tiago Peixoto's avatar
Tiago Peixoto committed
79 80
                throw IOException("error opening file " + verbose.second +
                                  " for writing");
81 82 83 84 85 86 87 88
            out_file.exceptions (ifstream::eofbit | ifstream::failbit |
                                 ifstream::badbit);
        }

        double Tmin = Tinterval.first;
        double Tmax = Tinterval.second;

        unordered_map<size_t, size_t> Ns; // spin histogram
89
        CommunityMap temp_s(vertex_index, num_vertices(g));
90 91

        // init spins from [0,N-1] and global info
92
        tr1::uniform_int<size_t> sample_spin(0, n_spins-1);
93 94 95
        typename graph_traits<Graph>::vertex_iterator v,v_end;
        for (tie(v,v_end) = vertices(g); v != v_end; ++v)
        {
96
            s[*v] = temp_s[*v] = sample_spin(rng);
97 98 99 100 101
            Ns[s[*v]]++;
        }

        NNKS<Graph,CommunityMap> Nnnks(g, s); // this will retrieve the expected
                                              // number of neighbours with given
102
                                              // spin, as a function of degree
103 104 105 106 107 108 109 110 111 112 113

        // define cooling rate so that temperature starts at Tmax at temp_count
        // == 0 and reaches Tmin at temp_count == n_iter - 1
        if (Tmin < numeric_limits<double>::epsilon())
            Tmin = numeric_limits<double>::epsilon();
        double cooling_rate = -(log(Tmin)-log(Tmax))/(n_iter-1);

        // start the annealing
        for (size_t temp_count = 0; temp_count < n_iter; ++temp_count)
        {
            double T = Tmax*exp(-cooling_rate*temp_count);
114
            double E = 0;
115

116
            vector<tr1::tuple<size_t, size_t, size_t> > updates;
117 118

            // sample a new spin for every vertex
119 120 121
            int NV = num_vertices(g),i;
            #pragma omp parallel for default(shared) private(i)\
                reduction(+:E) schedule(dynamic)
122 123 124 125 126 127
            for (i = 0; i < NV; ++i)
            {
                vertex_t v = vertex(i, g);
                if (v == graph_traits<Graph>::null_vertex())
                    continue;

128 129 130 131 132 133 134 135
                size_t new_s;
                {
                    #pragma omp critical
                    new_s = sample_spin(rng);
                }

                unordered_map<size_t, double> ns; // number of neighbours with a
                                                  // given spin 's' (weighted)
136 137

                // neighborhood spins info
138
                typename graph_traits<Graph>::out_edge_iterator e, e_end;
139 140
                for (tie(e,e_end) = out_edges(v,g); e != e_end; ++e)
                {
141
                    vertex_t t = target(*e, g);
142 143 144 145
                    if (t != v)
                        ns[s[t]] += get(weights, weight_key_t(*e));
                }

146
                size_t k = out_degree_no_loops(v, g);
147

148
                double curr_e = gamma*Nnnks(k,s[v]) - ns[s[v]];
149
                double new_e = gamma*Nnnks(k,new_s) - ns[new_s];
150

151 152 153 154 155 156 157
                double r;
                {
                    #pragma omp critical
                    r = random();
                }

                if (new_e < curr_e || r < exp(-(new_e - curr_e)/T))
158
                {
159 160
                    temp_s[v] = new_s;
                    curr_e = new_e;
161 162
                    {
                        #pragma omp critical
163 164 165 166
                        updates.push_back(tr1::make_tuple(k, size_t(s[v]),
                                                          new_s));
                        Ns[s[v]]--;
                        Ns[new_s]++;
167 168
                    }
                }
169
                else
170
                {
171
                    temp_s[v] = s[v];
172
                }
173
                E += curr_e;
174
            }
175
            swap(s, temp_s);
176

177 178 179 180
            for (typeof(updates.begin()) iter = updates.begin();
                 iter != updates.end(); ++iter)
                Nnnks.Update(tr1::get<0>(*iter), tr1::get<1>(*iter),
                             tr1::get<2>(*iter));
181 182 183 184 185 186

            if (verbose.first)
            {
                for (size_t j = 0; j < out_str.str().length(); ++j)
                    cout << "\b";
                out_str.str("");
187 188 189 190 191
                size_t ns = 0;
                for (typeof(Ns.begin()) iter = Ns.begin(); iter != Ns.end();
                     ++iter)
                    if (iter->second > 0)
                        ns++;
192
                out_str << setw(lexical_cast<string>(n_iter).size())
193
                        << temp_count << " of " << n_iter
194 195 196
                        << " (" << setw(2) << (temp_count+1)*100/n_iter
                        << "%) " << "temperature: " << setw(14)
                        << setprecision(10) << T << " spins: "
197
                        << ns << " energy: " << E;
198 199 200 201 202 203
                cout << out_str.str() << flush;
            }
            if (verbose.second != "")
            {
                try
                {
204 205
                    size_t ns = 0;
                    for (typeof(Ns.begin()) iter = Ns.begin(); iter != Ns.end();
206
                         ++iter)
207 208 209 210
                        if (iter->second > 0)
                            ns++;
                    out_file << temp_count << "\t" << setprecision(10) << T
                             << "\t" << ns << "\t" << E << endl;
211 212 213
                }
                catch (ifstream::failure e)
                {
Tiago Peixoto's avatar
Tiago Peixoto committed
214 215
                    throw IOException("error writing to file " +
                                      verbose.second + ": " + e.what());
216 217 218 219
                }
            }
        }

220
        if (n_iter % 2 != 0)
221
        {
222 223 224 225
            int NV = num_vertices(g), i;
            #pragma omp parallel for default(shared) private(i)\
                schedule(dynamic)
            for (i = 0; i < NV; ++i)
226
            {
227 228 229 230
                vertex_t v = vertex(i, g);
                if (v == graph_traits<Graph>::null_vertex())
                    continue;
                temp_s[v] = s[v];
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456
            }
        }

        // rename spins, starting from zero
        unordered_map<size_t,size_t> spins;
        for (tie(v,v_end) = vertices(g); v != v_end; ++v)
        {
            if (spins.find(s[*v]) == spins.end())
                spins[s[*v]] = spins.size() - 1;
            s[*v] = spins[s[*v]];
        }

    }
};

template <class Graph, class CommunityMap>
class NNKSErdosReyni
{
public:
    NNKSErdosReyni(const Graph &g, CommunityMap s)
    {
        size_t N = 0;
        double _avg_k = 0.0;
        typename graph_traits<Graph>::vertex_iterator v,v_end;
        for (tie(v,v_end) = vertices(g); v != v_end; ++v)
        {
            size_t k = out_degree_no_loops(*v,g);
            _avg_k += k;
            N++;
            _Ns[s[*v]]++;
        }
        _p = _avg_k/(N*N);
    }

    void Update(size_t k, size_t old_s, size_t s)
    {
        _Ns[old_s]--;
        if (_Ns[old_s] == 0)
            _Ns.erase(old_s);
        _Ns[s]++;
    }

    double operator()(size_t k, size_t s) const
    {
        size_t ns = 0;
        typeof(_Ns.begin()) iter = _Ns.find(s);
        if (iter != _Ns.end())
            ns = iter->second;
        return _p*ns;
    }

private:
    double _p;
    unordered_map<size_t,size_t> _Ns;
};

template <class Graph, class CommunityMap>
class NNKSUncorr
{
public:
    NNKSUncorr(const Graph &g, CommunityMap s): _g(g), _K(0)
    {
        typename graph_traits<Graph>::vertex_iterator v,v_end;
        for (tie(v,v_end) = vertices(_g); v != v_end; ++v)
        {
            size_t k = out_degree_no_loops(*v, _g);
            _K += k;
            _Ks[s[*v]] += k;
        }
    }

    void Update(size_t k, size_t old_s, size_t s)
    {
        _Ks[old_s] -= k;
        if (_Ks[old_s] == 0)
            _Ks.erase(old_s);
        _Ks[s] += k;
    }

    double operator()(size_t k, size_t s) const
    {
        size_t ks = 0;
        typeof(_Ks.begin()) iter = _Ks.find(s);
        if (iter != _Ks.end())
            ks = iter->second;
        return k*ks/double(_K);
    }

private:
    const Graph& _g;
    size_t _K;
    unordered_map<size_t,size_t> _Ks;
};

template <class Graph, class CommunityMap>
class NNKSCorr
{
public:
    NNKSCorr(const Graph &g, CommunityMap s): _g(g)
    {
        unordered_set<size_t> spins;

        typename graph_traits<Graph>::vertex_iterator v,v_end;
        for (tie(v,v_end) = vertices(_g); v != v_end; ++v)
        {
            size_t k = out_degree_no_loops(*v, _g);
            _Nk[k]++;
            _Nks[k][s[*v]]++;
            spins.insert(s[*v]);
        }

        size_t E = 0;
        typename graph_traits<Graph>::edge_iterator e,e_end;
        for (tie(e,e_end) = edges(_g); e != e_end; ++e)
        {
            typename graph_traits<Graph>::vertex_descriptor s, t;

            s = source(*e,g);
            t = target(*e,g);
            if (s != t)
            {
                size_t k1 = out_degree_no_loops(s, g);
                size_t k2 = out_degree_no_loops(t, g);
                _Pkk[k1][k2]++;
                _Pkk[k2][k1]++;
                E++;
            }
        }

        for (typeof(_Pkk.begin()) iter1 = _Pkk.begin(); iter1 != _Pkk.end();
             ++iter1)
        {
            double sum = 0;
            for (typeof(iter1->second.begin()) iter2 = iter1->second.begin();
                 iter2 != iter1->second.end(); ++iter2)
                sum += iter2->second;
            for (typeof(iter1->second.begin()) iter2 = iter1->second.begin();
                 iter2 != iter1->second.end(); ++iter2)
                iter2->second /= sum;
        }

        for (typeof(_Nk.begin()) k_iter = _Nk.begin(); k_iter != _Nk.end();
             ++k_iter)
        {
            size_t k1 = k_iter->first;
            _degs.push_back(k1);
            for (typeof(spins.begin()) s_iter = spins.begin();
                 s_iter != spins.end(); ++s_iter)
                for (typeof(_Nk.begin()) k_iter2 = _Nk.begin();
                     k_iter2 != _Nk.end(); ++k_iter2)
                {
                    size_t k2 = k_iter2->first;
                    if (_Nks[k2].find(*s_iter) != _Nks[k2].end())
                        _NNks[k1][*s_iter] +=
                            k1*_Pkk[k1][k2] * _Nks[k2][*s_iter]/double(_Nk[k2]);
                }
        }
    }

    void Update(size_t k, size_t old_s, size_t s)
    {
        int i, NK = _degs.size();
        #pragma omp parallel for default(shared) private(i) schedule(dynamic)
        for (i = 0; i < NK; ++i)
        {
            size_t k1 = _degs[i], k2 = k;
            if (_Pkk.find(k1) == _Pkk.end())
                continue;
            if (_Pkk.find(k1)->second.find(k2) == _Pkk.find(k1)->second.end())
                continue;
            unordered_map<size_t,double>& NNks_k1 = _NNks[k1];
            double Pk1k2 = _Pkk[k1][k2];
            unordered_map<size_t,size_t>& Nksk2 = _Nks[k2];
            double Nk2 = _Nk[k2];
            NNks_k1[old_s] -=  k1*Pk1k2 * Nksk2[old_s]/Nk2;
            if (NNks_k1[old_s] == 0.0)
                NNks_k1.erase(old_s);
            if (Nksk2.find(s) != Nksk2.end())
                NNks_k1[s] -=  k1*Pk1k2 * Nksk2[s]/Nk2;
            if (NNks_k1[s] == 0.0)
                NNks_k1.erase(s);
        }

        _Nks[k][old_s]--;
        if (_Nks[k][old_s] == 0)
            _Nks[k].erase(old_s);
        _Nks[k][s]++;

        #pragma omp parallel for default(shared) private(i) schedule(dynamic)
        for (i = 0; i < NK; ++i)
        {
            size_t k1 = _degs[i], k2 = k;
            if (_Pkk.find(k1) == _Pkk.end())
                continue;
            if (_Pkk.find(k1)->second.find(k2) == _Pkk.find(k1)->second.end())
                continue;
            unordered_map<size_t,double>& NNks_k1 = _NNks[k1];
            double Pk1k2 = _Pkk[k1][k2];
            unordered_map<size_t,size_t>& Nksk2 = _Nks[k2];
            double Nk2 = _Nk[k2];
            NNks_k1[old_s] +=  k1*Pk1k2 * Nksk2[old_s]/Nk2;
            if (NNks_k1[old_s] == 0.0)
                NNks_k1.erase(old_s);
            NNks_k1[s] +=  k1*Pk1k2 * Nksk2[s]/Nk2;
        }

    }

    double operator()(size_t k, size_t s) const
    {
        const typeof(_NNks[k])& nnks = _NNks.find(k)->second;
        const typeof(nnks.begin()) iter = nnks.find(s);
        if (iter != nnks.end())
            return iter->second;
        return 0.0;
    }

private:
    const Graph& _g;
    vector<size_t> _degs;
    unordered_map<size_t,size_t> _Nk;
    unordered_map<size_t,unordered_map<size_t,double> > _Pkk;
    unordered_map<size_t,unordered_map<size_t,size_t> > _Nks;
    unordered_map<size_t,unordered_map<size_t,double> > _NNks;
};

457 458 459 460 461 462
enum comm_corr_t
{
    ERDOS_REYNI,
    UNCORRELATED,
    CORRELATED
};
463 464 465

struct get_communities_selector
{
466 467 468
    get_communities_selector(comm_corr_t corr,
                             GraphInterface::vertex_index_map_t index)
        : _corr(corr), _index(index) {}
469
    comm_corr_t _corr;
470
    GraphInterface::vertex_index_map_t _index;
471 472

    template <class Graph, class WeightMap, class CommunityMap>
473 474 475 476
    void operator()(const Graph& g, WeightMap weights, CommunityMap s,
                    double gamma, size_t n_iter, pair<double, double> Tinterval,
                    size_t Nspins, size_t seed, pair<bool, string> verbose)
        const
477 478 479
    {
        switch (_corr)
        {
480
        case ERDOS_REYNI:
481 482 483
            get_communities<NNKSErdosReyni>()(g, _index, weights, s, gamma,
                                              n_iter, Tinterval, Nspins, seed,
                                              verbose);
484
            break;
485
        case UNCORRELATED:
486
            get_communities<NNKSUncorr>()(g, _index, weights, s, gamma, n_iter,
487 488
                                          Tinterval, Nspins, seed, verbose);
            break;
489
        case CORRELATED:
490
            get_communities<NNKSCorr>()(g, _index, weights, s, gamma, n_iter,
491 492 493 494 495 496 497 498 499 500
                                        Tinterval, Nspins, seed, verbose);
            break;
        }
    }
};

// get Newman's modularity of a given community partition
struct get_modularity
{
    template <class Graph, class WeightMap, class CommunityMap>
501
    void operator()(const Graph& g, WeightMap weights, CommunityMap s,
502 503 504
                    double& modularity) const
    {
        typedef typename property_traits<WeightMap>::key_type weight_key_t;
505
        typedef typename property_traits<WeightMap>::value_type weight_val_t;
506
        typedef typename property_traits<CommunityMap>::value_type s_val_t;
507

508
        modularity = 0.0;
509 510 511 512 513 514 515 516
        double W = 0;

        typename graph_traits<Graph>::edge_iterator e, e_end;
        for (tie(e,e_end) = edges(g); e != e_end; ++e)
            if (target(*e,g) != source(*e,g))
            {
                W += get(weights, *e);
                if (get(s, target(*e,g)) == get(s, source(*e,g)))
517
                    modularity += 2 * get(weights, weight_key_t(*e));
518 519
            }

520
        unordered_map<s_val_t, weight_val_t> Ks;
521 522 523

        typename graph_traits<Graph>::vertex_iterator v, v_end;
        for (tie(v,v_end) = vertices(g); v != v_end; ++v)
524
            Ks[get(s, *v)] += out_degree_no_loops_weighted(*v, weights, g);
525

526
        for (typeof(Ks.begin()) iter = Ks.begin(); iter != Ks.end(); ++iter)
527
            modularity -= (iter->second*iter->second)/double(2*W);
528 529 530 531 532 533 534 535

        modularity /= 2*W;
    }
};

} // graph_tool namespace

#endif //GRAPH_COMMUNITY_HH