// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2013 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#ifndef GRAPH_CLUSTERING_HH
#define GRAPH_CLUSTERING_HH

21
22
#include "config.h"

23
24
#include "tr1_include.hh"
#include TR1_HEADER(unordered_set)
25
26
#include <boost/mpl/if.hpp>

27
28
29
30
#ifdef HAVE_SPARSEHASH
#include <dense_hash_set>
#endif

31
32
33
#include <ext/numeric>
using __gnu_cxx::power;

34
35
36
37
namespace graph_tool
{
using namespace boost;

38
39
40
41
#ifdef HAVE_SPARSEHASH
using google::dense_hash_set;
#endif

42
43
// calculates the number of triangles to which v belongs
template <class Graph>
44
pair<int,int>
45
46
get_triangles(typename graph_traits<Graph>::vertex_descriptor v, const Graph &g)
{
47
48
49
50
51
52
53
54
55
56
57
58
59
60
    typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;

#ifdef HAVE_SPARSEHASH
    typedef dense_hash_set<vertex_t> set_t;
#else
    typedef unordered_set<vertex_t> set_t;
#endif

    set_t neighbour_set;

#ifdef HAVE_SPARSEHASH
     neighbour_set.set_empty_key(numeric_limits<vertex_t>::max());
     neighbour_set.resize(out_degree(v, g));
#endif
61

62
    size_t triangles = 0;
63

64
65
    typename graph_traits<Graph>::adjacency_iterator n, n_end;
    for (tie(n, n_end) = adjacent_vertices(v, g); n != n_end; ++n)
66
    {
67
        if (*n == v) // no self-loops
68
            continue;
69
70
        neighbour_set.insert(*n);
    }
71

72
73
74
75
    for (tie(n, n_end) = adjacent_vertices(v, g); n != n_end; ++n)
    {
        typename graph_traits<Graph>::adjacency_iterator n2, n2_end;
        for (tie(n2, n2_end) = adjacent_vertices(*n, g); n2 != n2_end; ++n2)
76
        {
77
            if (*n2 == *n) // no self-loops
78
                continue;
79
80
            if (neighbour_set.find(*n2) != neighbour_set.end())
                ++triangles;
81
82
        }
    }
83
84

    size_t k = out_degree(v, g);
85
86
87
88
89
90
91
92
    return make_pair(triangles/2,(k*(k-1))/2);
}


// retrieves the global clustering coefficient
struct get_global_clustering
{
    template <class Graph>
93
    void operator()(const Graph& g, double& c, double& c_err) const
94
95
96
97
98
99
100
    {
        size_t triangles = 0, n = 0;
        pair<size_t, size_t> temp;

        int i, N = num_vertices(g);

        #pragma omp parallel for default(shared) private(i,temp) \
101
            schedule(static) if (N > 100) reduction(+:triangles, n)
102
103
104
105
106
107
108
        for (i = 0; i < N; ++i)
        {
            typename graph_traits<Graph>::vertex_descriptor v = vertex(i, g);
            if (v == graph_traits<Graph>::null_vertex())
                continue;

            temp = get_triangles(v, g);
109
            triangles += temp.first;
110
111
            n += temp.second;
        }
112
        c = double(triangles) / n;
113
114
115

        // "jackknife" variance
        c_err = 0.0;
116
        double cerr = 0.0;
117

118
        #pragma omp parallel for default(shared) private(i,temp) \
119
            schedule(static) if (N > 100) reduction(+:cerr)
120
121
122
123
124
125
126
        for (i = 0; i < N; ++i)
        {
            typename graph_traits<Graph>::vertex_descriptor v = vertex(i, g);
            if (v == graph_traits<Graph>::null_vertex())
                continue;

            temp = get_triangles(v, g);
127
            double cl = double(triangles - temp.first) / (n - temp.second);
128

129
            cerr += power(c - cl, 2);
130
        }
131
        c_err = sqrt(cerr);
132
133
134
135
136
137
138
    }
};

// sets the local clustering coefficient to a property
struct set_clustering_to_property
{
    template <class Graph, class ClustMap>
139
    void operator()(const Graph& g, ClustMap clust_map) const
140
    {
141
        typedef typename property_traits<ClustMap>::value_type c_type;
142
143
        typename get_undirected_graph<Graph>::type ug(g);
        int i, N = num_vertices(g);
144

145
        #pragma omp parallel for default(shared) private(i) schedule(static) if (N > 100)
146
147
148
149
150
        for (i = 0; i < N; ++i)
        {
            typename graph_traits<Graph>::vertex_descriptor v = vertex(i, g);
            if (v == graph_traits<Graph>::null_vertex())
                continue;
151

152
153
154
155
            pair<size_t,size_t> triangles = get_triangles(v,ug); // get from ug
            double clustering = (triangles.second > 0) ?
                double(triangles.first)/triangles.second :
                0.0;
156

157
            clust_map[v] = c_type(clustering);
158
159
        }
    }
160

161
162
163
164
165
166
167
168
169
170
171
    template <class Graph>
    struct get_undirected_graph
    {
        typedef typename mpl::if_
            < is_convertible<typename graph_traits<Graph>::directed_category,
                             directed_tag>,
              const UndirectedAdaptor<Graph>,
              const Graph& >::type type;
    };
};

172
} //graph-tool namespace
173
174

#endif // GRAPH_CLUSTERING_HH