__init__.py 17.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#! /usr/bin/env python
# graph_tool.py -- a general graph manipulation python module
#
# Copyright (C) 2007 Tiago de Paula Peixoto <tiago@forked.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

19
20
21
"""
``graph_tool.correlations`` - Correlations
------------------------------------------
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

Summary
+++++++

.. autosummary::
   :nosignatures:

   assortativity
   scalar_assortativity
   corr_hist
   combined_corr_hist
   avg_neighbour_corr
   avg_combined_corr

Contents
++++++++
38
39
40
"""


Tiago Peixoto's avatar
Tiago Peixoto committed
41
42
from .. dl_import import dl_import
dl_import("import libgraph_tool_correlations")
43

44
from .. core import _degree, _prop
45
46
from numpy import *

47
48
__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
           "combined_corr_hist", "avg_neighbour_corr", "avg_combined_corr"]
49
50

def assortativity(g, deg):
51
52
53
54
55
    r"""
    Obtain the assortativity coefficient for the given graph.

    Parameters
    ----------
56
    g : :class:`~graph_tool.Graph`
57
        Graph to be used.
58
    deg : string or :class:`~graph_tool.PropertyMap`
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
        degree type ("in", "out" or "total") or vertex property map, which
        specifies the vertex types.

    Returns
    -------
    assortativity coefficient : tuple of two floats
        The assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
78
    The assortativity coefficient [newman-mixing-2003]_ tells in a concise
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    fashion how vertices of different types are preferentially connected amongst
    themselves, and is defined by

    .. math::
        r = \frac{\sum_i e_{ii} - \sum_i a_i b_i}{1-\sum_i a_i b_i}

    where :math:`a_i=\sum_je_{ij}` and :math:`b_j=\sum_ie_{ij}`, and
    :math:`e_{ij}` is the fraction of edges from a vertex of type
    i to a vertex of type j.


    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(1000, lambda: sample_k(40),
    ...                     lambda i,k: 1.0/(1+abs(i-k)), directed=False)
    >>> gt.assortativity(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
108
    (0.15379786566227244, 0.0052484342042414195)
109
110
111

    References
    ----------
112
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
113
114
115
116
        Phys. Rev. E 67, 026126 (2003)
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife

    """
117
    return libgraph_tool_correlations.\
118
           assortativity_coefficient(g._Graph__graph, _degree(g, deg))
119
120

def scalar_assortativity(g, deg):
121
122
123
124
125
    r"""
    Obtain the scalar assortativity coefficient for the given graph.

    Parameters
    ----------
126
    g : :class:`~graph_tool.Graph`
127
        Graph to be used.
128
    deg : string or :class:`~graph_tool.PropertyMap`
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
        degree type ("in", "out" or "total") or vertex property map, which
        specifies the vertex types.

    Returns
    -------
    scalar assortativity coefficient : tuple of two floats
        The scalar assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
148
    The scalar assortativity coefficient [newman-mixing-2003]_ tells in a
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
    concise fashion how vertices of different types are preferentially connected
    amongst themselves, and is defined by

    .. math::
        r = \frac{\sum_{xy} xy(e_{xy} - a_x b_y)}{\sigma_a\sigma_b}

    where :math:`a_x=\sum_ye_{xy}` and :math:`b_y=\sum_xe_{xy}`, and
    :math:`e_{xy}` is the fraction of edges from a vertex of type
    x to a vertex of type y.

    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(1000, lambda: sample_k(40), lambda i,k: abs(i-k),
    ...                     directed=False)
    >>> gt.scalar_assortativity(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
177
    (-0.46612545377150078, 0.010365307846181516)
178
179
    >>> g = gt.random_graph(1000, lambda: sample_k(40),
    ...                     lambda i,k: 1.0/(1+abs(i-k)),
180
    ...                     directed=False)
181
    >>> gt.scalar_assortativity(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
182
    (0.63254355342678503, 0.011015440807502176)
183
184
185

    References
    ----------
186
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
187
188
189
        Phys. Rev. E 67, 026126 (2003)
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife
    """
190
    return libgraph_tool_correlations.\
191
           scalar_assortativity_coefficient(g._Graph__graph,
192
                                            _degree(g, deg))
193

194
195
196
197
198
199
200
def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None,
              float_count=True):
    r"""
    Obtain the correlation histogram for the given graph.

    Parameters
    ----------
201
    g : :class:`~graph_tool.Graph`
202
        Graph to be used.
203
    deg_source : string or :class:`~graph_tool.PropertyMap`
204
205
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
206
    deg_target : string or :class:`~graph_tool.PropertyMap`
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
    bins : list of lists (optional, default: [[1], [1]])
        A list of bins to be used for the source and target degrees. If any list
        has size 1, it is used as the constant width of an automatically
        generated bin range.
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
222
    bin_counts : :class:`~numpy.ndarray`
223
        Two-dimensional array with the bin counts.
224
    source_bins : :class:`~numpy.ndarray`
225
        Source degree bins
226
    target_bins : :class:`~numpy.ndarray`
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
        Target degree bins


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The correlation histogram counts, for every vertex with degree (or scalar
    property) 'source_deg', the number of out-neighbours with degree (or scalar
    property) 'target_deg'.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40),
    ...                     lambda i,j: (sin(i/pi)*sin(j/pi)+1)/2,
    ...                     directed=False)
    >>> h = gt.corr_hist(g, "out", "out")
    >>> clf()
264
    >>> xlabel("source out-degree")
265
    <...>
266
    >>> ylabel("target out-degree")
267
268
269
    <...>
    >>> imshow(h[0], interpolation="nearest")
    <...>
270
271
    >>> colorbar()
    <...>
272
273
    >>> savefig("corr.png")

274
275
276
277
    .. figure:: corr.png
        :align: center

        Out/out-degree correlation histogram.
278
279
    """

280
    ret = libgraph_tool_correlations.\
281
282
283
          vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source),
                                       _degree(g, deg_target),
                                       _prop("e", g, weight), bins[0], bins[1])
284
285
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
286

287
def combined_corr_hist(g, deg1, deg2, bins=[[1],[1]], float_count=True):
288
289
290
291
292
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
293
    g : :class:`~graph_tool.Graph`
294
        Graph to be used.
295
    deg1 : string or :class:`~graph_tool.PropertyMap`
296
        first degree type ("in", "out" or "total") or vertex property map.
297
    deg2 : string or :class:`~graph_tool.PropertyMap`
298
299
300
301
302
303
304
305
306
307
308
309
        second degree type ("in", "out" or "total") or vertex property map.
    bins : list of lists (optional, default: [[1], [1]])
        A list of bins to be used for the first and second degrees. If any list
        has size 1, it is used as the constant width of an automatically
        generated bin range.
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
310
    bin_counts : :class:`~numpy.ndarray`
311
        Two-dimensional array with the bin counts.
312
    first_bins : :class:`~numpy.ndarray`
313
        First degree bins
314
    second_bins : :class:`~numpy.ndarray`
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
        Second degree bins

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         i = randint(1,max+1)
    ...         j = randint(1,max+1)
    ...         accept = random() < (sin(i/pi)*sin(j/pi)+1)/2
    ...     return i,j
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40))
    >>> h = gt.combined_corr_hist(g, "in", "out")
    >>> clf()
346
    >>> xlabel("in-degree")
347
    <...>
348
    >>> ylabel("out-degree")
349
350
351
    <...>
    >>> imshow(h[0], interpolation="nearest")
    <...>
352
353
    >>> colorbar()
    <...>
354
355
    >>> savefig("combined_corr.png")

356
357
358
359
360
    .. figure:: combined_corr.png
        :align: center

        Combined in/out-degree correlation histogram.

361
    """
362
    ret = libgraph_tool_correlations.\
363
          vertex_combined_correlation_histogram(g._Graph__graph,
364
365
                                                _degree(g, deg1),
                                                _degree(g, deg2),
366
                                                bins[0], bins[1])
367
368
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
369

370
371
372
373
374
375
def avg_neighbour_corr(g, deg_source, deg_target, bins=[1], weight=None):
    r"""
    Obtain the average neighbour-neighbour correlation for the given graph.

    Parameters
    ----------
376
    g : :class:`~graph_tool.Graph`
377
        Graph to be used.
378
    deg_source : string or :class:`~graph_tool.PropertyMap`
379
380
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
381
    deg_target : string or :class:`~graph_tool.PropertyMap`
382
383
384
385
386
387
388
389
390
391
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
    bins : list (optional, default: [1])
        Bins to be used for the source degrees. If the list has size 1, it is
        used as the constant width of an automatically generated bin range.
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.

    Returns
    -------
392
    bin_avg : :class:`~numpy.ndarray`
393
        Array with the deg_target average for the get_source bins.
394
    bin_dev : :class:`~numpy.ndarray`
395
396
        Array with the standard deviation of the deg_target average for the
        get_source bins.
397
398
    bins : :class:`~numpy.ndarray`
        Source degree bins,
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The average correlation is the average, for every vertex with degree (or
    scalar property) 'source_deg', the of the 'target_deg' degree (or
    scalar property) of its neighbours.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40),
    ...                     lambda i,j: (sin(i/pi)*sin(j/pi)+1)/2,
    ...                     directed=False)
    >>> h = gt.avg_neighbour_corr(g, "out", "out")
    >>> clf()
435
    >>> xlabel("source out-degree")
436
    <...>
437
    >>> ylabel("target out-degree")
438
    <...>
439
    >>> errorbar(h[2], h[0], yerr=h[1], fmt="o")
440
441
442
    (...)
    >>> savefig("avg_corr.png")

443
444
445
446
    .. figure:: avg_corr.png
        :align: center

        Average out/out degree correlation.
447
448
    """

449
    ret = libgraph_tool_correlations.\
450
          vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source),
451
                                 _degree(g, deg_target), _prop("e", g, weight),
452
453
454
455
                                 bins)
    return [ret[0], ret[1], ret[2][0]]

def avg_combined_corr(g, deg1, deg2, bins=[1]):
456
457
458
459
460
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
461
    g : :class:`~graph_tool.Graph`
462
        Graph to be used.
463
    deg1 : string or :class:`~graph_tool.PropertyMap`
464
        first degree type ("in", "out" or "total") or vertex property map.
465
    deg2 : string or :class:`~graph_tool.PropertyMap`
466
467
468
469
470
471
472
473
        second degree type ("in", "out" or "total") or vertex property map.
    bins : list (optional, default: [1])
        A list of bins to be used for the first degrees. If the list
        has size 1, it is used as the constant width of an automatically
        generated bin range.

    Returns
    -------
474
    bin_avg : :class:`~numpy.ndarray`
475
        Array with the deg2 average for the deg1 bins.
476
    bin_dev : :class:`~numpy.ndarray`
477
        Array with the standard deviation of the deg2 average for the deg1 bins.
478
    bins : :class:`~numpy.ndarray`
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
        The deg1 bins.

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         i = randint(1,max+1)
    ...         j = randint(1,max+1)
    ...         accept = random() < (sin(i/pi)*sin(j/pi)+1)/2
    ...     return i,j
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40))
    >>> h = gt.avg_combined_corr(g, "in", "out")
    >>> clf()
511
    >>> xlabel("in-degree")
512
    <...>
513
    >>> ylabel("out-degree")
514
    <...>
515
    >>> errorbar(h[2], h[0], yerr=h[1], fmt="o")
516
517
518
    (...)
    >>> savefig("combined_avg_corr.png")

519
520
521
522
    .. figure:: combined_avg_corr.png
        :align: center

        Average combined in/out-degree correlation.
523
524
    """

525
526
527
528
    ret = libgraph_tool_correlations.\
          vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
                                          _degree(g, deg2), bins)
    return [ret[0], ret[1], ret[2][0]]