__init__.py 18.4 KB
Newer Older
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4
5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2007-2011 Tiago de Paula Peixoto <tiago@skewed.de>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
22
23
"""
``graph_tool.correlations`` - Correlations
------------------------------------------
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

Summary
+++++++

.. autosummary::
   :nosignatures:

   assortativity
   scalar_assortativity
   corr_hist
   combined_corr_hist
   avg_neighbour_corr
   avg_combined_corr

Contents
++++++++
40
41
42
"""


Tiago Peixoto's avatar
Tiago Peixoto committed
43
44
from .. dl_import import dl_import
dl_import("import libgraph_tool_correlations")
45

46
from .. import _degree, _prop
47
48
from numpy import *

49
50
__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
           "combined_corr_hist", "avg_neighbour_corr", "avg_combined_corr"]
51

52

53
def assortativity(g, deg):
54
55
56
57
58
    r"""
    Obtain the assortativity coefficient for the given graph.

    Parameters
    ----------
59
    g : :class:`~graph_tool.Graph`
60
        Graph to be used.
61
    deg : string or :class:`~graph_tool.PropertyMap`
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
        degree type ("in", "out" or "total") or vertex property map, which
        specifies the vertex types.

    Returns
    -------
    assortativity coefficient : tuple of two floats
        The assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
81
    The assortativity coefficient [newman-mixing-2003]_ tells in a concise
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
    fashion how vertices of different types are preferentially connected amongst
    themselves, and is defined by

    .. math::
        r = \frac{\sum_i e_{ii} - \sum_i a_i b_i}{1-\sum_i a_i b_i}

    where :math:`a_i=\sum_je_{ij}` and :math:`b_j=\sum_ie_{ij}`, and
    :math:`e_{ij}` is the fraction of edges from a vertex of type
    i to a vertex of type j.


    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(1000, lambda: sample_k(40),
109
110
    ...                     lambda i,k: 1.0 / (1 + abs(i - k)), directed=False,
    ...                     mix_time=100)
111
    >>> gt.assortativity(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
112
    (0.14145218664992676, 0.005077209994557802)
113
114
115

    References
    ----------
116
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Tiago Peixoto's avatar
Tiago Peixoto committed
117
        Phys. Rev. E 67, 026126 (2003), :doi:`10.1103/PhysRevE.67.026126`
118
119
120
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife

    """
121
    return libgraph_tool_correlations.\
122
           assortativity_coefficient(g._Graph__graph, _degree(g, deg))
123

124

125
def scalar_assortativity(g, deg):
126
127
128
129
130
    r"""
    Obtain the scalar assortativity coefficient for the given graph.

    Parameters
    ----------
131
    g : :class:`~graph_tool.Graph`
132
        Graph to be used.
133
    deg : string or :class:`~graph_tool.PropertyMap`
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
        degree type ("in", "out" or "total") or vertex property map, which
        specifies the vertex types.

    Returns
    -------
    scalar assortativity coefficient : tuple of two floats
        The scalar assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
153
    The scalar assortativity coefficient [newman-mixing-2003]_ tells in a
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
    concise fashion how vertices of different types are preferentially connected
    amongst themselves, and is defined by

    .. math::
        r = \frac{\sum_{xy} xy(e_{xy} - a_x b_y)}{\sigma_a\sigma_b}

    where :math:`a_x=\sum_ye_{xy}` and :math:`b_y=\sum_xe_{xy}`, and
    :math:`e_{xy}` is the fraction of edges from a vertex of type
    x to a vertex of type y.

    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(1000, lambda: sample_k(40), lambda i,k: abs(i-k),
180
    ...                     directed=False, mix_time=100)
181
    >>> gt.scalar_assortativity(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
182
    (-0.46972665544654923, 0.010035656615797507)
183

184
    >>> g = gt.random_graph(1000, lambda: sample_k(40),
185
186
    ...                     lambda i, k: 1.0 / (1 + abs(i - k)),
    ...                     directed=False, mix_time=100)
187
    >>> gt.scalar_assortativity(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
188
    (0.6120658464996896, 0.011388445161055338)
189
190
191

    References
    ----------
192
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Tiago Peixoto's avatar
Tiago Peixoto committed
193
        Phys. Rev. E 67, 026126 (2003), :doi:`10.1103/PhysRevE.67.026126`
194
195
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife
    """
196
    return libgraph_tool_correlations.\
197
           scalar_assortativity_coefficient(g._Graph__graph,
198
                                            _degree(g, deg))
199

200
201

def corr_hist(g, deg_source, deg_target, bins=[[0, 1], [0, 1]], weight=None,
202
203
204
205
206
207
              float_count=True):
    r"""
    Obtain the correlation histogram for the given graph.

    Parameters
    ----------
208
    g : :class:`~graph_tool.Graph`
209
        Graph to be used.
210
    deg_source : string or :class:`~graph_tool.PropertyMap`
211
212
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
213
    deg_target : string or :class:`~graph_tool.PropertyMap`
214
215
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
216
    bins : list of lists (optional, default: [[0, 1], [0, 1]])
217
        A list of bin edges to be used for the source and target degrees. If any
218
219
220
        list has size 2, it is used to create an automatically generated bin
        range starting from the first value, and with constant bin width given
        by the second value.
221
222
223
224
225
226
227
228
229
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
230
    bin_counts : :class:`~numpy.ndarray`
231
        Two-dimensional array with the bin counts.
232
    source_bins : :class:`~numpy.ndarray`
233
        Source degree bins
234
    target_bins : :class:`~numpy.ndarray`
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
        Target degree bins


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The correlation histogram counts, for every vertex with degree (or scalar
    property) 'source_deg', the number of out-neighbours with degree (or scalar
    property) 'target_deg'.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
    ...         accept = random() < 1.0/k
    ...     return k
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40),
268
269
    ...                     lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
    ...                     directed=False, mix_time=100)
270
271
    >>> h = gt.corr_hist(g, "out", "out")
    >>> clf()
272
    >>> xlabel("source out-degree")
273
    <...>
274
    >>> ylabel("target out-degree")
275
276
277
    <...>
    >>> imshow(h[0], interpolation="nearest")
    <...>
278
279
    >>> colorbar()
    <...>
280
281
    >>> savefig("corr.png")

282
283
284
285
    .. figure:: corr.png
        :align: center

        Out/out-degree correlation histogram.
286
287
    """

288
    ret = libgraph_tool_correlations.\
289
290
          vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source),
                                       _degree(g, deg_target),
291
292
293
                                       _prop("e", g, weight),
                                       [float(x) for x in bins[0]],
                                       [float(x) for x in bins[1]])
294
295
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
296

Tiago Peixoto's avatar
Tiago Peixoto committed
297

298
def combined_corr_hist(g, deg1, deg2, bins=[[0, 1], [0, 1]], float_count=True):
299
300
301
302
303
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
304
    g : :class:`~graph_tool.Graph`
305
        Graph to be used.
306
    deg1 : string or :class:`~graph_tool.PropertyMap`
307
        first degree type ("in", "out" or "total") or vertex property map.
308
    deg2 : string or :class:`~graph_tool.PropertyMap`
309
        second degree type ("in", "out" or "total") or vertex property map.
310
311
    bins : list of lists (optional, default: [[0, 1], [0, 1]])
        A list of bin edges to be used for the first and second degrees. If any
312
313
314
        list has size 2, it is used to create an automatically generated bin
        range starting from the first value, and with constant bin width given
        by the second value.
315
316
317
318
319
320
321
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
322
    bin_counts : :class:`~numpy.ndarray`
323
        Two-dimensional array with the bin counts.
324
    first_bins : :class:`~numpy.ndarray`
325
        First degree bins
326
    second_bins : :class:`~numpy.ndarray`
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
        Second degree bins

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
350
351
352
    ...         i = randint(1, max + 1)
    ...         j = randint(1, max + 1)
    ...         accept = random() < (sin(i / pi) * sin(j / pi) + 1) / 2
353
354
355
356
357
    ...     return i,j
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40))
    >>> h = gt.combined_corr_hist(g, "in", "out")
    >>> clf()
358
    >>> xlabel("in-degree")
359
    <...>
360
    >>> ylabel("out-degree")
361
362
363
    <...>
    >>> imshow(h[0], interpolation="nearest")
    <...>
364
365
    >>> colorbar()
    <...>
366
367
    >>> savefig("combined_corr.png")

368
369
370
371
372
    .. figure:: combined_corr.png
        :align: center

        Combined in/out-degree correlation histogram.

373
    """
374
    ret = libgraph_tool_correlations.\
375
          vertex_combined_correlation_histogram(g._Graph__graph,
376
377
                                                _degree(g, deg1),
                                                _degree(g, deg2),
378
379
                                                [float(x) for x in bins[0]],
                                                [float(x) for x in bins[1]])
380
381
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
382

Tiago Peixoto's avatar
Tiago Peixoto committed
383

384
def avg_neighbour_corr(g, deg_source, deg_target, bins=[0, 1], weight=None):
385
386
387
388
389
    r"""
    Obtain the average neighbour-neighbour correlation for the given graph.

    Parameters
    ----------
390
    g : :class:`~graph_tool.Graph`
391
        Graph to be used.
392
    deg_source : string or :class:`~graph_tool.PropertyMap`
393
394
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
395
    deg_target : string or :class:`~graph_tool.PropertyMap`
396
397
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
398
399
400
401
    bins : list (optional, default: [0, 1])
        Bins to be used for the source degrees. If the list has size 2, it is
        used as the constant width of an automatically generated bin range,
        starting from the first value.
402
403
404
405
406
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.

    Returns
    -------
407
    bin_avg : :class:`~numpy.ndarray`
408
        Array with the deg_target average for the get_source bins.
409
    bin_dev : :class:`~numpy.ndarray`
410
411
        Array with the standard deviation of the deg_target average for the
        get_source bins.
412
413
    bins : :class:`~numpy.ndarray`
        Source degree bins,
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The average correlation is the average, for every vertex with degree (or
    scalar property) 'source_deg', the of the 'target_deg' degree (or
    scalar property) of its neighbours.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         k = randint(1,max+1)
442
    ...         accept = random() < 1.0 / k
443
444
445
    ...     return k
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40),
446
447
    ...                     lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
    ...                     directed=False, mix_time=100)
448
449
    >>> h = gt.avg_neighbour_corr(g, "out", "out")
    >>> clf()
450
    >>> xlabel("source out-degree")
451
    <...>
452
    >>> ylabel("target out-degree")
453
    <...>
454
    >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
455
456
457
    (...)
    >>> savefig("avg_corr.png")

458
459
460
461
    .. figure:: avg_corr.png
        :align: center

        Average out/out degree correlation.
462
463
    """

464
    ret = libgraph_tool_correlations.\
465
          vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source),
466
                                 _degree(g, deg_target), _prop("e", g, weight),
467
                                 [float(x) for x in bins])
468
469
    return [ret[0], ret[1], ret[2][0]]

470
471

def avg_combined_corr(g, deg1, deg2, bins=[0, 1]):
472
473
474
475
476
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
477
    g : :class:`~graph_tool.Graph`
478
        Graph to be used.
479
    deg1 : string or :class:`~graph_tool.PropertyMap`
480
        first degree type ("in", "out" or "total") or vertex property map.
481
    deg2 : string or :class:`~graph_tool.PropertyMap`
482
        second degree type ("in", "out" or "total") or vertex property map.
483
484
485
486
    bins : list (optional, default: [0, 1])
        Bins to be used for the first degrees. If the list has size 2, it is
        used as the constant width of an automatically generated bin range,
        starting from the first value.
487
488
489

    Returns
    -------
490
    bin_avg : :class:`~numpy.ndarray`
491
        Array with the deg2 average for the deg1 bins.
492
    bin_dev : :class:`~numpy.ndarray`
493
        Array with the standard deviation of the deg2 average for the deg1 bins.
494
    bins : :class:`~numpy.ndarray`
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
        The deg1 bins.

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
    avg_neighbour_corr: average nearest-neighbour correlation
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
    >>> from numpy.random import randint, random, seed
    >>> from pylab import *
    >>> seed(42)
    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
    ...         i = randint(1,max+1)
    ...         j = randint(1,max+1)
    ...         accept = random() < (sin(i/pi)*sin(j/pi)+1)/2
    ...     return i,j
    ...
    >>> g = gt.random_graph(10000, lambda: sample_k(40))
    >>> h = gt.avg_combined_corr(g, "in", "out")
    >>> clf()
527
    >>> xlabel("in-degree")
528
    <...>
529
    >>> ylabel("out-degree")
530
    <...>
531
    >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
532
533
534
    (...)
    >>> savefig("combined_avg_corr.png")

535
536
537
538
    .. figure:: combined_avg_corr.png
        :align: center

        Average combined in/out-degree correlation.
539
540
    """

541
542
    ret = libgraph_tool_correlations.\
          vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
543
544
                                          _degree(g, deg2),
                                          [float(x) for x in bins])
545
    return [ret[0], ret[1], ret[2][0]]