__init__.py 19.3 KB
Newer Older
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4 5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2018 Tiago de Paula Peixoto <tiago@skewed.de>
7 8 9 10 11 12 13 14 15 16 17 18 19 20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 22 23
"""
``graph_tool.correlations`` - Correlations
------------------------------------------
24 25 26 27 28 29 30 31 32 33 34

Summary
+++++++

.. autosummary::
   :nosignatures:

   assortativity
   scalar_assortativity
   corr_hist
   combined_corr_hist
35
   avg_neighbor_corr
36 37 38 39
   avg_combined_corr

Contents
++++++++
40 41
"""

42
from __future__ import division, absolute_import, print_function
43

Tiago Peixoto's avatar
Tiago Peixoto committed
44
from .. dl_import import dl_import
45
dl_import("from . import libgraph_tool_correlations")
46

47
from .. import _degree, _prop
48 49
from numpy import *

50
__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
51 52
           "combined_corr_hist", "avg_neighbor_corr", "avg_neighbour_corr",
           "avg_combined_corr"]
53

54

55 56
def assortativity(g, deg, eweight=None):
    r"""Obtain the assortativity coefficient for the given graph.
57 58 59

    Parameters
    ----------
60
    g : :class:`~graph_tool.Graph`
61
        Graph to be used.
62
    deg : string or :class:`~graph_tool.VertexPropertyMap`
63
        Degree type ("in", "out" or "total") or vertex property map, which
64
        specifies the vertex types.
65
    eweight : :class:`~graph_tool.EdgePropertyMap` (optional, default: `None`)
66 67
        If given, this will specify the edge weights, otherwise a constant value
        of one will be used.
68 69 70 71 72 73 74 75 76 77 78 79

    Returns
    -------
    assortativity coefficient : tuple of two floats
        The assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
80
    avg_neighbor_corr: average nearest-neighbor correlation
81 82 83 84
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
85
    The assortativity coefficient [newman-mixing-2003]_ tells in a concise
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
    fashion how vertices of different types are preferentially connected amongst
    themselves, and is defined by

    .. math::
        r = \frac{\sum_i e_{ii} - \sum_i a_i b_i}{1-\sum_i a_i b_i}

    where :math:`a_i=\sum_je_{ij}` and :math:`b_j=\sum_ie_{ij}`, and
    :math:`e_{ij}` is the fraction of edges from a vertex of type
    i to a vertex of type j.


    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
103

104
    .. doctest:: assortativity
105

106 107 108
       >>> g = gt.collection.data["pgp-strong-2009"]
       >>> g = gt.GraphView(g, directed=False)
       >>> gt.assortativity(g, "out")
109
       (0.0240578552..., 0.0003272847...)
110 111 112

    References
    ----------
113
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Tiago Peixoto's avatar
Tiago Peixoto committed
114
        Phys. Rev. E 67, 026126 (2003), :doi:`10.1103/PhysRevE.67.026126`
115 116 117
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife

    """
118
    return libgraph_tool_correlations.\
119 120
           assortativity_coefficient(g._Graph__graph, _degree(g, deg),
                                     _prop("e", g, eweight))
121

122

123
def scalar_assortativity(g, deg, eweight=None):
124 125 126 127 128
    r"""
    Obtain the scalar assortativity coefficient for the given graph.

    Parameters
    ----------
129
    g : :class:`~graph_tool.Graph`
130
        Graph to be used.
131
    deg : string or :class:`~graph_tool.VertexPropertyMap`
132 133
        Degree type ("in", "out" or "total") or vertex property map, which
        specifies the vertex scalar values.
134
    eweight : :class:`~graph_tool.EdgePropertyMap` (optional, default: `None`)
135 136
        If given, this will specify the edge weights, otherwise a constant value
        of one will be used.
137 138 139 140 141 142 143 144 145 146 147 148

    Returns
    -------
    scalar assortativity coefficient : tuple of two floats
        The scalar assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
149
    avg_neighbor_corr: average nearest-neighbor correlation
150 151 152 153
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
154
    The scalar assortativity coefficient [newman-mixing-2003]_ tells in a
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
    concise fashion how vertices of different types are preferentially connected
    amongst themselves, and is defined by

    .. math::
        r = \frac{\sum_{xy} xy(e_{xy} - a_x b_y)}{\sigma_a\sigma_b}

    where :math:`a_x=\sum_ye_{xy}` and :math:`b_y=\sum_xe_{xy}`, and
    :math:`e_{xy}` is the fraction of edges from a vertex of type
    x to a vertex of type y.

    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
171

172
    .. doctest:: scalar_assortativity
173

174 175 176
       >>> g = gt.collection.data["pgp-strong-2009"]
       >>> g = gt.GraphView(g, directed=False)
       >>> gt.scalar_assortativity(g, "out")
177
       (0.026384932418..., 0.001213444270...)
178 179 180

    References
    ----------
181
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Tiago Peixoto's avatar
Tiago Peixoto committed
182
        Phys. Rev. E 67, 026126 (2003), :doi:`10.1103/PhysRevE.67.026126`
183 184
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife
    """
185
    return libgraph_tool_correlations.\
186 187
           scalar_assortativity_coefficient(g._Graph__graph, _degree(g, deg),
                                            _prop("e", g, eweight))
188

189 190

def corr_hist(g, deg_source, deg_target, bins=[[0, 1], [0, 1]], weight=None,
191 192 193 194 195 196
              float_count=True):
    r"""
    Obtain the correlation histogram for the given graph.

    Parameters
    ----------
197
    g : :class:`~graph_tool.Graph`
198
        Graph to be used.
199
    deg_source : string or :class:`~graph_tool.VertexPropertyMap`
200 201
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
202
    deg_target : string or :class:`~graph_tool.VertexPropertyMap`
203 204
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
205
    bins : list of lists (optional, default: [[0, 1], [0, 1]])
206
        A list of bin edges to be used for the source and target degrees. If any
207 208 209
        list has size 2, it is used to create an automatically generated bin
        range starting from the first value, and with constant bin width given
        by the second value.
210 211 212 213 214 215 216 217 218
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
219
    bin_counts : :class:`~numpy.ndarray`
220
        Two-dimensional array with the bin counts.
221
    source_bins : :class:`~numpy.ndarray`
222
        Source degree bins
223
    target_bins : :class:`~numpy.ndarray`
224 225 226 227 228 229 230 231 232
        Target degree bins


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
233
    avg_neighbor_corr: average nearest-neighbor correlation
234 235 236 237 238
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The correlation histogram counts, for every vertex with degree (or scalar
239
    property) 'source_deg', the number of out-neighbors with degree (or scalar
240 241 242 243 244 245
    property) 'target_deg'.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
246

247 248 249
    .. testsetup:: corr_hist

       from pylab import *
250 251 252
       np.random.seed(42)
       gt.seed_rng(42)

253 254 255 256 257 258 259 260 261
    .. doctest:: corr_hist

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         k = np.random.randint(1,max+1)
       ...         accept = np.random.random() < 1.0/k
       ...     return k
       ...
262 263 264
       >>> g = gt.random_graph(10000, lambda: sample_k(40),
       ...                     model="probabilistic-configuration",
       ...                     edge_probs=lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
265 266 267 268
       ...                     directed=False, n_iter=100)
       >>> h = gt.corr_hist(g, "out", "out")
       >>> clf()
       >>> xlabel("Source out-degree")
269
       Text(...)
270
       >>> ylabel("Target out-degree")
271
       Text(...)
272 273 274 275
       >>> imshow(h[0].T, interpolation="nearest", origin="lower")
       <...>
       >>> colorbar()
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
276
       >>> savefig("corr.svg")
277 278

    .. testcode:: corr_hist
279 280
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
281
       savefig("corr.pdf")
282

283
    .. figure:: corr.*
284 285 286
        :align: center

        Out/out-degree correlation histogram.
287 288
    """

289
    ret = libgraph_tool_correlations.\
290 291
          vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source),
                                       _degree(g, deg_target),
292 293 294
                                       _prop("e", g, weight),
                                       [float(x) for x in bins[0]],
                                       [float(x) for x in bins[1]])
295 296
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
297

Tiago Peixoto's avatar
Tiago Peixoto committed
298

299
def combined_corr_hist(g, deg1, deg2, bins=[[0, 1], [0, 1]], float_count=True):
300 301 302 303 304
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
305
    g : :class:`~graph_tool.Graph`
306
        Graph to be used.
307
    deg1 : string or :class:`~graph_tool.VertexPropertyMap`
308
        first degree type ("in", "out" or "total") or vertex property map.
309
    deg2 : string or :class:`~graph_tool.VertexPropertyMap`
310
        second degree type ("in", "out" or "total") or vertex property map.
311 312
    bins : list of lists (optional, default: [[0, 1], [0, 1]])
        A list of bin edges to be used for the first and second degrees. If any
313 314 315
        list has size 2, it is used to create an automatically generated bin
        range starting from the first value, and with constant bin width given
        by the second value.
316 317 318 319 320 321 322
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
323
    bin_counts : :class:`~numpy.ndarray`
324
        Two-dimensional array with the bin counts.
325
    first_bins : :class:`~numpy.ndarray`
326
        First degree bins
327
    second_bins : :class:`~numpy.ndarray`
328 329 330 331 332 333 334 335 336 337 338 339
        Second degree bins

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
340
    avg_neighbor_corr: average nearest-neighbor correlation
341 342 343 344
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
345

346 347 348
    .. testsetup:: combined_corr_hist

       from pylab import *
349 350 351
       np.random.seed(42)
       gt.seed_rng(42)

352 353 354 355 356 357 358 359 360 361 362 363 364 365
    .. doctest:: combined_corr_hist

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         i = np.random.randint(1, max + 1)
       ...         j = np.random.randint(1, max + 1)
       ...         accept = np.random.random() < (sin(i / pi) * sin(j / pi) + 1) / 2
       ...     return i,j
       ...
       >>> g = gt.random_graph(10000, lambda: sample_k(40))
       >>> h = gt.combined_corr_hist(g, "in", "out")
       >>> clf()
       >>> xlabel("In-degree")
366
       Text(...)
367
       >>> ylabel("Out-degree")
368
       Text(...)
369 370 371 372
       >>> imshow(h[0].T, interpolation="nearest", origin="lower")
       <...>
       >>> colorbar()
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
373
       >>> savefig("combined_corr.svg")
374 375

    .. testcode:: combined_corr_hist
376 377
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
378
       savefig("combined_corr.pdf")
379

380
    .. figure:: combined_corr.*
381 382 383 384
        :align: center

        Combined in/out-degree correlation histogram.

385
    """
386
    ret = libgraph_tool_correlations.\
387
          vertex_combined_correlation_histogram(g._Graph__graph,
388 389
                                                _degree(g, deg1),
                                                _degree(g, deg2),
390 391
                                                [float(x) for x in bins[0]],
                                                [float(x) for x in bins[1]])
392 393
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
394

Tiago Peixoto's avatar
Tiago Peixoto committed
395

396
def avg_neighbor_corr(g, deg_source, deg_target, bins=[0, 1], weight=None):
397
    r"""
398
    Obtain the average neighbor-neighbor correlation for the given graph.
399 400 401

    Parameters
    ----------
402
    g : :class:`~graph_tool.Graph`
403
        Graph to be used.
404
    deg_source : string or :class:`~graph_tool.VertexPropertyMap`
405 406
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
407
    deg_target : string or :class:`~graph_tool.VertexPropertyMap`
408 409
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
410 411 412 413
    bins : list (optional, default: [0, 1])
        Bins to be used for the source degrees. If the list has size 2, it is
        used as the constant width of an automatically generated bin range,
        starting from the first value.
414 415 416 417 418
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.

    Returns
    -------
419
    bin_avg : :class:`~numpy.ndarray`
420
        Array with the deg_target average for the get_source bins.
421
    bin_dev : :class:`~numpy.ndarray`
422 423
        Array with the standard deviation of the deg_target average for the
        get_source bins.
424 425
    bins : :class:`~numpy.ndarray`
        Source degree bins,
426 427 428 429 430 431 432 433


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
434
    avg_neighbor_corr: average nearest-neighbor correlation
435 436 437 438 439 440
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The average correlation is the average, for every vertex with degree (or
    scalar property) 'source_deg', the of the 'target_deg' degree (or
441
    scalar property) of its neighbors.
442 443 444 445 446

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
447

448
    .. testsetup:: avg_neighbor_corr
449 450

       from pylab import *
451 452 453
       np.random.seed(42)
       gt.seed_rng(42)

454

455
    .. doctest:: avg_neighbor_corr
456 457 458 459 460 461 462 463

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         k = np.random.randint(1,max+1)
       ...         accept = np.random.random() < 1.0 / k
       ...     return k
       ...
464 465 466
       >>> g = gt.random_graph(10000, lambda: sample_k(40),
       ...                     model="probabilistic-configuration",
       ...                     edge_probs=lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
467
       ...                     directed=False, n_iter=100)
468
       >>> h = gt.avg_neighbor_corr(g, "out", "out")
469 470
       >>> clf()
       >>> xlabel("Source out-degree")
471
       Text(...)
472
       >>> ylabel("Target out-degree")
473
       Text(...)
474 475
       >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
476
       >>> savefig("avg_corr.svg")
477

478
    .. testcode:: avg_neighbor_corr
479 480
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
481
       savefig("avg_corr.pdf")
482

483
    .. figure:: avg_corr.*
484 485 486
        :align: center

        Average out/out degree correlation.
487 488
    """

489
    ret = libgraph_tool_correlations.\
490
          vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source),
491
                                 _degree(g, deg_target), _prop("e", g, weight),
492
                                 [float(x) for x in bins])
493 494
    return [ret[0], ret[1], ret[2][0]]

495
avg_neighbour_corr = avg_neighbor_corr
496 497

def avg_combined_corr(g, deg1, deg2, bins=[0, 1]):
498 499 500 501 502
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
503
    g : :class:`~graph_tool.Graph`
504
        Graph to be used.
505
    deg1 : string or :class:`~graph_tool.VertexPropertyMap`
506
        first degree type ("in", "out" or "total") or vertex property map.
507
    deg2 : string or :class:`~graph_tool.VertexPropertyMap`
508
        second degree type ("in", "out" or "total") or vertex property map.
509 510 511 512
    bins : list (optional, default: [0, 1])
        Bins to be used for the first degrees. If the list has size 2, it is
        used as the constant width of an automatically generated bin range,
        starting from the first value.
513 514 515

    Returns
    -------
516
    bin_avg : :class:`~numpy.ndarray`
517
        Array with the deg2 average for the deg1 bins.
518
    bin_dev : :class:`~numpy.ndarray`
519
        Array with the standard deviation of the deg2 average for the deg1 bins.
520
    bins : :class:`~numpy.ndarray`
521 522 523 524 525 526 527 528 529 530 531 532 533
        The deg1 bins.

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
534
    avg_neighbor_corr: average nearest-neighbor correlation
535 536 537 538
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
539

540 541 542
    .. testsetup:: avg_combined_corr

       from pylab import *
543 544 545
       np.random.seed(42)
       gt.seed_rng(42)

546 547 548 549 550 551 552 553 554 555 556 557 558 559 560

    .. doctest:: avg_combined_corr

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         i = np.random.randint(1,max+1)
       ...         j = np.random.randint(1,max+1)
       ...         accept = np.random.random() < (sin(i/pi)*sin(j/pi)+1)/2
       ...     return i,j
       ...
       >>> g = gt.random_graph(10000, lambda: sample_k(40))
       >>> h = gt.avg_combined_corr(g, "in", "out")
       >>> clf()
       >>> xlabel("In-degree")
561
       Text(...)
562
       >>> ylabel("Out-degree")
563
       Text(...)
564 565
       >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
566
       >>> savefig("combined_avg_corr.svg")
567 568

    .. testcode:: avg_combined_corr
569 570
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
571
       savefig("combined_avg_corr.pdf")
572

573
    .. figure:: combined_avg_corr.*
574 575 576
        :align: center

        Average combined in/out-degree correlation.
577 578
    """

579 580
    ret = libgraph_tool_correlations.\
          vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
581 582
                                          _degree(g, deg2),
                                          [float(x) for x in bins])
583
    return [ret[0], ret[1], ret[2][0]]