__init__.py 19.4 KB
Newer Older
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4 5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2018 Tiago de Paula Peixoto <tiago@skewed.de>
7 8 9 10 11 12 13 14 15 16 17 18 19 20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 22 23
"""
``graph_tool.correlations`` - Correlations
------------------------------------------
24 25 26 27 28 29 30 31 32 33 34

Summary
+++++++

.. autosummary::
   :nosignatures:

   assortativity
   scalar_assortativity
   corr_hist
   combined_corr_hist
35
   avg_neighbor_corr
36 37 38 39
   avg_combined_corr

Contents
++++++++
40 41
"""

42
from __future__ import division, absolute_import, print_function
43

Tiago Peixoto's avatar
Tiago Peixoto committed
44
from .. dl_import import dl_import
45
dl_import("from . import libgraph_tool_correlations")
46

47
from .. import _degree, _prop, _check_prop_scalar, VertexPropertyMap
48 49
from numpy import *

50
__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
51 52
           "combined_corr_hist", "avg_neighbor_corr", "avg_neighbour_corr",
           "avg_combined_corr"]
53

54

55 56
def assortativity(g, deg, eweight=None):
    r"""Obtain the assortativity coefficient for the given graph.
57 58 59

    Parameters
    ----------
60
    g : :class:`~graph_tool.Graph`
61
        Graph to be used.
62
    deg : string or :class:`~graph_tool.VertexPropertyMap`
63
        Degree type ("in", "out" or "total") or vertex property map, which
64
        specifies the vertex types.
65
    eweight : :class:`~graph_tool.EdgePropertyMap` (optional, default: `None`)
66 67
        If given, this will specify the edge weights, otherwise a constant value
        of one will be used.
68 69 70 71 72 73 74 75 76 77 78 79

    Returns
    -------
    assortativity coefficient : tuple of two floats
        The assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
80
    avg_neighbor_corr: average nearest-neighbor correlation
81 82 83 84
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
85
    The assortativity coefficient [newman-mixing-2003]_ tells in a concise
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
    fashion how vertices of different types are preferentially connected amongst
    themselves, and is defined by

    .. math::
        r = \frac{\sum_i e_{ii} - \sum_i a_i b_i}{1-\sum_i a_i b_i}

    where :math:`a_i=\sum_je_{ij}` and :math:`b_j=\sum_ie_{ij}`, and
    :math:`e_{ij}` is the fraction of edges from a vertex of type
    i to a vertex of type j.


    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
103

104
    .. doctest:: assortativity
105

106 107 108
       >>> g = gt.collection.data["pgp-strong-2009"]
       >>> g = gt.GraphView(g, directed=False)
       >>> gt.assortativity(g, "out")
109
       (0.0240578552..., 0.0003272847...)
110 111 112

    References
    ----------
113
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Tiago Peixoto's avatar
Tiago Peixoto committed
114
        Phys. Rev. E 67, 026126 (2003), :doi:`10.1103/PhysRevE.67.026126`
115 116 117
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife

    """
118
    return libgraph_tool_correlations.\
119 120
           assortativity_coefficient(g._Graph__graph, _degree(g, deg),
                                     _prop("e", g, eweight))
121

122

123
def scalar_assortativity(g, deg, eweight=None):
124 125 126 127 128
    r"""
    Obtain the scalar assortativity coefficient for the given graph.

    Parameters
    ----------
129
    g : :class:`~graph_tool.Graph`
130
        Graph to be used.
131
    deg : string or :class:`~graph_tool.VertexPropertyMap`
132 133
        Degree type ("in", "out" or "total") or vertex property map, which
        specifies the vertex scalar values.
134
    eweight : :class:`~graph_tool.EdgePropertyMap` (optional, default: `None`)
135 136
        If given, this will specify the edge weights, otherwise a constant value
        of one will be used.
137 138 139 140 141 142 143 144 145 146 147 148

    Returns
    -------
    scalar assortativity coefficient : tuple of two floats
        The scalar assortativity coefficient, and its variance.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
149
    avg_neighbor_corr: average nearest-neighbor correlation
150 151 152 153
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
154
    The scalar assortativity coefficient [newman-mixing-2003]_ tells in a
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
    concise fashion how vertices of different types are preferentially connected
    amongst themselves, and is defined by

    .. math::
        r = \frac{\sum_{xy} xy(e_{xy} - a_x b_y)}{\sigma_a\sigma_b}

    where :math:`a_x=\sum_ye_{xy}` and :math:`b_y=\sum_xe_{xy}`, and
    :math:`e_{xy}` is the fraction of edges from a vertex of type
    x to a vertex of type y.

    The variance is obtained with the `jackknife method`_.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
171

172
    .. doctest:: scalar_assortativity
173

174 175 176
       >>> g = gt.collection.data["pgp-strong-2009"]
       >>> g = gt.GraphView(g, directed=False)
       >>> gt.scalar_assortativity(g, "out")
177
       (0.026384932418..., 0.001213444270...)
178 179 180

    References
    ----------
181
    .. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Tiago Peixoto's avatar
Tiago Peixoto committed
182
        Phys. Rev. E 67, 026126 (2003), :doi:`10.1103/PhysRevE.67.026126`
183 184
    .. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife
    """
185 186
    if isinstance(deg, VertexPropertyMap):
        _check_prop_scalar(deg, name="deg")
187
    return libgraph_tool_correlations.\
188 189
           scalar_assortativity_coefficient(g._Graph__graph, _degree(g, deg),
                                            _prop("e", g, eweight))
190

191 192

def corr_hist(g, deg_source, deg_target, bins=[[0, 1], [0, 1]], weight=None,
193 194 195 196 197 198
              float_count=True):
    r"""
    Obtain the correlation histogram for the given graph.

    Parameters
    ----------
199
    g : :class:`~graph_tool.Graph`
200
        Graph to be used.
201
    deg_source : string or :class:`~graph_tool.VertexPropertyMap`
202 203
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
204
    deg_target : string or :class:`~graph_tool.VertexPropertyMap`
205 206
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
207
    bins : list of lists (optional, default: [[0, 1], [0, 1]])
208
        A list of bin edges to be used for the source and target degrees. If any
209 210 211
        list has size 2, it is used to create an automatically generated bin
        range starting from the first value, and with constant bin width given
        by the second value.
212 213 214 215 216 217 218 219 220
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
221
    bin_counts : :class:`~numpy.ndarray`
222
        Two-dimensional array with the bin counts.
223
    source_bins : :class:`~numpy.ndarray`
224
        Source degree bins
225
    target_bins : :class:`~numpy.ndarray`
226 227 228 229 230 231 232 233 234
        Target degree bins


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
235
    avg_neighbor_corr: average nearest-neighbor correlation
236 237 238 239 240
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The correlation histogram counts, for every vertex with degree (or scalar
241
    property) 'source_deg', the number of out-neighbors with degree (or scalar
242 243 244 245 246 247
    property) 'target_deg'.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
248

249 250 251
    .. testsetup:: corr_hist

       from pylab import *
252 253 254
       np.random.seed(42)
       gt.seed_rng(42)

255 256 257 258 259 260 261 262 263
    .. doctest:: corr_hist

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         k = np.random.randint(1,max+1)
       ...         accept = np.random.random() < 1.0/k
       ...     return k
       ...
264 265 266
       >>> g = gt.random_graph(10000, lambda: sample_k(40),
       ...                     model="probabilistic-configuration",
       ...                     edge_probs=lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
267 268 269 270
       ...                     directed=False, n_iter=100)
       >>> h = gt.corr_hist(g, "out", "out")
       >>> clf()
       >>> xlabel("Source out-degree")
271
       Text(...)
272
       >>> ylabel("Target out-degree")
273
       Text(...)
274 275 276 277
       >>> imshow(h[0].T, interpolation="nearest", origin="lower")
       <...>
       >>> colorbar()
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
278
       >>> savefig("corr.svg")
279 280

    .. testcode:: corr_hist
281 282
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
283
       savefig("corr.pdf")
284

285
    .. figure:: corr.*
286 287 288
        :align: center

        Out/out-degree correlation histogram.
289 290
    """

291
    ret = libgraph_tool_correlations.\
292 293
          vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source),
                                       _degree(g, deg_target),
294 295 296
                                       _prop("e", g, weight),
                                       [float(x) for x in bins[0]],
                                       [float(x) for x in bins[1]])
297 298
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
299

Tiago Peixoto's avatar
Tiago Peixoto committed
300

301
def combined_corr_hist(g, deg1, deg2, bins=[[0, 1], [0, 1]], float_count=True):
302 303 304 305 306
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
307
    g : :class:`~graph_tool.Graph`
308
        Graph to be used.
309
    deg1 : string or :class:`~graph_tool.VertexPropertyMap`
310
        first degree type ("in", "out" or "total") or vertex property map.
311
    deg2 : string or :class:`~graph_tool.VertexPropertyMap`
312
        second degree type ("in", "out" or "total") or vertex property map.
313 314
    bins : list of lists (optional, default: [[0, 1], [0, 1]])
        A list of bin edges to be used for the first and second degrees. If any
315 316 317
        list has size 2, it is used to create an automatically generated bin
        range starting from the first value, and with constant bin width given
        by the second value.
318 319 320 321 322 323 324
    float_count : bool (optional, default: True)
        If True, the bin counts are converted float variables, which is useful
        for normalization, and other processing. It False, the bin counts will
        be unsigned integers.

    Returns
    -------
325
    bin_counts : :class:`~numpy.ndarray`
326
        Two-dimensional array with the bin counts.
327
    first_bins : :class:`~numpy.ndarray`
328
        First degree bins
329
    second_bins : :class:`~numpy.ndarray`
330 331 332 333 334 335 336 337 338 339 340 341
        Second degree bins

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.

    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
342
    avg_neighbor_corr: average nearest-neighbor correlation
343 344 345 346
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
347

348 349 350
    .. testsetup:: combined_corr_hist

       from pylab import *
351 352 353
       np.random.seed(42)
       gt.seed_rng(42)

354 355 356 357 358 359 360 361 362 363 364 365 366 367
    .. doctest:: combined_corr_hist

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         i = np.random.randint(1, max + 1)
       ...         j = np.random.randint(1, max + 1)
       ...         accept = np.random.random() < (sin(i / pi) * sin(j / pi) + 1) / 2
       ...     return i,j
       ...
       >>> g = gt.random_graph(10000, lambda: sample_k(40))
       >>> h = gt.combined_corr_hist(g, "in", "out")
       >>> clf()
       >>> xlabel("In-degree")
368
       Text(...)
369
       >>> ylabel("Out-degree")
370
       Text(...)
371 372 373 374
       >>> imshow(h[0].T, interpolation="nearest", origin="lower")
       <...>
       >>> colorbar()
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
375
       >>> savefig("combined_corr.svg")
376 377

    .. testcode:: combined_corr_hist
378 379
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
380
       savefig("combined_corr.pdf")
381

382
    .. figure:: combined_corr.*
383 384 385 386
        :align: center

        Combined in/out-degree correlation histogram.

387
    """
388
    ret = libgraph_tool_correlations.\
389
          vertex_combined_correlation_histogram(g._Graph__graph,
390 391
                                                _degree(g, deg1),
                                                _degree(g, deg2),
392 393
                                                [float(x) for x in bins[0]],
                                                [float(x) for x in bins[1]])
394 395
    return [array(ret[0], dtype="float64") if float_count else ret[0],
            [ret[1][0], ret[1][1]]]
396

Tiago Peixoto's avatar
Tiago Peixoto committed
397

398
def avg_neighbor_corr(g, deg_source, deg_target, bins=[0, 1], weight=None):
399
    r"""
400
    Obtain the average neighbor-neighbor correlation for the given graph.
401 402 403

    Parameters
    ----------
404
    g : :class:`~graph_tool.Graph`
405
        Graph to be used.
406
    deg_source : string or :class:`~graph_tool.VertexPropertyMap`
407 408
        degree type ("in", "out" or "total") or vertex property map for the
        source vertex.
409
    deg_target : string or :class:`~graph_tool.VertexPropertyMap`
410 411
        degree type ("in", "out" or "total") or vertex property map for the
        target vertex.
412 413 414 415
    bins : list (optional, default: [0, 1])
        Bins to be used for the source degrees. If the list has size 2, it is
        used as the constant width of an automatically generated bin range,
        starting from the first value.
416 417 418 419 420
    weight : edge property map (optional, default: None)
        Weight (multiplicative factor) to be used on each edge.

    Returns
    -------
421
    bin_avg : :class:`~numpy.ndarray`
422
        Array with the deg_target average for the get_source bins.
423
    bin_dev : :class:`~numpy.ndarray`
424 425
        Array with the standard deviation of the deg_target average for the
        get_source bins.
426 427
    bins : :class:`~numpy.ndarray`
        Source degree bins,
428 429 430 431 432 433 434 435


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
436
    avg_neighbor_corr: average nearest-neighbor correlation
437 438 439 440 441 442
    avg_combined_corr: average combined single-vertex correlation

    Notes
    -----
    The average correlation is the average, for every vertex with degree (or
    scalar property) 'source_deg', the of the 'target_deg' degree (or
443
    scalar property) of its neighbors.
444 445 446 447 448

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
449

450
    .. testsetup:: avg_neighbor_corr
451 452

       from pylab import *
453 454 455
       np.random.seed(42)
       gt.seed_rng(42)

456

457
    .. doctest:: avg_neighbor_corr
458 459 460 461 462 463 464 465

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         k = np.random.randint(1,max+1)
       ...         accept = np.random.random() < 1.0 / k
       ...     return k
       ...
466 467 468
       >>> g = gt.random_graph(10000, lambda: sample_k(40),
       ...                     model="probabilistic-configuration",
       ...                     edge_probs=lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
469
       ...                     directed=False, n_iter=100)
470
       >>> h = gt.avg_neighbor_corr(g, "out", "out")
471 472
       >>> clf()
       >>> xlabel("Source out-degree")
473
       Text(...)
474
       >>> ylabel("Target out-degree")
475
       Text(...)
476 477
       >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
478
       >>> savefig("avg_corr.svg")
479

480
    .. testcode:: avg_neighbor_corr
481 482
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
483
       savefig("avg_corr.pdf")
484

485
    .. figure:: avg_corr.*
486 487 488
        :align: center

        Average out/out degree correlation.
489 490
    """

491
    ret = libgraph_tool_correlations.\
492
          vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source),
493
                                 _degree(g, deg_target), _prop("e", g, weight),
494
                                 [float(x) for x in bins])
495 496
    return [ret[0], ret[1], ret[2][0]]

497
avg_neighbour_corr = avg_neighbor_corr
498 499

def avg_combined_corr(g, deg1, deg2, bins=[0, 1]):
500 501 502 503 504
    r"""
    Obtain the single-vertex combined correlation histogram for the given graph.

    Parameters
    ----------
505
    g : :class:`~graph_tool.Graph`
506
        Graph to be used.
507
    deg1 : string or :class:`~graph_tool.VertexPropertyMap`
508
        first degree type ("in", "out" or "total") or vertex property map.
509
    deg2 : string or :class:`~graph_tool.VertexPropertyMap`
510
        second degree type ("in", "out" or "total") or vertex property map.
511 512 513 514
    bins : list (optional, default: [0, 1])
        Bins to be used for the first degrees. If the list has size 2, it is
        used as the constant width of an automatically generated bin range,
        starting from the first value.
515 516 517

    Returns
    -------
518
    bin_avg : :class:`~numpy.ndarray`
519
        Array with the deg2 average for the deg1 bins.
520
    bin_dev : :class:`~numpy.ndarray`
521
        Array with the standard deviation of the deg2 average for the deg1 bins.
522
    bins : :class:`~numpy.ndarray`
523 524 525 526 527 528 529 530 531 532 533 534 535
        The deg1 bins.

    Notes
    -----
    If enabled during compilation, this algorithm runs in parallel.


    See Also
    --------
    assortativity: assortativity coefficient
    scalar_assortativity: scalar assortativity coefficient
    corr_hist: vertex-vertex correlation histogram
    combined_corr_hist: combined single-vertex correlation histogram
536
    avg_neighbor_corr: average nearest-neighbor correlation
537 538 539 540
    avg_combined_corr: average combined single-vertex correlation

    Examples
    --------
541

542 543 544
    .. testsetup:: avg_combined_corr

       from pylab import *
545 546 547
       np.random.seed(42)
       gt.seed_rng(42)

548 549 550 551 552 553 554 555 556 557 558 559 560 561 562

    .. doctest:: avg_combined_corr

       >>> def sample_k(max):
       ...     accept = False
       ...     while not accept:
       ...         i = np.random.randint(1,max+1)
       ...         j = np.random.randint(1,max+1)
       ...         accept = np.random.random() < (sin(i/pi)*sin(j/pi)+1)/2
       ...     return i,j
       ...
       >>> g = gt.random_graph(10000, lambda: sample_k(40))
       >>> h = gt.avg_combined_corr(g, "in", "out")
       >>> clf()
       >>> xlabel("In-degree")
563
       Text(...)
564
       >>> ylabel("Out-degree")
565
       Text(...)
566 567
       >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
       <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
568
       >>> savefig("combined_avg_corr.svg")
569 570

    .. testcode:: avg_combined_corr
571 572
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
573
       savefig("combined_avg_corr.pdf")
574

575
    .. figure:: combined_avg_corr.*
576 577 578
        :align: center

        Average combined in/out-degree correlation.
579 580
    """

581 582
    ret = libgraph_tool_correlations.\
          vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
583 584
                                          _degree(g, deg2),
                                          [float(x) for x in bins])
585
    return [ret[0], ret[1], ret[2][0]]