__init__.py 72.3 KB
Newer Older
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4 5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2017 Tiago de Paula Peixoto <tiago@skewed.de>
7 8 9 10 11 12 13 14 15 16 17 18
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
Tiago Peixoto's avatar
Tiago Peixoto committed
19
# along with this program.  If not, see <http://www.gnu.org/licenses/>.s
20

21
"""
22
``graph_tool.generation`` - Random graph generation
23
---------------------------------------------------
24 25 26 27 28 29 30 31 32

Summary
+++++++

.. autosummary::
   :nosignatures:

   random_graph
   random_rewire
33
   generate_sbm
34 35 36
   predecessor_tree
   line_graph
   graph_union
37
   triangulation
38 39
   lattice
   geometric_graph
40
   price_network
Tiago Peixoto's avatar
Tiago Peixoto committed
41
   complete_graph
Tiago Peixoto's avatar
Tiago Peixoto committed
42
   circular_graph
43
   condensation_graph
44 45 46

Contents
++++++++
47 48
"""

49
from __future__ import division, absolute_import, print_function
50 51 52
import sys
if sys.version_info < (3,):
    range = xrange
53

Tiago Peixoto's avatar
Tiago Peixoto committed
54
from .. dl_import import dl_import
55
dl_import("from . import libgraph_tool_generation")
56

57 58
from .. import Graph, GraphView, _check_prop_scalar, _prop, _limit_args, \
    _gt_type, _get_rng, _c_str, libcore
Tiago Peixoto's avatar
Tiago Peixoto committed
59
from .. stats import label_parallel_edges, label_self_loops
60 61
import inspect
import types
62 63
import numpy
import numpy.random
64

65 66 67 68
__all__ = ["random_graph", "random_rewire", "generate_sbm", "predecessor_tree",
           "line_graph", "graph_union", "triangulation", "lattice",
           "geometric_graph", "price_network", "complete_graph",
           "circular_graph", "condensation_graph"]
69

Tiago Peixoto's avatar
Tiago Peixoto committed
70

71 72
def random_graph(N, deg_sampler, directed=True,
                 parallel_edges=False, self_loops=False, block_membership=None,
73
                 block_type="int", degree_block=False,
74
                 random=True, verbose=False, **kwargs):
Tiago Peixoto's avatar
Tiago Peixoto committed
75
    r"""
76 77 78 79 80 81 82
    Generate a random graph, with a given degree distribution and (optionally)
    vertex-vertex correlation.

    The graph will be randomized via the :func:`~graph_tool.generation.random_rewire`
    function, and any remaining parameters will be passed to that function.
    Please read its documentation for all the options regarding the different
    statistical models which can be chosen.
Tiago Peixoto's avatar
Tiago Peixoto committed
83 84 85 86 87 88 89 90 91 92 93

    Parameters
    ----------
    N : int
        Number of vertices in the graph.
    deg_sampler : function
        A degree sampler function which is called without arguments, and returns
        a tuple of ints representing the in and out-degree of a given vertex (or
        a single int for undirected graphs, representing the out-degree). This
        function is called once per vertex, but may be called more times, if the
        degree sequence cannot be used to build a graph.
94

95
        Optionally, you can also pass a function which receives one or two
Tiago Peixoto's avatar
Tiago Peixoto committed
96
        arguments. If ``block_membership is None``, the single argument passed
97
        will be the index of the vertex which will receive the degree.  If
Tiago Peixoto's avatar
Tiago Peixoto committed
98
        ``block_membership is not None``, the first value passed will be the vertex
99
        index, and the second will be the block value of the vertex.
100
    directed : bool (optional, default: ``True``)
Tiago Peixoto's avatar
Tiago Peixoto committed
101
        Whether the generated graph should be directed.
102 103 104 105
    parallel_edges : bool (optional, default: ``False``)
        If ``True``, parallel edges are allowed.
    self_loops : bool (optional, default: ``False``)
        If ``True``, self-loops are allowed.
106
    block_membership : list or :class:`~numpy.ndarray` or function (optional, default: ``None``)
107
        If supplied, the graph will be sampled from a stochastic blockmodel
108 109 110
        ensemble, and this parameter specifies the block membership of the
        vertices, which will be passed to the
        :func:`~graph_tool.generation.random_rewire` function.
111 112 113 114

        If the value is a list or a :class:`~numpy.ndarray`, it must have
        ``len(block_membership) == N``, and the values will define to which
        block each vertex belongs.
115 116 117 118

        If this value is a function, it will be used to sample the block
        types. It must be callable either with no arguments or with a single
        argument which will be the vertex index. In either case it must return
119
        a type compatible with the ``block_type`` parameter.
120 121 122 123

        See the documentation for the ``vertex_corr`` parameter of the
        :func:`~graph_tool.generation.random_rewire` function which specifies
        the correlation matrix.
124
    block_type : string (optional, default: ``"int"``)
Tiago Peixoto's avatar
Tiago Peixoto committed
125
        Value type of block labels. Valid only if ``block_membership is not None``.
126 127 128 129 130
    degree_block : bool (optional, default: ``False``)
        If ``True``, the degree of each vertex will be appended to block labels
        when constructing the blockmodel, such that the resulting block type
        will be a pair :math:`(r, k)`, where :math:`r` is the original block
        label.
131 132 133 134 135
    random : bool (optional, default: ``True``)
        If ``True``, the returned graph is randomized. Otherwise a deterministic
        placement of the edges will be used.
    verbose : bool (optional, default: ``False``)
        If ``True``, verbose information is displayed.
Tiago Peixoto's avatar
Tiago Peixoto committed
136 137 138

    Returns
    -------
139
    random_graph : :class:`~graph_tool.Graph`
Tiago Peixoto's avatar
Tiago Peixoto committed
140
        The generated graph.
141 142
    blocks : :class:`~graph_tool.PropertyMap`
        A vertex property map with the block values. This is only returned if
Tiago Peixoto's avatar
Tiago Peixoto committed
143
        ``block_membership is not None``.
Tiago Peixoto's avatar
Tiago Peixoto committed
144 145 146

    See Also
    --------
147
    random_rewire: in-place graph shuffling
Tiago Peixoto's avatar
Tiago Peixoto committed
148 149 150

    Notes
    -----
Tiago Peixoto's avatar
Tiago Peixoto committed
151 152 153
    The algorithm makes sure the degree sequence is graphical (i.e. realizable)
    and keeps re-sampling the degrees if is not. With a valid degree sequence,
    the edges are placed deterministically, and later the graph is shuffled with
154 155
    the :func:`~graph_tool.generation.random_rewire` function, with all
    remaining parameters passed to it.
Tiago Peixoto's avatar
Tiago Peixoto committed
156

157
    The complexity is :math:`O(V + E)` if parallel edges are allowed, and
158
    :math:`O(V + E \times\text{n-iter})` if parallel edges are not allowed.
159 160 161 162 163 164


    .. note ::

        If ``parallel_edges == False`` this algorithm only guarantees that the
        returned graph will be a random sample from the desired ensemble if
165
        ``n_iter`` is sufficiently large. The algorithm implements an
166 167 168 169
        efficient Markov chain based on edge swaps, with a mixing time which
        depends on the degree distribution and correlations desired. If degree
        correlations are provided, the mixing time tends to be larger.

Tiago Peixoto's avatar
Tiago Peixoto committed
170 171
    Examples
    --------
172 173 174 175

    .. testcode::
       :hide:

176
       import numpy.random
177
       from pylab import *
178
       np.random.seed(43)
179
       gt.seed_rng(42)
Tiago Peixoto's avatar
Tiago Peixoto committed
180 181 182 183 184 185 186

    This is a degree sampler which uses rejection sampling to sample from the
    distribution :math:`P(k)\propto 1/k`, up to a maximum.

    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
187 188
    ...         k = np.random.randint(1,max+1)
    ...         accept = np.random.random() < 1.0/k
Tiago Peixoto's avatar
Tiago Peixoto committed
189 190 191 192 193 194 195 196 197 198 199
    ...     return k
    ...

    The following generates a random undirected graph with degree distribution
    :math:`P(k)\propto 1/k` (with k_max=40) and an *assortative* degree
    correlation of the form:

    .. math::

        P(i,k) \propto \frac{1}{1+|i-k|}

200 201
    >>> g = gt.random_graph(1000, lambda: sample_k(40), model="probabilistic-configuration",
    ...                     edge_probs=lambda i, k: 1.0 / (1 + abs(i - k)), directed=False,
202
    ...                     n_iter=100)
Tiago Peixoto's avatar
Tiago Peixoto committed
203 204 205 206 207 208 209 210 211 212 213 214

    The following samples an in,out-degree pair from the joint distribution:

    .. math::

        p(j,k) = \frac{1}{2}\frac{e^{-m_1}m_1^j}{j!}\frac{e^{-m_1}m_1^k}{k!} +
                 \frac{1}{2}\frac{e^{-m_2}m_2^j}{j!}\frac{e^{-m_2}m_2^k}{k!}

    with :math:`m_1 = 4` and :math:`m_2 = 20`.

    >>> def deg_sample():
    ...    if random() > 0.5:
215
    ...        return np.random.poisson(4), np.random.poisson(4)
Tiago Peixoto's avatar
Tiago Peixoto committed
216
    ...    else:
217
    ...        return np.random.poisson(20), np.random.poisson(20)
218
    ...
Tiago Peixoto's avatar
Tiago Peixoto committed
219 220 221 222 223 224 225

    The following generates a random directed graph with this distribution, and
    plots the combined degree correlation.

    >>> g = gt.random_graph(20000, deg_sample)
    >>>
    >>> hist = gt.combined_corr_hist(g, "in", "out")
226
    >>>
227 228
    >>> figure()
    <...>
229
    >>> imshow(hist[0].T, interpolation="nearest", origin="lower")
Tiago Peixoto's avatar
Tiago Peixoto committed
230 231 232
    <...>
    >>> colorbar()
    <...>
233
    >>> xlabel("in-degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
234
    <...>
235
    >>> ylabel("out-degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
236
    <...>
237
    >>> tight_layout()
238
    >>> savefig("combined-deg-hist.pdf")
Tiago Peixoto's avatar
Tiago Peixoto committed
239

240 241 242 243 244
    .. testcode::
       :hide:

       savefig("combined-deg-hist.png")

245
    .. figure:: combined-deg-hist.*
Tiago Peixoto's avatar
Tiago Peixoto committed
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
        :align: center

        Combined degree histogram.

    A correlated directed graph can be build as follows. Consider the following
    degree correlation:

    .. math::

         P(j',k'|j,k)=\frac{e^{-k}k^{j'}}{j'!}
         \frac{e^{-(20-j)}(20-j)^{k'}}{k'!}

    i.e., the in->out correlation is "disassortative", the out->in correlation
    is "assortative", and everything else is uncorrelated.
    We will use a flat degree distribution in the range [1,20).

    >>> p = scipy.stats.poisson
    >>> g = gt.random_graph(20000, lambda: (sample_k(19), sample_k(19)),
264 265 266
    ...                     model="probabilistic-configuration",
    ...                     edge_probs=lambda a,b: (p.pmf(a[0], b[1]) *
    ...                                             p.pmf(a[1], 20 - b[0])),
267
    ...                     n_iter=100)
Tiago Peixoto's avatar
Tiago Peixoto committed
268 269 270

    Lets plot the average degree correlations to check.

271
    >>> figure()
272
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
273
    >>> corr = gt.avg_neighbour_corr(g, "in", "in")
274
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
275
    ...         label=r"$\left<\text{in}\right>$ vs in")
276
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
277
    >>> corr = gt.avg_neighbour_corr(g, "in", "out")
278
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
279
    ...         label=r"$\left<\text{out}\right>$ vs in")
280
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
281
    >>> corr = gt.avg_neighbour_corr(g, "out", "in")
282
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
283
    ...          label=r"$\left<\text{in}\right>$ vs out")
284
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
285
    >>> corr = gt.avg_neighbour_corr(g, "out", "out")
286
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
287
    ...          label=r"$\left<\text{out}\right>$ vs out")
Tiago Peixoto's avatar
Tiago Peixoto committed
288
    <...>
289
    >>> legend(loc="lower right", borderaxespad=0., framealpha=0.8)
290 291
    <...>
    >>> xlabel("Source degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
292
    <...>
293
    >>> ylabel("Average target degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
294
    <...>
295
    >>> tight_layout()
296
    >>> savefig("deg-corr-dir.pdf")
Tiago Peixoto's avatar
Tiago Peixoto committed
297

298 299 300 301 302
    .. testcode::
       :hide:

       savefig("deg-corr-dir.png")

303
    .. figure:: deg-corr-dir.*
Tiago Peixoto's avatar
Tiago Peixoto committed
304 305 306
        :align: center

        Average nearest neighbour correlations.
307 308


309
    **Stochastic blockmodels**
310 311


312 313 314
    The following example shows how a stochastic blockmodel
    [holland-stochastic-1983]_ [karrer-stochastic-2011]_ can be generated. We
    will consider a system of 10 blocks, which form communities. The connection
315 316
    probability will be given by

317
    >>> def prob(a, b):
318 319 320 321 322 323 324
    ...    if a == b:
    ...        return 0.999
    ...    else:
    ...        return 0.001

    The blockmodel can be generated as follows.

Tiago Peixoto's avatar
Tiago Peixoto committed
325
    >>> g, bm = gt.random_graph(2000, lambda: poisson(10), directed=False,
326
    ...                         model="blockmodel",
327
    ...                         block_membership=lambda: randint(10),
328
    ...                         edge_probs=prob)
Tiago Peixoto's avatar
Tiago Peixoto committed
329
    >>> gt.graph_draw(g, vertex_fill_color=bm, edge_color="black", output="blockmodel.pdf")
330 331
    <...>

332 333 334
    .. testcode::
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
335
       gt.graph_draw(g, vertex_fill_color=bm, edge_color="black", output="blockmodel.png")
336

337
    .. figure:: blockmodel.*
338 339 340 341 342 343 344 345 346 347
        :align: center

        Simple blockmodel with 10 blocks.


    References
    ----------
    .. [metropolis-equations-1953]  Metropolis, N.; Rosenbluth, A.W.;
       Rosenbluth, M.N.; Teller, A.H.; Teller, E. "Equations of State
       Calculations by Fast Computing Machines". Journal of Chemical Physics 21
348
       (6): 1087-1092 (1953). :doi:`10.1063/1.1699114`
349
    .. [hastings-monte-carlo-1970] Hastings, W.K. "Monte Carlo Sampling Methods
350
       Using Markov Chains and Their Applications". Biometrika 57 (1): 97-109 (1970).
351
       :doi:`10.1093/biomet/57.1.97`
352 353 354 355 356 357
    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey, and
       Samuel Leinhardt, "Stochastic blockmodels: First steps," Social Networks
       5, no. 2: 109-13 (1983) :doi:`10.1016/0378-8733(83)90021-7`
    .. [karrer-stochastic-2011] Brian Karrer and M. E. J. Newman, "Stochastic
       blockmodels and community structure in networks," Physical Review E 83,
       no. 1: 016107 (2011) :doi:`10.1103/PhysRevE.83.016107` :arxiv:`1008.3926`
Tiago Peixoto's avatar
Tiago Peixoto committed
358
    """
359

360
    g = Graph()
361

362 363
    if (type(block_membership) is types.FunctionType or
        type(block_membership) is types.LambdaType):
364 365
        btype = block_type
        bm = []
366
        if len(inspect.getargspec(block_membership)[0]) == 0:
367
            for i in range(N):
368
                bm.append(block_membership())
369
        else:
370
            for i in range(N):
371 372 373 374
                bm.append(block_membership(i))
        block_membership = bm
    elif block_membership is not None:
        btype = _gt_type(block_membership[0])
375 376

    if len(inspect.getargspec(deg_sampler)[0]) > 0:
377 378
        if block_membership is not None:
            sampler = lambda i: deg_sampler(i, block_membership[i])
379
        else:
Tiago Peixoto's avatar
Tiago Peixoto committed
380
            sampler = deg_sampler
381 382 383
    else:
        sampler = lambda i: deg_sampler()

384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
    if not directed:
        def sampler_wrap(*args):
            k = sampler(*args)
            try:
                return int(k)
            except:
                raise ValueError("degree value not understood: " + str(k))
    else:
        def sampler_wrap(*args):
            k = sampler(*args)
            try:
                return int(k[0]), int(k[1])
            except:
                raise ValueError("(in,out)-degree value pair not understood: " +
                                 str(k))

    libgraph_tool_generation.gen_graph(g._Graph__graph, N, sampler_wrap,
401
                                       not parallel_edges,
402
                                       not self_loops, not directed,
403
                                       _get_rng(), verbose, True)
404 405
    g.set_directed(directed)

406 407 408 409 410 411 412 413 414 415 416 417
    if degree_block:
        if btype in ["object", "string"] or "vector" in btype:
            btype = "object"
        elif btype in ["int", "int32_t", "bool"]:
            btype = "vector<int32_t>"
        elif btype in ["long", "int64_t"]:
            btype = "vector<int64_t>"
        elif btype in ["double"]:
            btype = "vector<double>"
        elif btype in ["long double"]:
            btype = "vector<long double>"

418
    if block_membership is not None:
419 420 421
        bm = g.new_vertex_property(btype)
        if btype in ["object", "string"] or "vector" in btype:
            for v in g.vertices():
422
                if not degree_block:
423
                    bm[v] = block_membership[int(v)]
424 425
                else:
                    if g.is_directed():
426
                        bm[v] = (block_membership[int(v)], v.in_degree(),
427 428
                                 v.out_degree())
                    else:
429
                        bm[v] = (block_membership[int(v)], v.out_degree())
430 431
        else:
            try:
432
                bm.a = block_membership
433 434 435
            except ValueError:
                bm = g.new_vertex_property("object")
                for v in g.vertices():
436
                    bm[v] = block_membership[int(v)]
437 438
    else:
        bm = None
439

Tiago Peixoto's avatar
Tiago Peixoto committed
440
    if random:
441
        g.set_fast_edge_removal(True)
442 443 444
        random_rewire(g, parallel_edges=parallel_edges,
                      self_loops=self_loops, verbose=verbose,
                      block_membership=bm, **kwargs)
445
        g.set_fast_edge_removal(False)
446

447 448 449 450
    if bm is None:
        return g
    else:
        return g, bm
451

Tiago Peixoto's avatar
Tiago Peixoto committed
452

453 454 455 456 457
@_limit_args({"model": ["erdos", "configuration", "constrained-configuration",
                        "probabilistic-configuration", "blockmodel-degree",
                        "blockmodel"]})
def random_rewire(g, model="configuration", n_iter=1, edge_sweep=True,
                  parallel_edges=False, self_loops=False, edge_probs=None,
458
                  block_membership=None, alias=True, cache_probs=True,
459
                  persist=False, pin=None, ret_fail=False, verbose=False):
460
    r"""Shuffle the graph in-place, following a variety of possible statistical
461 462
    models, chosen via the parameter ``model``.

463 464 465

    Parameters
    ----------
466
    g : :class:`~graph_tool.Graph`
467
        Graph to be shuffled. The graph will be modified.
468
    model : string (optional, default: ``"configuration"``)
469 470
        The following statistical models can be chosen, which determine how the
        edges are rewired.
471

472 473
        ``erdos``
           The edges will be rewired entirely randomly, and the resulting graph
474 475
           will correspond to the :math:`G(N,E)` Erdős–Rényi model.
        ``configuration``
476 477
           The edges will be rewired randomly, but the degree sequence of the
           graph will remain unmodified.
478
        ``constrained-configuration``
479
           The edges will be rewired randomly, but both the degree sequence of
480 481 482
           the graph and the *vertex-vertex (in,out)-degree correlations* will
           remain exactly preserved. If the ``block_membership`` parameter is
           passed, the block variables at the endpoints of the edges will be
483 484 485 486 487 488 489 490 491 492
           preserved, instead of the degree-degree correlation.
        ``probabilistic-configuration``
           This is similar to ``constrained-configuration``, but the
           vertex-vertex correlations are not preserved, but are instead sampled
           from an arbitrary degree-based probabilistic model specified via the
           ``edge_probs`` parameter. The degree-sequence is preserved.
        ``blockmodel-degree``
           This is just like ``probabilistic-configuration``, but the values
           passed to the ``edge_probs`` function will correspond to the block
           membership values specified by the ``block_membership`` parameter.
493
        ``blockmodel``
494 495
           This is just like ``blockmodel-degree``, but the degree sequence *is
           not* preserved during rewiring.
496 497 498 499 500 501 502 503 504
    n_iter : int (optional, default: ``1``)
        Number of iterations. If ``edge_sweep == True``, each iteration
        corresponds to an entire "sweep" over all edges. Otherwise this
        corresponds to the total number of edges which are randomly chosen for a
        swap attempt (which may repeat).
    edge_sweep : bool (optional, default: ``True``)
        If ``True``, each iteration will perform an entire "sweep" over the
        edges, where each edge is visited once in random order, and a edge swap
        is attempted.
Tiago Peixoto's avatar
Tiago Peixoto committed
505
    parallel_edges : bool (optional, default: ``False``)
506 507 508
        If ``True``, parallel edges are allowed.
    self_loops : bool (optional, default: ``False``)
        If ``True``, self-loops are allowed.
509 510 511
    edge_probs : function or sequence of triples (optional, default: ``None``)
        A function which determines the edge probabilities in the graph. In
        general it should have the following signature:
512 513 514

        .. code::

515
            def prob(r, s):
516 517 518
                ...
                return p

519
        where the return value should be a non-negative scalar.
520

521 522 523 524
        Alternatively, this parameter can be a list of triples of the form ``(r,
        s, p)``, with the same meaning as the ``r``, ``s`` and ``p`` values
        above. If a given ``(r, s)`` combination is not present in this list,
        the corresponding value of ``p`` is assumed to be zero. If the same
525
        ``(r, s)`` combination appears more than once, their ``p`` values will
526 527 528 529 530 531 532 533 534 535 536 537 538 539
        be summed together. This is useful when the correlation matrix is
        sparse, i.e. most entries are zero.

        If ``model == probabilistic-configuration`` the parameters ``r`` and
        ``s`` correspond respectively to the (in, out)-degree pair of the source
        vertex of an edge, and the (in,out)-degree pair of the target of the
        same edge (for undirected graphs, both parameters are scalars
        instead). The value of ``p`` should be a number proportional to the
        probability of such an edge existing in the generated graph.

        If ``model == blockmodel-degree`` or ``model == blockmodel``, the ``r``
        and ``s`` values passed to the function will be the block values of the
        respective vertices, as specified via the ``block_membership``
        parameter. The value of ``p`` should be a number proportional to the
540
        probability of such an edge existing in the generated graph.
541 542 543 544 545
    block_membership : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        If supplied, the graph will be rewired to conform to a blockmodel
        ensemble. The value must be a vertex property map which defines the
        block of each vertex.
    alias : bool (optional, default: ``True``)
546 547 548
        If ``True``, and ``model`` is any of ``probabilistic-configuration``,
        ``blockmodel-degree``, or ``blockmodel``, the alias method will be used
        to sample the candidate edges. In the case of ``blockmodel``, if
549
        ``parallel_edges == True`` and ``self_loops == True`` this makes the
550 551
        sampling of the edges direct (not rejection based), so that ``n_iter ==
        1`` is enough to get an uncorrelated sample.
552
    cache_probs : bool (optional, default: ``True``)
553
        If ``True``, the probabilities returned by the ``edge_probs`` parameter
554 555 556 557
        will be cached internally. This is crucial for good performance, since
        in this case the supplied python function is called only a few times,
        and not at every attempted edge rewire move. However, in the case were
        the different parameter combinations to the probability function is very
558 559 560 561 562 563 564 565
        large, the memory and time requirements to keep the cache may not be
        worthwhile.
    persist : bool (optional, default: ``False``)
        If ``True``, an edge swap which is rejected will be attempted again
        until it succeeds. This may improve the quality of the shuffling for
        some probabilistic models, and should be sufficiently fast for sparse
        graphs, but otherwise it may result in many repeated attempts for
        certain corner-cases in which edges are difficult to swap.
566 567 568 569
    pin : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge property map which, if provided, specifies which edges are allowed
        to be rewired. Edges for which the property value is ``1`` (or ``True``)
        will be left unmodified in the graph.
570 571 572 573 574 575
    verbose : bool (optional, default: ``False``)
        If ``True``, verbose information is displayed.


    Returns
    -------
576 577 578
    rejection_count : int
        Number of rejected edge moves (due to parallel edges or self-loops, or
        the probabilistic model used).
579 580 581 582 583 584 585

    See Also
    --------
    random_graph: random graph generation

    Notes
    -----
Tiago Peixoto's avatar
Tiago Peixoto committed
586
    This algorithm iterates through all the edges in the network and tries to
587 588
    swap its target or source with the target or source of another edge. The
    selected canditate swaps are chosen according to the ``model`` parameter.
Tiago Peixoto's avatar
Tiago Peixoto committed
589 590

    .. note::
591

592 593 594 595 596 597 598 599
        If ``parallel_edges = False``, parallel edges are not placed during
        rewiring. In this case, the returned graph will be a uncorrelated sample
        from the desired ensemble only if ``n_iter`` is sufficiently large. The
        algorithm implements an efficient Markov chain based on edge swaps, with
        a mixing time which depends on the degree distribution and correlations
        desired. If degree probabilistic correlations are provided, the mixing
        time tends to be larger.

Tiago Peixoto's avatar
Tiago Peixoto committed
600 601 602 603 604 605 606
        If ``model`` is either "probabilistic-configuration", "blockmodel" or
        "blockmodel-degree", the Markov chain still needs to be mixed, even if
        parallel edges and self-loops are allowed. In this case the Markov chain
        is implemented using the Metropolis-Hastings
        [metropolis-equations-1953]_ [hastings-monte-carlo-1970]_
        acceptance/rejection algorithm. It will eventually converge to the
        desired probabilities for sufficiently large values of ``n_iter``.
607

Tiago Peixoto's avatar
Tiago Peixoto committed
608

609
    Each edge is tentatively swapped once per iteration, so the overall
610 611
    complexity is :math:`O(V + E \times \text{n-iter})`. If ``edge_sweep ==
    False``, the complexity becomes :math:`O(V + E + \text{n-iter})`.
612

613 614 615 616 617
    Examples
    --------

    Some small graphs for visualization.

618 619 620 621 622 623 624 625
    .. testcode::
       :hide:

       from numpy.random import random, seed
       from pylab import *
       seed(43)
       gt.seed_rng(42)

626
    >>> g, pos = gt.triangulation(np.random.random((1000,2)))
627
    >>> pos = gt.arf_layout(g)
628
    >>> gt.graph_draw(g, pos=pos, output="rewire_orig.pdf", output_size=(300, 300))
629
    <...>
630 631 632 633 634 635

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_orig.png", output_size=(300, 300))

636
    >>> ret = gt.random_rewire(g, "constrained-configuration")
637
    >>> pos = gt.arf_layout(g)
638
    >>> gt.graph_draw(g, pos=pos, output="rewire_corr.pdf", output_size=(300, 300))
639
    <...>
640 641 642 643 644 645

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_corr.png", output_size=(300, 300))

646
    >>> ret = gt.random_rewire(g)
647
    >>> pos = gt.arf_layout(g)
648
    >>> gt.graph_draw(g, pos=pos, output="rewire_uncorr.pdf", output_size=(300, 300))
649
    <...>
650 651 652 653 654 655

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_uncorr.png", output_size=(300, 300))

656
    >>> ret = gt.random_rewire(g, "erdos")
657
    >>> pos = gt.arf_layout(g)
658
    >>> gt.graph_draw(g, pos=pos, output="rewire_erdos.pdf", output_size=(300, 300))
659
    <...>
660

661 662 663 664 665
    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_erdos.png", output_size=(300, 300))

666
    Some `ridiculograms <http://www.youtube.com/watch?v=YS-asmU3p_4>`_ :
667

668 669 670 671
    .. image:: rewire_orig.*
    .. image:: rewire_corr.*
    .. image:: rewire_uncorr.*
    .. image:: rewire_erdos.*
672

673 674
    **From left to right**: Original graph; Shuffled graph, with degree correlations;
    Shuffled graph, without degree correlations; Shuffled graph, with random degrees.
675

676
    We can try with larger graphs to get better statistics, as follows.
677

678
    >>> figure()
679
    <...>
680 681
    >>> g = gt.random_graph(30000, lambda: sample_k(20), model="probabilistic-configuration",
    ...                     edge_probs=lambda i, j: exp(abs(i-j)), directed=False,
682
    ...                     n_iter=100)
683
    >>> corr = gt.avg_neighbour_corr(g, "out", "out")
684 685
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="Original")
    <...>
686
    >>> ret = gt.random_rewire(g, "constrained-configuration")
687
    >>> corr = gt.avg_neighbour_corr(g, "out", "out")
688 689
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="*", label="Correlated")
    <...>
690
    >>> ret = gt.random_rewire(g)
691
    >>> corr = gt.avg_neighbour_corr(g, "out", "out")
692 693
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="Uncorrelated")
    <...>
694
    >>> ret = gt.random_rewire(g, "erdos")
695
    >>> corr = gt.avg_neighbour_corr(g, "out", "out")
696 697
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label=r"Erd\H{o}s")
    <...>
698 699 700 701 702 703
    >>> xlabel("$k$")
    <...>
    >>> ylabel(r"$\left<k_{nn}\right>$")
    <...>
    >>> legend(loc="best")
    <...>
704
    >>> tight_layout()
705
    >>> savefig("shuffled-stats.pdf")
706

707 708 709 710 711 712
    .. testcode::
       :hide:

       savefig("shuffled-stats.png")


713
    .. figure:: shuffled-stats.*
714 715 716 717 718 719 720 721 722 723 724
        :align: center

        Average degree correlations for the different shuffled and non-shuffled
        graphs. The shuffled graph with correlations displays exactly the same
        correlation as the original graph.

    Now let's do it for a directed graph. See
    :func:`~graph_tool.generation.random_graph` for more details.

    >>> p = scipy.stats.poisson
    >>> g = gt.random_graph(20000, lambda: (sample_k(19), sample_k(19)),
725 726
    ...                     model="probabilistic-configuration",
    ...                     edge_probs=lambda a, b: (p.pmf(a[0], b[1]) * p.pmf(a[1], 20 - b[0])),
727
    ...                     n_iter=100)
728
    >>> figure()
729
    <...>
730
    >>> corr = gt.avg_neighbour_corr(g, "in", "out")
731
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
732
    ...          label=r"$\left<\text{o}\right>$ vs i")
733
    <...>
734
    >>> corr = gt.avg_neighbour_corr(g, "out", "in")
735
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
736
    ...          label=r"$\left<\text{i}\right>$ vs o")
737
    <...>
738
    >>> ret = gt.random_rewire(g, "constrained-configuration")
739
    >>> corr = gt.avg_neighbour_corr(g, "in", "out")
740
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
741
    ...          label=r"$\left<\text{o}\right>$ vs i, corr.")
742
    <...>
743
    >>> corr = gt.avg_neighbour_corr(g, "out", "in")
744
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
745
    ...          label=r"$\left<\text{i}\right>$ vs o, corr.")
746
    <...>
747
    >>> ret = gt.random_rewire(g, "configuration")
748
    >>> corr = gt.avg_neighbour_corr(g, "in", "out")
749
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
750
    ...          label=r"$\left<\text{o}\right>$ vs i, uncorr.")
751
    <...>
752
    >>> corr = gt.avg_neighbour_corr(g, "out", "in")
753
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
754
    ...          label=r"$\left<\text{i}\right>$ vs o, uncorr.")
755
    <...>
756
    >>> legend(loc="lower right", borderaxespad=0., framealpha=0.8)
757 758
    <...>
    >>> xlabel("Source degree")
759
    <...>
760
    >>> ylabel("Average target degree")
761
    <...>
762
    >>> tight_layout()
763
    >>> savefig("shuffled-deg-corr-dir.pdf")
764

765 766 767 768 769
    .. testcode::
       :hide:

       savefig("shuffled-deg-corr-dir.png")

770
    .. figure:: shuffled-deg-corr-dir.*
771 772 773 774 775 776
        :align: center

        Average degree correlations for the different shuffled and non-shuffled
        directed graphs. The shuffled graph with correlations displays exactly
        the same correlation as the original graph.

777 778 779 780 781
    References
    ----------
    .. [metropolis-equations-1953]  Metropolis, N.; Rosenbluth, A.W.;
       Rosenbluth, M.N.; Teller, A.H.; Teller, E. "Equations of State
       Calculations by Fast Computing Machines". Journal of Chemical Physics 21
782
       (6): 1087-1092 (1953). :doi:`10.1063/1.1699114`
783
    .. [hastings-monte-carlo-1970] Hastings, W.K. "Monte Carlo Sampling Methods
784
       Using Markov Chains and Their Applications". Biometrika 57 (1): 97-109 (1970).
785
       :doi:`10.1093/biomet/57.1.97`
786 787 788 789 790 791
    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey, and
       Samuel Leinhardt, "Stochastic blockmodels: First steps," Social Networks
       5, no. 2: 109-13 (1983) :doi:`10.1016/0378-8733(83)90021-7`
    .. [karrer-stochastic-2011] Brian Karrer and M. E. J. Newman, "Stochastic
       blockmodels and community structure in networks," Physical Review E 83,
       no. 1: 016107 (2011) :doi:`10.1103/PhysRevE.83.016107` :arxiv:`1008.3926`
792 793

    """
794 795 796 797 798 799 800 801 802 803 804 805 806
    # if not parallel_edges:
    #     p = label_parallel_edges(g)
    #     if p.a.max() != 0:
    #         raise ValueError("Parallel edge detected. Can't rewire " +
    #                          "graph without parallel edges if it " +
    #                          "already contains parallel edges!")

    # if not self_loops:
    #     l = label_self_loops(g)
    #     if l.a.max() != 0:
    #         raise ValueError("Self-loop detected. Can't rewire graph " +
    #                          "without self-loops if it already contains" +
    #                          " self-loops!")
Tiago Peixoto's avatar
Tiago Peixoto committed
807

808 809
    if (edge_probs is not None and not g.is_directed()) and "blockmodel" not in model:
        corr = lambda i, j: edge_probs(i[1], j[1])
810
    else:
811
        corr = edge_probs
812

813 814
    if model not in ["probabilistic-configuration", "blockmodel",
                     "blockmodel-degree"]:
815
        g = GraphView(g, reversed=False)
816

817
    if model == "blockmodel-degree" and alias and edge_sweep:
818 819 820
        edge_sweep = False
        n_iter *= g.num_edges()

821 822
    traditional = True
    if model == "blockmodel-degree":
823
        model = "blockmodel"
824
        traditional = False
825

826 827 828 829 830 831
    if pin is None:
        pin = g.new_edge_property("bool")

    if pin.value_type() != "bool":
        pin = pin.copy(value_type="bool")

832 833
    pcount = libgraph_tool_generation.random_rewire(g._Graph__graph,
                                                    _c_str(model),
834 835
                                                    n_iter, not edge_sweep,
                                                    self_loops, parallel_edges,
836
                                                    alias, traditional, persist,
837 838 839
                                                    corr,
                                                    _prop("e", g, pin),
                                                    _prop("v", g, block_membership),
840
                                                    cache_probs,
841
                                                    _get_rng(), verbose)
842
    return pcount
Tiago Peixoto's avatar
Tiago Peixoto committed
843

844 845 846 847 848 849 850 851 852 853
def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
    r"""Generate a random graph by sampling from the Poisson stochastic block model.

    Parameters
    ----------
    b : iterable or :class:`numpy.ndarray`
        Group membership for each node.
    probs : two-dimensional :class:`numpy.ndarray` or :class:`scipy.sparse.spmatrix`
        Matrix with edge propensities between groups. The value ``probs[r,s]``
        corresponds to the average number of edges between groups ``r`` and
854 855
        ``s`` (or twice the average number if ``r == s`` and the graph is
        undirected).
856
    out_degs : iterable or :class:`numpy.ndarray` (optional, default: ``None``)
857 858 859
        Out-degree propensity for each node. If not provided, a constant value
        will be used. Note that the values will be normalized inside each group,
        if they are not already so.
860
    in_degs : iterable or :class:`numpy.ndarray` (optional, default: ``None``)
861 862 863
        In-degree propensity for each node. If not provided, a constant value
        will be used. Note that the values will be normalized inside each group,
        if they are not already so.
864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884
    directed : ``bool`` (optional, default: ``False``)
        Whether the graph is directed.

    Returns
    -------
    g : :class:`~graph_tool.Graph`
        The generated graph.

    See Also
    --------
    random_graph: random graph generation

    Notes
    -----

    The graphs are generated with probability

    .. math::

        P({\boldsymbol A}|{\boldsymbol \lambda},{\boldsymbol \theta},{\boldsymbol b})
            = \prod_{i<j}\frac{e^{-\lambda_{b_ib_j}\theta_i\theta_j}(\lambda_{b_ib_j}\theta_i\theta_j)^{A_{ij}}}{A_{ij}!}
885
              \times\prod_i\frac{e^{-\lambda_{b_ib_i}\theta_i^2/2}(\lambda_{b_ib_i}\theta_i^2/2)^{A_{ij}/2}}{(A_{ij}/2)!},
886 887 888

    where :math:`\lambda_{rs}` is the edge propensity between groups :math:`r`
    and :math:`s`, and :math:`\theta_i` is the propensity of node i to receive
889 890 891 892 893 894
    edges, which is proportional to its expected degree. Note that in the
    algorithm it is assumed that the node propensities are normalized for each
    group,

    .. math::

Tiago Peixoto's avatar
Tiago Peixoto committed
895
        \sum_i\theta_i\delta_{b_i,r} = 1,
896

Tiago Peixoto's avatar
Tiago Peixoto committed
897 898 899 900 901 902 903 904
    such that the value :math:`\lambda_{rs}` will correspond to the average
    number of edges between groups :math:`r` and :math:`s` (or twice that if
    :math:`r = s`). If the supplied values of :math:`\theta_i` are not
    normalized as above, they will be normalized prior to the generation of the
    graph.

    For directed graphs, the probability is analogous, with :math:`\lambda_{rs}`
    being in general asymmetric:
905 906 907