__init__.py 76 KB
Newer Older
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4
5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2017 Tiago de Paula Peixoto <tiago@skewed.de>
7
8
9
10
11
12
13
14
15
16
17
18
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
Tiago Peixoto's avatar
Tiago Peixoto committed
19
# along with this program.  If not, see <http://www.gnu.org/licenses/>.s
20

21
"""
22
``graph_tool.generation`` - Random graph generation
23
---------------------------------------------------
24
25
26
27
28
29
30
31
32

Summary
+++++++

.. autosummary::
   :nosignatures:

   random_graph
   random_rewire
33
   generate_sbm
34
35
36
   predecessor_tree
   line_graph
   graph_union
37
   triangulation
38
39
   lattice
   geometric_graph
40
   price_network
Tiago Peixoto's avatar
Tiago Peixoto committed
41
   complete_graph
Tiago Peixoto's avatar
Tiago Peixoto committed
42
   circular_graph
43
   condensation_graph
44
45
46

Contents
++++++++
47
48
"""

49
from __future__ import division, absolute_import, print_function
50
51
52
import sys
if sys.version_info < (3,):
    range = xrange
53

Tiago Peixoto's avatar
Tiago Peixoto committed
54
from .. dl_import import dl_import
55
dl_import("from . import libgraph_tool_generation")
56

57
58
from .. import Graph, GraphView, _check_prop_scalar, _prop, _limit_args, \
    _gt_type, _get_rng, _c_str, libcore
Tiago Peixoto's avatar
Tiago Peixoto committed
59
from .. stats import label_parallel_edges, label_self_loops
60
61
import inspect
import types
62
63
import numpy
import numpy.random
64

65
66
67
68
__all__ = ["random_graph", "random_rewire", "generate_sbm", "predecessor_tree",
           "line_graph", "graph_union", "triangulation", "lattice",
           "geometric_graph", "price_network", "complete_graph",
           "circular_graph", "condensation_graph"]
69

Tiago Peixoto's avatar
Tiago Peixoto committed
70

71
72
def random_graph(N, deg_sampler, directed=True,
                 parallel_edges=False, self_loops=False, block_membership=None,
73
                 block_type="int", degree_block=False,
74
                 random=True, verbose=False, **kwargs):
Tiago Peixoto's avatar
Tiago Peixoto committed
75
    r"""
76
77
78
79
80
81
82
    Generate a random graph, with a given degree distribution and (optionally)
    vertex-vertex correlation.

    The graph will be randomized via the :func:`~graph_tool.generation.random_rewire`
    function, and any remaining parameters will be passed to that function.
    Please read its documentation for all the options regarding the different
    statistical models which can be chosen.
Tiago Peixoto's avatar
Tiago Peixoto committed
83
84
85
86
87
88
89
90
91
92
93

    Parameters
    ----------
    N : int
        Number of vertices in the graph.
    deg_sampler : function
        A degree sampler function which is called without arguments, and returns
        a tuple of ints representing the in and out-degree of a given vertex (or
        a single int for undirected graphs, representing the out-degree). This
        function is called once per vertex, but may be called more times, if the
        degree sequence cannot be used to build a graph.
94

95
        Optionally, you can also pass a function which receives one or two
Tiago Peixoto's avatar
Tiago Peixoto committed
96
        arguments. If ``block_membership is None``, the single argument passed
97
        will be the index of the vertex which will receive the degree.  If
Tiago Peixoto's avatar
Tiago Peixoto committed
98
        ``block_membership is not None``, the first value passed will be the vertex
99
        index, and the second will be the block value of the vertex.
100
    directed : bool (optional, default: ``True``)
Tiago Peixoto's avatar
Tiago Peixoto committed
101
        Whether the generated graph should be directed.
102
103
104
105
    parallel_edges : bool (optional, default: ``False``)
        If ``True``, parallel edges are allowed.
    self_loops : bool (optional, default: ``False``)
        If ``True``, self-loops are allowed.
106
    block_membership : list or :class:`~numpy.ndarray` or function (optional, default: ``None``)
107
        If supplied, the graph will be sampled from a stochastic blockmodel
108
109
110
        ensemble, and this parameter specifies the block membership of the
        vertices, which will be passed to the
        :func:`~graph_tool.generation.random_rewire` function.
111
112
113
114

        If the value is a list or a :class:`~numpy.ndarray`, it must have
        ``len(block_membership) == N``, and the values will define to which
        block each vertex belongs.
115
116
117
118

        If this value is a function, it will be used to sample the block
        types. It must be callable either with no arguments or with a single
        argument which will be the vertex index. In either case it must return
119
        a type compatible with the ``block_type`` parameter.
120
121
122
123

        See the documentation for the ``vertex_corr`` parameter of the
        :func:`~graph_tool.generation.random_rewire` function which specifies
        the correlation matrix.
124
    block_type : string (optional, default: ``"int"``)
Tiago Peixoto's avatar
Tiago Peixoto committed
125
        Value type of block labels. Valid only if ``block_membership is not None``.
126
127
128
129
130
    degree_block : bool (optional, default: ``False``)
        If ``True``, the degree of each vertex will be appended to block labels
        when constructing the blockmodel, such that the resulting block type
        will be a pair :math:`(r, k)`, where :math:`r` is the original block
        label.
131
132
133
134
135
    random : bool (optional, default: ``True``)
        If ``True``, the returned graph is randomized. Otherwise a deterministic
        placement of the edges will be used.
    verbose : bool (optional, default: ``False``)
        If ``True``, verbose information is displayed.
Tiago Peixoto's avatar
Tiago Peixoto committed
136
137
138

    Returns
    -------
139
    random_graph : :class:`~graph_tool.Graph`
Tiago Peixoto's avatar
Tiago Peixoto committed
140
        The generated graph.
141
142
    blocks : :class:`~graph_tool.PropertyMap`
        A vertex property map with the block values. This is only returned if
Tiago Peixoto's avatar
Tiago Peixoto committed
143
        ``block_membership is not None``.
Tiago Peixoto's avatar
Tiago Peixoto committed
144
145
146

    See Also
    --------
147
    random_rewire: in-place graph shuffling
Tiago Peixoto's avatar
Tiago Peixoto committed
148
149
150

    Notes
    -----
Tiago Peixoto's avatar
Tiago Peixoto committed
151
152
153
    The algorithm makes sure the degree sequence is graphical (i.e. realizable)
    and keeps re-sampling the degrees if is not. With a valid degree sequence,
    the edges are placed deterministically, and later the graph is shuffled with
154
155
    the :func:`~graph_tool.generation.random_rewire` function, with all
    remaining parameters passed to it.
Tiago Peixoto's avatar
Tiago Peixoto committed
156

157
    The complexity is :math:`O(V + E)` if parallel edges are allowed, and
158
    :math:`O(V + E \times\text{n-iter})` if parallel edges are not allowed.
159
160
161
162
163
164


    .. note ::

        If ``parallel_edges == False`` this algorithm only guarantees that the
        returned graph will be a random sample from the desired ensemble if
165
        ``n_iter`` is sufficiently large. The algorithm implements an
166
167
168
169
        efficient Markov chain based on edge swaps, with a mixing time which
        depends on the degree distribution and correlations desired. If degree
        correlations are provided, the mixing time tends to be larger.

Tiago Peixoto's avatar
Tiago Peixoto committed
170
171
    Examples
    --------
172
173
174
175

    .. testcode::
       :hide:

176
       import numpy.random
177
       from pylab import *
178
       np.random.seed(43)
179
       gt.seed_rng(42)
Tiago Peixoto's avatar
Tiago Peixoto committed
180
181
182
183
184
185
186

    This is a degree sampler which uses rejection sampling to sample from the
    distribution :math:`P(k)\propto 1/k`, up to a maximum.

    >>> def sample_k(max):
    ...     accept = False
    ...     while not accept:
187
188
    ...         k = np.random.randint(1,max+1)
    ...         accept = np.random.random() < 1.0/k
Tiago Peixoto's avatar
Tiago Peixoto committed
189
190
191
192
193
194
195
196
197
198
199
    ...     return k
    ...

    The following generates a random undirected graph with degree distribution
    :math:`P(k)\propto 1/k` (with k_max=40) and an *assortative* degree
    correlation of the form:

    .. math::

        P(i,k) \propto \frac{1}{1+|i-k|}

200
201
    >>> g = gt.random_graph(1000, lambda: sample_k(40), model="probabilistic-configuration",
    ...                     edge_probs=lambda i, k: 1.0 / (1 + abs(i - k)), directed=False,
202
    ...                     n_iter=100)
Tiago Peixoto's avatar
Tiago Peixoto committed
203
204
205
206
207
208
209
210
211
212
213
214

    The following samples an in,out-degree pair from the joint distribution:

    .. math::

        p(j,k) = \frac{1}{2}\frac{e^{-m_1}m_1^j}{j!}\frac{e^{-m_1}m_1^k}{k!} +
                 \frac{1}{2}\frac{e^{-m_2}m_2^j}{j!}\frac{e^{-m_2}m_2^k}{k!}

    with :math:`m_1 = 4` and :math:`m_2 = 20`.

    >>> def deg_sample():
    ...    if random() > 0.5:
215
    ...        return np.random.poisson(4), np.random.poisson(4)
Tiago Peixoto's avatar
Tiago Peixoto committed
216
    ...    else:
217
    ...        return np.random.poisson(20), np.random.poisson(20)
218
    ...
Tiago Peixoto's avatar
Tiago Peixoto committed
219
220
221
222
223
224
225

    The following generates a random directed graph with this distribution, and
    plots the combined degree correlation.

    >>> g = gt.random_graph(20000, deg_sample)
    >>>
    >>> hist = gt.combined_corr_hist(g, "in", "out")
226
    >>>
227
228
    >>> figure()
    <...>
229
    >>> imshow(hist[0].T, interpolation="nearest", origin="lower")
Tiago Peixoto's avatar
Tiago Peixoto committed
230
231
232
    <...>
    >>> colorbar()
    <...>
233
    >>> xlabel("in-degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
234
    <...>
235
    >>> ylabel("out-degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
236
    <...>
237
    >>> tight_layout()
Tiago Peixoto's avatar
Tiago Peixoto committed
238
    >>> savefig("combined-deg-hist.svg")
Tiago Peixoto's avatar
Tiago Peixoto committed
239

240
241
242
    .. testcode::
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
243
       savefig("combined-deg-hist.pdf")
244

245
    .. figure:: combined-deg-hist.*
Tiago Peixoto's avatar
Tiago Peixoto committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
        :align: center

        Combined degree histogram.

    A correlated directed graph can be build as follows. Consider the following
    degree correlation:

    .. math::

         P(j',k'|j,k)=\frac{e^{-k}k^{j'}}{j'!}
         \frac{e^{-(20-j)}(20-j)^{k'}}{k'!}

    i.e., the in->out correlation is "disassortative", the out->in correlation
    is "assortative", and everything else is uncorrelated.
    We will use a flat degree distribution in the range [1,20).

    >>> p = scipy.stats.poisson
    >>> g = gt.random_graph(20000, lambda: (sample_k(19), sample_k(19)),
264
265
266
    ...                     model="probabilistic-configuration",
    ...                     edge_probs=lambda a,b: (p.pmf(a[0], b[1]) *
    ...                                             p.pmf(a[1], 20 - b[0])),
267
    ...                     n_iter=100)
Tiago Peixoto's avatar
Tiago Peixoto committed
268
269
270

    Lets plot the average degree correlations to check.

Tiago Peixoto's avatar
Tiago Peixoto committed
271
    >>> figure(figsize=(6,3))
272
    <...>
273
    >>> corr = gt.avg_neighbor_corr(g, "in", "in")
274
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
275
    ...         label=r"$\left<\text{in}\right>$ vs in")
276
    <...>
277
    >>> corr = gt.avg_neighbor_corr(g, "in", "out")
278
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
279
    ...         label=r"$\left<\text{out}\right>$ vs in")
280
    <...>
281
    >>> corr = gt.avg_neighbor_corr(g, "out", "in")
282
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
283
    ...          label=r"$\left<\text{in}\right>$ vs out")
284
    <...>
285
    >>> corr = gt.avg_neighbor_corr(g, "out", "out")
286
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
287
    ...          label=r"$\left<\text{out}\right>$ vs out")
Tiago Peixoto's avatar
Tiago Peixoto committed
288
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
289
    >>> legend(loc='center left', bbox_to_anchor=(1, 0.5))
290
291
    <...>
    >>> xlabel("Source degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
292
    <...>
293
    >>> ylabel("Average target degree")
Tiago Peixoto's avatar
Tiago Peixoto committed
294
    <...>
295
    >>> tight_layout()
Tiago Peixoto's avatar
Tiago Peixoto committed
296
297
298
    >>> box = gca().get_position()
    >>> gca().set_position([box.x0, box.y0, box.width * 0.7, box.height])
    >>> savefig("deg-corr-dir.svg")
Tiago Peixoto's avatar
Tiago Peixoto committed
299

300
301
302
    .. testcode::
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
303
       savefig("deg-corr-dir.pdf")
304

305
    .. figure:: deg-corr-dir.*
Tiago Peixoto's avatar
Tiago Peixoto committed
306
307
        :align: center

308
        Average nearest neighbor correlations.
309
310


311
    **Stochastic blockmodels**
312
313


314
315
316
    The following example shows how a stochastic blockmodel
    [holland-stochastic-1983]_ [karrer-stochastic-2011]_ can be generated. We
    will consider a system of 10 blocks, which form communities. The connection
317
318
    probability will be given by

319
    >>> def prob(a, b):
320
321
322
323
324
325
326
    ...    if a == b:
    ...        return 0.999
    ...    else:
    ...        return 0.001

    The blockmodel can be generated as follows.

Tiago Peixoto's avatar
Tiago Peixoto committed
327
    >>> g, bm = gt.random_graph(2000, lambda: poisson(10), directed=False,
328
    ...                         model="blockmodel",
329
    ...                         block_membership=lambda: randint(10),
330
    ...                         edge_probs=prob)
Tiago Peixoto's avatar
Tiago Peixoto committed
331
    >>> gt.graph_draw(g, vertex_fill_color=bm, edge_color="black", output="blockmodel.pdf")
332
333
    <...>

334
335
336
    .. testcode::
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
337
       gt.graph_draw(g, vertex_fill_color=bm, edge_color="black", output="blockmodel.png")
338

339
    .. figure:: blockmodel.*
340
341
342
343
344
345
346
347
348
349
        :align: center

        Simple blockmodel with 10 blocks.


    References
    ----------
    .. [metropolis-equations-1953]  Metropolis, N.; Rosenbluth, A.W.;
       Rosenbluth, M.N.; Teller, A.H.; Teller, E. "Equations of State
       Calculations by Fast Computing Machines". Journal of Chemical Physics 21
350
       (6): 1087-1092 (1953). :doi:`10.1063/1.1699114`
351
    .. [hastings-monte-carlo-1970] Hastings, W.K. "Monte Carlo Sampling Methods
352
       Using Markov Chains and Their Applications". Biometrika 57 (1): 97-109 (1970).
353
       :doi:`10.1093/biomet/57.1.97`
354
355
356
357
358
359
    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey, and
       Samuel Leinhardt, "Stochastic blockmodels: First steps," Social Networks
       5, no. 2: 109-13 (1983) :doi:`10.1016/0378-8733(83)90021-7`
    .. [karrer-stochastic-2011] Brian Karrer and M. E. J. Newman, "Stochastic
       blockmodels and community structure in networks," Physical Review E 83,
       no. 1: 016107 (2011) :doi:`10.1103/PhysRevE.83.016107` :arxiv:`1008.3926`
Tiago Peixoto's avatar
Tiago Peixoto committed
360
    """
361

362
    g = Graph()
363

364
365
    if (type(block_membership) is types.FunctionType or
        type(block_membership) is types.LambdaType):
366
367
        btype = block_type
        bm = []
368
        if len(inspect.getargspec(block_membership)[0]) == 0:
369
            for i in range(N):
370
                bm.append(block_membership())
371
        else:
372
            for i in range(N):
373
374
375
376
                bm.append(block_membership(i))
        block_membership = bm
    elif block_membership is not None:
        btype = _gt_type(block_membership[0])
377
378

    if len(inspect.getargspec(deg_sampler)[0]) > 0:
379
380
        if block_membership is not None:
            sampler = lambda i: deg_sampler(i, block_membership[i])
381
        else:
Tiago Peixoto's avatar
Tiago Peixoto committed
382
            sampler = deg_sampler
383
384
385
    else:
        sampler = lambda i: deg_sampler()

386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
    if not directed:
        def sampler_wrap(*args):
            k = sampler(*args)
            try:
                return int(k)
            except:
                raise ValueError("degree value not understood: " + str(k))
    else:
        def sampler_wrap(*args):
            k = sampler(*args)
            try:
                return int(k[0]), int(k[1])
            except:
                raise ValueError("(in,out)-degree value pair not understood: " +
                                 str(k))

    libgraph_tool_generation.gen_graph(g._Graph__graph, N, sampler_wrap,
403
                                       not parallel_edges,
404
                                       not self_loops, not directed,
405
                                       _get_rng(), verbose, True)
406
407
    g.set_directed(directed)

408
409
410
411
412
413
414
415
416
417
418
419
    if degree_block:
        if btype in ["object", "string"] or "vector" in btype:
            btype = "object"
        elif btype in ["int", "int32_t", "bool"]:
            btype = "vector<int32_t>"
        elif btype in ["long", "int64_t"]:
            btype = "vector<int64_t>"
        elif btype in ["double"]:
            btype = "vector<double>"
        elif btype in ["long double"]:
            btype = "vector<long double>"

420
    if block_membership is not None:
421
422
423
        bm = g.new_vertex_property(btype)
        if btype in ["object", "string"] or "vector" in btype:
            for v in g.vertices():
424
                if not degree_block:
425
                    bm[v] = block_membership[int(v)]
426
427
                else:
                    if g.is_directed():
428
                        bm[v] = (block_membership[int(v)], v.in_degree(),
429
430
                                 v.out_degree())
                    else:
431
                        bm[v] = (block_membership[int(v)], v.out_degree())
432
433
        else:
            try:
434
                bm.a = block_membership
435
436
437
            except ValueError:
                bm = g.new_vertex_property("object")
                for v in g.vertices():
438
                    bm[v] = block_membership[int(v)]
439
440
    else:
        bm = None
441

Tiago Peixoto's avatar
Tiago Peixoto committed
442
    if random:
443
        g.set_fast_edge_removal(True)
444
445
446
        random_rewire(g, parallel_edges=parallel_edges,
                      self_loops=self_loops, verbose=verbose,
                      block_membership=bm, **kwargs)
447
        g.set_fast_edge_removal(False)
448

449
450
451
452
    if bm is None:
        return g
    else:
        return g, bm
453

Tiago Peixoto's avatar
Tiago Peixoto committed
454

455
456
@_limit_args({"model": ["erdos", "configuration", "constrained-configuration",
                        "probabilistic-configuration", "blockmodel-degree",
457
                        "blockmodel", "blockmodel-micro"]})
458
def random_rewire(g, model="configuration", n_iter=1, edge_sweep=True,
459
                  parallel_edges=False, self_loops=False, configuration=True,
460
461
                  edge_probs=None, block_membership=None, cache_probs=True,
                  persist=False, pin=None, ret_fail=False, verbose=False):
462
    r"""Shuffle the graph in-place, following a variety of possible statistical
463
464
    models, chosen via the parameter ``model``.

465
466
467

    Parameters
    ----------
468
    g : :class:`~graph_tool.Graph`
469
        Graph to be shuffled. The graph will be modified.
470
    model : string (optional, default: ``"configuration"``)
471
472
        The following statistical models can be chosen, which determine how the
        edges are rewired.
473

474
475
        ``erdos``
           The edges will be rewired entirely randomly, and the resulting graph
476
477
           will correspond to the :math:`G(N,E)` Erdős–Rényi model.
        ``configuration``
478
479
           The edges will be rewired randomly, but the degree sequence of the
           graph will remain unmodified.
480
        ``constrained-configuration``
481
           The edges will be rewired randomly, but both the degree sequence of
482
483
484
           the graph and the *vertex-vertex (in,out)-degree correlations* will
           remain exactly preserved. If the ``block_membership`` parameter is
           passed, the block variables at the endpoints of the edges will be
485
486
487
488
489
490
491
492
493
494
           preserved, instead of the degree-degree correlation.
        ``probabilistic-configuration``
           This is similar to ``constrained-configuration``, but the
           vertex-vertex correlations are not preserved, but are instead sampled
           from an arbitrary degree-based probabilistic model specified via the
           ``edge_probs`` parameter. The degree-sequence is preserved.
        ``blockmodel-degree``
           This is just like ``probabilistic-configuration``, but the values
           passed to the ``edge_probs`` function will correspond to the block
           membership values specified by the ``block_membership`` parameter.
495
        ``blockmodel``
496
497
           This is just like ``blockmodel-degree``, but the degree sequence *is
           not* preserved during rewiring.
498
499
500
        ``blockmodel-micro``
           This is like ``blockmodel``, but the exact number of edges between
           groups is preserved as well.
501
502
503
504
505
506
507
508
509
    n_iter : int (optional, default: ``1``)
        Number of iterations. If ``edge_sweep == True``, each iteration
        corresponds to an entire "sweep" over all edges. Otherwise this
        corresponds to the total number of edges which are randomly chosen for a
        swap attempt (which may repeat).
    edge_sweep : bool (optional, default: ``True``)
        If ``True``, each iteration will perform an entire "sweep" over the
        edges, where each edge is visited once in random order, and a edge swap
        is attempted.
Tiago Peixoto's avatar
Tiago Peixoto committed
510
    parallel_edges : bool (optional, default: ``False``)
511
512
513
        If ``True``, parallel edges are allowed.
    self_loops : bool (optional, default: ``False``)
        If ``True``, self-loops are allowed.
514
515
516
517
518
519
    configuration : bool (optional, default: ``True``)
        If ``True``, graphs are sampled from the corresponding maximum-entropy
        ensemble of configurations (i.e. distinguishable half-edge pairings),
        otherwise they are sampled from the maximum-entropy ensemble of graphs
        (i.e. indistinguishable half-edge pairings). The distinction is only
        relevant if parallel edges are allowed.
520
521
522
    edge_probs : function or sequence of triples (optional, default: ``None``)
        A function which determines the edge probabilities in the graph. In
        general it should have the following signature:
523
524
525

        .. code::

526
            def prob(r, s):
527
528
529
                ...
                return p

530
        where the return value should be a non-negative scalar.
531

532
533
534
535
        Alternatively, this parameter can be a list of triples of the form ``(r,
        s, p)``, with the same meaning as the ``r``, ``s`` and ``p`` values
        above. If a given ``(r, s)`` combination is not present in this list,
        the corresponding value of ``p`` is assumed to be zero. If the same
536
        ``(r, s)`` combination appears more than once, their ``p`` values will
537
538
539
540
541
542
543
544
545
546
547
548
549
550
        be summed together. This is useful when the correlation matrix is
        sparse, i.e. most entries are zero.

        If ``model == probabilistic-configuration`` the parameters ``r`` and
        ``s`` correspond respectively to the (in, out)-degree pair of the source
        vertex of an edge, and the (in,out)-degree pair of the target of the
        same edge (for undirected graphs, both parameters are scalars
        instead). The value of ``p`` should be a number proportional to the
        probability of such an edge existing in the generated graph.

        If ``model == blockmodel-degree`` or ``model == blockmodel``, the ``r``
        and ``s`` values passed to the function will be the block values of the
        respective vertices, as specified via the ``block_membership``
        parameter. The value of ``p`` should be a number proportional to the
551
        probability of such an edge existing in the generated graph.
552
553
554
555
    block_membership : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        If supplied, the graph will be rewired to conform to a blockmodel
        ensemble. The value must be a vertex property map which defines the
        block of each vertex.
556
    cache_probs : bool (optional, default: ``True``)
557
        If ``True``, the probabilities returned by the ``edge_probs`` parameter
558
559
560
561
        will be cached internally. This is crucial for good performance, since
        in this case the supplied python function is called only a few times,
        and not at every attempted edge rewire move. However, in the case were
        the different parameter combinations to the probability function is very
562
563
564
565
566
567
568
569
        large, the memory and time requirements to keep the cache may not be
        worthwhile.
    persist : bool (optional, default: ``False``)
        If ``True``, an edge swap which is rejected will be attempted again
        until it succeeds. This may improve the quality of the shuffling for
        some probabilistic models, and should be sufficiently fast for sparse
        graphs, but otherwise it may result in many repeated attempts for
        certain corner-cases in which edges are difficult to swap.
570
571
572
573
    pin : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge property map which, if provided, specifies which edges are allowed
        to be rewired. Edges for which the property value is ``1`` (or ``True``)
        will be left unmodified in the graph.
574
575
576
577
578
579
    verbose : bool (optional, default: ``False``)
        If ``True``, verbose information is displayed.


    Returns
    -------
580
581
582
    rejection_count : int
        Number of rejected edge moves (due to parallel edges or self-loops, or
        the probabilistic model used).
583
584
585
586
587
588
589

    See Also
    --------
    random_graph: random graph generation

    Notes
    -----
Tiago Peixoto's avatar
Tiago Peixoto committed
590
    This algorithm iterates through all the edges in the network and tries to
591
592
    swap its target or source with the target or source of another edge. The
    selected canditate swaps are chosen according to the ``model`` parameter.
Tiago Peixoto's avatar
Tiago Peixoto committed
593
594

    .. note::
595

596
597
598
599
600
601
602
603
        If ``parallel_edges = False``, parallel edges are not placed during
        rewiring. In this case, the returned graph will be a uncorrelated sample
        from the desired ensemble only if ``n_iter`` is sufficiently large. The
        algorithm implements an efficient Markov chain based on edge swaps, with
        a mixing time which depends on the degree distribution and correlations
        desired. If degree probabilistic correlations are provided, the mixing
        time tends to be larger.

Tiago Peixoto's avatar
Tiago Peixoto committed
604
605
606
607
608
609
610
        If ``model`` is either "probabilistic-configuration", "blockmodel" or
        "blockmodel-degree", the Markov chain still needs to be mixed, even if
        parallel edges and self-loops are allowed. In this case the Markov chain
        is implemented using the Metropolis-Hastings
        [metropolis-equations-1953]_ [hastings-monte-carlo-1970]_
        acceptance/rejection algorithm. It will eventually converge to the
        desired probabilities for sufficiently large values of ``n_iter``.
611

Tiago Peixoto's avatar
Tiago Peixoto committed
612

613
    Each edge is tentatively swapped once per iteration, so the overall
614
615
    complexity is :math:`O(V + E \times \text{n-iter})`. If ``edge_sweep ==
    False``, the complexity becomes :math:`O(V + E + \text{n-iter})`.
616

617
618
619
620
621
    Examples
    --------

    Some small graphs for visualization.

622
623
624
625
626
627
628
629
    .. testcode::
       :hide:

       from numpy.random import random, seed
       from pylab import *
       seed(43)
       gt.seed_rng(42)

630
    >>> g, pos = gt.triangulation(np.random.random((1000,2)))
631
    >>> pos = gt.arf_layout(g)
632
    >>> gt.graph_draw(g, pos=pos, output="rewire_orig.pdf", output_size=(300, 300))
633
    <...>
634
635
636
637
638
639

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_orig.png", output_size=(300, 300))

640
    >>> ret = gt.random_rewire(g, "constrained-configuration")
641
    >>> pos = gt.arf_layout(g)
642
    >>> gt.graph_draw(g, pos=pos, output="rewire_corr.pdf", output_size=(300, 300))
643
    <...>
644
645
646
647
648
649

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_corr.png", output_size=(300, 300))

650
    >>> ret = gt.random_rewire(g)
651
    >>> pos = gt.arf_layout(g)
652
    >>> gt.graph_draw(g, pos=pos, output="rewire_uncorr.pdf", output_size=(300, 300))
653
    <...>
654
655
656
657
658
659

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_uncorr.png", output_size=(300, 300))

660
    >>> ret = gt.random_rewire(g, "erdos")
661
    >>> pos = gt.arf_layout(g)
662
    >>> gt.graph_draw(g, pos=pos, output="rewire_erdos.pdf", output_size=(300, 300))
663
    <...>
664

665
666
667
668
669
    .. testcode::
       :hide:

       gt.graph_draw(g, pos=pos, output="rewire_erdos.png", output_size=(300, 300))

670
    Some `ridiculograms <http://www.youtube.com/watch?v=YS-asmU3p_4>`_ :
671

672
673
674
675
    .. image:: rewire_orig.*
    .. image:: rewire_corr.*
    .. image:: rewire_uncorr.*
    .. image:: rewire_erdos.*
676

677
678
    **From left to right**: Original graph; Shuffled graph, with degree correlations;
    Shuffled graph, without degree correlations; Shuffled graph, with random degrees.
679

680
    We can try with larger graphs to get better statistics, as follows.
681

Tiago Peixoto's avatar
Tiago Peixoto committed
682
    >>> figure(figsize=(6,3))
683
    <...>
684
685
    >>> g = gt.random_graph(30000, lambda: sample_k(20), model="probabilistic-configuration",
    ...                     edge_probs=lambda i, j: exp(abs(i-j)), directed=False,
686
    ...                     n_iter=100)
687
    >>> corr = gt.avg_neighbor_corr(g, "out", "out")
688
689
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="Original")
    <...>
690
    >>> ret = gt.random_rewire(g, "constrained-configuration")
691
    >>> corr = gt.avg_neighbor_corr(g, "out", "out")
692
693
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="*", label="Correlated")
    <...>
694
    >>> ret = gt.random_rewire(g)
695
    >>> corr = gt.avg_neighbor_corr(g, "out", "out")
696
697
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="Uncorrelated")
    <...>
698
    >>> ret = gt.random_rewire(g, "erdos")
699
    >>> corr = gt.avg_neighbor_corr(g, "out", "out")
700
701
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label=r"Erd\H{o}s")
    <...>
702
703
704
705
    >>> xlabel("$k$")
    <...>
    >>> ylabel(r"$\left<k_{nn}\right>$")
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
706
    >>> legend(loc='center left', bbox_to_anchor=(1, 0.5))
707
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
708
    >>> tight_layout()
Tiago Peixoto's avatar
Tiago Peixoto committed
709
710
711
    >>> box = gca().get_position()
    >>> gca().set_position([box.x0, box.y0, box.width * 0.7, box.height])
    >>> savefig("shuffled-stats.svg")
712

713
714
715
    .. testcode::
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
716
       savefig("shuffled-stats.pdf")
717
718


719
    .. figure:: shuffled-stats.*
720
721
722
723
724
725
726
727
728
729
730
        :align: center

        Average degree correlations for the different shuffled and non-shuffled
        graphs. The shuffled graph with correlations displays exactly the same
        correlation as the original graph.

    Now let's do it for a directed graph. See
    :func:`~graph_tool.generation.random_graph` for more details.

    >>> p = scipy.stats.poisson
    >>> g = gt.random_graph(20000, lambda: (sample_k(19), sample_k(19)),
731
732
    ...                     model="probabilistic-configuration",
    ...                     edge_probs=lambda a, b: (p.pmf(a[0], b[1]) * p.pmf(a[1], 20 - b[0])),
733
    ...                     n_iter=100)
Tiago Peixoto's avatar
Tiago Peixoto committed
734
    >>> figure(figsize=(6,3))
735
    <...>
736
    >>> corr = gt.avg_neighbor_corr(g, "in", "out")
737
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
738
    ...          label=r"$\left<\text{o}\right>$ vs i")
739
    <...>
740
    >>> corr = gt.avg_neighbor_corr(g, "out", "in")
741
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
742
    ...          label=r"$\left<\text{i}\right>$ vs o")
743
    <...>
744
    >>> ret = gt.random_rewire(g, "constrained-configuration")
745
    >>> corr = gt.avg_neighbor_corr(g, "in", "out")
746
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
747
    ...          label=r"$\left<\text{o}\right>$ vs i, corr.")
748
    <...>
749
    >>> corr = gt.avg_neighbor_corr(g, "out", "in")
750
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
751
    ...          label=r"$\left<\text{i}\right>$ vs o, corr.")
752
    <...>
753
    >>> ret = gt.random_rewire(g, "configuration")
754
    >>> corr = gt.avg_neighbor_corr(g, "in", "out")
755
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
756
    ...          label=r"$\left<\text{o}\right>$ vs i, uncorr.")
757
    <...>
758
    >>> corr = gt.avg_neighbor_corr(g, "out", "in")
759
    >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
760
    ...          label=r"$\left<\text{i}\right>$ vs o, uncorr.")
761
    <...>
Tiago Peixoto's avatar
Tiago Peixoto committed
762
    >>> legend(loc='center left', bbox_to_anchor=(1, 0.5))
763
764
    <...>
    >>> xlabel("Source degree")
765
    <...>
766
    >>> ylabel("Average target degree")
767
    <...>
768
    >>> tight_layout()
Tiago Peixoto's avatar
Tiago Peixoto committed
769
    >>> box = gca().get_position()
Tiago Peixoto's avatar
Tiago Peixoto committed
770
    >>> gca().set_position([box.x0, box.y0, box.width * 0.55, box.height])
Tiago Peixoto's avatar
Tiago Peixoto committed
771
    >>> savefig("shuffled-deg-corr-dir.svg")
772

773
774
775
    .. testcode::
       :hide:

Tiago Peixoto's avatar
Tiago Peixoto committed
776
       savefig("shuffled-deg-corr-dir.pdf")
777

778
    .. figure:: shuffled-deg-corr-dir.*
779
780
781
782
783
784
        :align: center

        Average degree correlations for the different shuffled and non-shuffled
        directed graphs. The shuffled graph with correlations displays exactly
        the same correlation as the original graph.

785
786
787
788
789
    References
    ----------
    .. [metropolis-equations-1953]  Metropolis, N.; Rosenbluth, A.W.;
       Rosenbluth, M.N.; Teller, A.H.; Teller, E. "Equations of State
       Calculations by Fast Computing Machines". Journal of Chemical Physics 21
790
       (6): 1087-1092 (1953). :doi:`10.1063/1.1699114`
791
    .. [hastings-monte-carlo-1970] Hastings, W.K. "Monte Carlo Sampling Methods
792
       Using Markov Chains and Their Applications". Biometrika 57 (1): 97-109 (1970).
793
       :doi:`10.1093/biomet/57.1.97`
794
795
796
797
798
799
    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey, and
       Samuel Leinhardt, "Stochastic blockmodels: First steps," Social Networks
       5, no. 2: 109-13 (1983) :doi:`10.1016/0378-8733(83)90021-7`
    .. [karrer-stochastic-2011] Brian Karrer and M. E. J. Newman, "Stochastic
       blockmodels and community structure in networks," Physical Review E 83,
       no. 1: 016107 (2011) :doi:`10.1103/PhysRevE.83.016107` :arxiv:`1008.3926`
800
801

    """
Tiago Peixoto's avatar
Tiago Peixoto committed
802

803
804
    if (edge_probs is not None and not g.is_directed()) and "blockmodel" not in model:
        corr = lambda i, j: edge_probs(i[1], j[1])
805
    else:
806
        corr = edge_probs
807

808
809
    if model not in ["probabilistic-configuration", "blockmodel",
                     "blockmodel-degree"]:
810
        g = GraphView(g, reversed=False)
811
812
    elif edge_probs is None:
        raise ValueError("A function must be supplied as the 'edge_probs' parameter")
813

814
    traditional = True
815
    micro = False
816
    if model == "blockmodel-degree":
817
        model = "blockmodel"
818
        traditional = False
819
820
821
    if model == "blockmodel-micro":
        model = "blockmodel"
        micro = True
822

823
824
825
826
827
828
    if pin is None:
        pin = g.new_edge_property("bool")

    if pin.value_type() != "bool":
        pin = pin.copy(value_type="bool")

829
830
831
    fast = g.get_fast_edge_removal()
    if not fast:
        g.set_fast_edge_removal(True)
832
833
    pcount = libgraph_tool_generation.random_rewire(g._Graph__graph,
                                                    _c_str(model),
834
835
                                                    n_iter, not edge_sweep,
                                                    self_loops, parallel_edges,
836
                                                    configuration,
837
                                                    traditional, micro, persist, corr,
838
839
                                                    _prop("e", g, pin),
                                                    _prop("v", g, block_membership),
840
                                                    cache_probs,
841
                                                    _get_rng(), verbose)
842
843
    if not fast:
        g.set_fast_edge_removal(False)
844
    return pcount
Tiago Peixoto's avatar
Tiago Peixoto committed
845

846
847
848
849
def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False,
                 micro_ers=False, micro_degs=False):
    r"""Generate a random graph by sampling from the Poisson or microcanonical
    stochastic block model.
850
851
852
853
854
855
856
857

    Parameters
    ----------
    b : iterable or :class:`numpy.ndarray`
        Group membership for each node.
    probs : two-dimensional :class:`numpy.ndarray` or :class:`scipy.sparse.spmatrix`
        Matrix with edge propensities between groups. The value ``probs[r,s]``
        corresponds to the average number of edges between groups ``r`` and
858
859
        ``s`` (or twice the average number if ``r == s`` and the graph is
        undirected).
860
    out_degs : iterable or :class:`numpy.ndarray` (optional, default: ``None``)
861
862
863
        Out-degree propensity for each node. If not provided, a constant value
        will be used. Note that the values will be normalized inside each group,
        if they are not already so.
864
    in_degs : iterable or :class:`numpy.ndarray` (optional, default: ``None``)
865
866
867
        In-degree propensity for each node. If not provided, a constant value
        will be used. Note that the values will be normalized inside each group,
        if they are not already so.
868
869
    directed : ``bool`` (optional, default: ``False``)
        Whether the graph is directed.
870
871
872
873
874
875
876
877
878
879
880
    micro_ers : ``bool`` (optional, default: ``False``)
        If true, the `microcanonical` version of the model will be evoked, where
        the numbers of edges between groups will be given `exactly` by the
        parameter ``probs``, and this will not fluctuate between samples.
    micro_degs : ``bool`` (optional, default: ``False``)
        If true, the `microcanonical` version of the degree-corrected model will
        be evoked, where the degrees of nodes will be given `exactly` by the
        parameters ``out_degs`` and ``in_degs``, and they will not fluctuate
        between samples. (If ``micro_degs == True`` it implies ``micro_ers ==
        True``.)

881
882
883
884
885
886
887
888
889
890
891
892
893

    Returns
    -------
    g : :class:`~graph_tool.Graph`
        The generated graph.

    See Also
    --------
    random_graph: random graph generation

    Notes
    -----

894
895
896
897
898
    The algorithm generates multigraphs with self-loops, according to the
    Poisson degree-corrected stochastic block model (SBM), which includes the
    traditional SBM as a special case.

    The multigraphs are generated with probability
899
900
901

    .. math::

902
        P({\boldsymbol A}|{\boldsymbol \theta},{\boldsymbol \lambda},{\boldsymbol b})
903
            = \prod_{i<j}\frac{e^{-\lambda_{b_ib_j}\theta_i\theta_j}(\lambda_{b_ib_j}\theta_i\theta_j)^{A_{ij}}}{A_{ij}!}
904
              \times\prod_i\frac{e^{-\lambda_{b_ib_i}\theta_i^2/2}(\lambda_{b_ib_i}\theta_i^2/2)^{A_{ij}/2}}{(A_{ij}/2)!},
905
906
907

    where :math:`\lambda_{rs}` is the edge propensity between groups :math:`r`
    and :math:`s`, and :math:`\theta_i` is the propensity of node i to receive
908
909
910
911
912
913
    edges, which is proportional to its expected degree. Note that in the
    algorithm it is assumed that the node propensities are normalized for each
    group,

    .. math::

Tiago Peixoto's avatar
Tiago Peixoto committed
914
        \sum_i\theta_i\delta_{b_i,r} = 1,
915

Tiago Peixoto's avatar
Tiago Peixoto committed
916
917
918
919
920
921
922
923
    such that the value :math:`\lambda_{rs}` will correspond to the average
    number of edges between groups :math:`r` and :math:`s` (or twice that if
    :math:`r = s`). If the supplied values of :math:`\theta_i` are not
    normalized as above, they will be normalized prior to the generation of the
    graph.

    For directed graphs, the probability is analogous, with :math:`\lambda_{rs}`
    being in general asymmetric:
924
925
926

    .. math::

927
        P({\boldsymbol A}|{\boldsymbol \theta},{\boldsymbol \lambda},{\boldsymbol b})
928
929
            = \prod_{ij}\frac{e^{-\lambda_{b_ib_j}\theta^+_i\theta^-_j}(\lambda_{b_ib_j}\theta^+_i\theta^-_j)^{A_{ij}}}{A_{ij}!}.

Tiago Peixoto's avatar
Tiago Peixoto committed
930
931
932
933
934
935
936
937
938
    Again, the same normalization is assumed:

    .. math::

        \sum_i\theta_i^+\delta_{b_i,r} = \sum_i\theta_i^-\delta_{b_i,r} = 1,

    such that the value :math:`\lambda_{rs}` will correspond to the average
    number of directed edges between groups :math:`r` and :math:`s`.

939
940
941
942
943
    The traditional (i.e. non-degree-corrected) SBM is recovered from the above
    model by setting :math:`\theta_i=1/n_{b_i}` (or
    :math:`\theta^+_i=\theta^-_i=1/n_{b_i}` in the directed case), which is done
    automatically if ``out_degs`` and ``in_degs`` are not specified.

944
945
946
947
948
949
950
951
952
    In case the parameter ``micro_degs == True`` is passed, a `microcanical
    <https://en.wikipedia.org/wiki/Microcanonical_ensemble>`_ model is used
    instead, where both the number of edges between groups as well as the
    degrees of the nodes are preserved `exactly`, instead of only on
    expectation. In this case, the parameters are interpreted as
    :math:`{\boldsymbol\lambda}\equiv{\boldsymbol e}` and
    :math:`{\boldsymbol\theta}\equiv{\boldsymbol k}`, where :math:`e_{rs}` is
    the number of edges between groups :math:`r` and :math:`s` (or twice that if
    :math:`r=s` in the undirected case), and :math:`k_i` is the degree of node
953
954
    :math:`i`. This model is a generalization of the configuration model, where
    multigraphs are sampled with probability
955
956
957
958
959
960
961
962
963
964
965
966
967

    .. math::

        P({\boldsymbol A}|{\boldsymbol k},{\boldsymbol e},{\boldsymbol b}) =
        \frac{\prod_{r<s}e_{rs}!\prod_re_{rr}!!\prod_ik_i!}{\prod_re_r!\prod_{i<j}A_{ij}!\prod_iA_{ii}!!}.

    and in the directed case with probability

    .. math::

        P({\boldsymbol A}|{\boldsymbol k}^+,{\boldsymbol k}^-,{\boldsymbol e},{\boldsymbol b}) =
        \frac{\prod_{rs}e_{rs}!\prod_ik^+_i!k^-_i!}{\prod_re^+_r!e^-_r!\prod_{ij}A_{ij}!}.

968
969
    where :math:`e^+_r = \sum_se_{rs}`, :math:`e^-_r = \sum_se_{sr}`,
    :math:`k^+_i = \sum_jA_{ij}` and :math:`k^-_i = \sum_jA_{ji}`.
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987

    In the non-degree-corrected case, if ``micro_ers == True``, the
    microcanonical model corresponds to

    .. math::

        P({\boldsymbol A}|{\boldsymbol e},{\boldsymbol b}) =
        \frac{\prod_{r<s}e_{rs}!\prod_re_{rr}!!}{\prod_rn_r^{e_r}\prod_{i<j}A_{ij}!\prod_iA_{ii}!!},

    and in the directed case to

    .. math::

        P({\boldsymbol A}|{\boldsymbol e},{\boldsymbol b}) =
        \frac{\prod_{rs}e_{rs}!}{\prod_rn_r^{e_r^+ + e_r^-}\prod_{ij}A_{ij}!}.

    In every case above, the final graph is generated in time :math:`O(V + E +
    B)`, where :math:`B` is the number of groups.
988
989
990
991
992
993
994
995

    Examples
    --------

    >>> g = gt.collection.data["polblogs"]
    >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
    >>> g = gt.Graph(g, prune=True)
    >>> state = gt.minimize_blockmodel_dl(g)
Tiago Peixoto's avatar
Tiago Peixoto committed
996
    >>> u = gt.generate_sbm(state.b.a, gt.adjacency(state.get_bg(),
997
    ...                                             state.get_ers()).T,
998
999
1000
    ...                     g.degree_property_map("out").a,
    ...                     g.degree_property_map("in").a, directed=True)
    >>> gt.graph_draw(g, g.vp.pos, output="polblogs-sbm.png")