nested_blockmodel.py 74.3 KB
Newer Older
1
2
3
4
5
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2015 Tiago de Paula Peixoto <tiago@skewed.de>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import, print_function
import sys
if sys.version_info < (3,):
    range = xrange

26
27
from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap, \
    infect_vertex_property
28
from .. stats import label_self_loops
29
30
from .. generation import graph_union
from .. topology import shortest_path
31
32
33
34
35
36
37
38
39
import random
from numpy import *
import numpy
from scipy.optimize import fsolve, fminbound
import scipy.special
from collections import defaultdict
import copy

from . blockmodel import *
40
from . blockmodel import _bm_test
41
42
43
44
45
46
47

class NestedBlockState(object):
    r"""This class encapsulates the nested block state of a given graph.

    This must be instantiated and used by functions such as :func:`nested_mcmc_sweep`.

    The instances of this class contain a data member called ``levels``, which
48
49
50
    is a list of :class:`~graph_tool.community.BlockState` (or
    :class:`~graph_tool.community.OverlapBlockState`) instances, containing the
    entire nested hierarchy.
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge weights (i.e. multiplicity).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex weights (i.e. multiplicity).
    bs : list of :class:`~graph_tool.PropertyMap` or :class:`~numpy.ndarray` instances (optional, default: ``None``)
        Initial block labels on the vertices, for each hierarchy level.
    Bs : list of ``int`` (optional, default: ``None``)
        Number of blocks for each hierarchy level.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be used in the bottom level, otherwise the traditional variant will be used.
67
68
69
70
71
72
    overlap : ``bool`` (optional, default: ``False``)
        If ``True``, the mixed-membership version of the blockmodel will be used
        at the lowest level.
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
73
74
75
76
77
78
    max_BE : ``int`` (optional, default: ``1000``)
        If the number of blocks exceeds this number, a sparse representation of
        the block graph is used, which is slightly less efficient, but uses less
        memory,
    """

79
80
    def __init__(self, g, eweight=None, vweight=None, ec=None, bs=None, Bs=None,
                 deg_corr=True, overlap=False, layers=False, clabel=None,
81
                 max_BE=1000):
82
        L = len(Bs) if Bs is not None else len(bs)
83
84
85
        self.g = cg = g
        self.vweight = vcount = vweight
        self.eweight = ecount = eweight
86
87
        self.ec = ec
        self.layers = layers
88
89

        self.levels = []
90
91
92
        self.overlap = overlap
        self.deg_corr = deg_corr
        self.clabel = clabel if clabel is not None else g.new_vertex_property("int")
93

94
95
        for l in range(L):
            Bl = Bs[l] if Bs is not None else None
96
97
            bl = None
            if bs is not None:
98
99
                if isinstance(bs[l], PropertyMap):
                    bl = cg.own_property(bs[l])
100
                else:
101
                    bl = bs[l]
102

103
            if l == 0:
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
                if ec is None:
                    if overlap:
                        state = OverlapBlockState(g, B=Bl, b=bl,
                                                  eweight=ecount,
                                                  vweight=vcount,
                                                  deg_corr=deg_corr != False,
                                                  clabel=self.clabel,
                                                  max_BE=max_BE)
                        self.clabel = state.clabel.copy()
                        state.clabel.a = 0
                    else:
                        state = BlockState(g, B=Bl, b=bl,
                                           eweight=ecount,
                                           vweight=vcount,
                                           deg_corr=deg_corr != False,
                                           #clabel=self.clabel,
                                           max_BE=max_BE)
121
                else:
122
123
124
125
126
127
128
129
130
131
132
133
134
135
                    state = CovariateBlockState(g, B=Bl, b=bl,
                                                ec=ec,
                                                layers=layers,
                                                eweight=ecount,
                                                vweight=vcount,
                                                deg_corr=deg_corr != False,
                                                clabel=self.clabel,
                                                overlap=overlap,
                                                max_BE=max_BE)
                    if overlap:
                        self.clabel = state.clabel.copy()
                        state.clabel.a = 0


136
            else:
137
138
139
                state = self.levels[-1].get_block_state(b=bl,
                                                        overlap=self.overlap == "full",
                                                        deg_corr=self.deg_corr == "full")[0]
140
                if _bm_test():
141
142
143
144
                    assert not state.deg_corr, "upper levels must be non-deg-corr"

            self.levels.append(state)

145
146
147
        # for l in range(len(self.levels) - 1):
        #     clabel = self.__project_partition(l, l+1)
        #     self.levels[l].clabel = clabel
148

149
150
151
152
153
154
155
156
157
        if ec is not None:
            self.ec = self.levels[0].ec

    def __repr__(self):
        return "<NestedBlockState object with %d %sblocks,%s%s for graph %s, with %d levels of sizes %s at 0x%x>" % \
            (self.levels[0].B, "overlapping " if self.overlap else "",
             " with %d %s," % (self.levels[0].C, "layers" if self.layers else "covariates") if self.ec is not None else "",
             " degree corrected," if self.deg_corr else "",
             str(self.g), len(self.levels), str([(s.N, s.B) for s in self.levels]), id(self))
158

159
    def copy(self, ec=None, layers=None, deg_corr=None, overlap=None, clabel=None):
160
161
162
163
164
165
166
167
168
        r"""Copies the block state. The parameters override the state properties, and
         have the same meaning as in the constructor.."""

        bs = [s.b.a for s in self.levels]
        if overlap is None:
            overlap = self.overlap
        elif self.overlap and not overlap:
            raise ValueError("Cannot automatically convert overlapping nested state to nonoverlapping")
        elif not self.overlap and overlap:
169
            s = self.levels[0].copy(overlap=True)
170
171
172
            bs[0] = s.b.a
        if deg_corr is None:
            deg_corr = self.deg_corr
173
174
        if layers is None:
            layers = self.layers
175
        return NestedBlockState(self.g, self.eweight, self.vweight,
176
177
178
179
                                self.ec if ec is None else ec, bs,
                                layers=layers, deg_corr=deg_corr,
                                overlap=overlap, clabel=clabel,
                                max_BE=self.levels[0].max_BE)
180
181
182

    def __getstate__(self):
        state = dict(g=self.g,
183
                     ec=self.ec,
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
                     eweight=self.eweight,
                     vweight=self.vweight,
                     overlap=self.overlap,
                     bs=[array(s.b.a) for s in self.levels],
                     clabel=self.clabel,
                     deg_corr=self.deg_corr,
                     max_BE=self.levels[0].max_BE)
        return state

    def __setstate__(self, state):
        self.__init__(**state)
        return state

    def __project_partition(self, l, j):
        """Project partition of level 'j' onto level 'l'"""
        if self.overlap != "full":
            b = self.levels[l].b.copy()
            for i in range(l + 1, j + 1):
                clabel = self.levels[i].b.copy()
                pmap(b, clabel)
        else:
            b = self.levels[j].b.copy()
        return b

    def __propagate_clabel(self, l):
        clabel = self.clabel.copy()
        for j in range(l):
211
212
            bg = self.levels[j].bg
            bclabel = bg.new_vertex_property("int")
213
214
215
216
217
218
219
            reverse_map(self.levels[j].b, bclabel)
            pmap(bclabel, clabel)
            clabel = bclabel
        return clabel

    def __consistency_check(self, op, l):

220
221
        print("consistency check after", op, "at level", l)

222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
        for j in range(len(self.levels)):

            c_state = self.levels[j].copy()
            S1 = self.levels[j].entropy()
            S2 = c_state.entropy()

            assert abs(S1 - S2) < 1e-8 and not isnan(S2) and not isnan(S1), "inconsistency at level %d after %s of level %d, different entropies of copies! (%g, %g)" % (j, op, l, S1, S2)


            if self.levels[j].wr.a.min() == 0:
                print("WARNING: empty blocks at level", j)
            if self.levels[j].b.a.max() + 1 != self.levels[j].B:
                print("WARNING: b.max() + 1 != B at level", j, self.levels[j].b.max() + 1, self.levels[j].B)

        for j in range(len(self.levels) - 1):

            B = self.levels[j].b.a.max() + 1
            bg_state = self.levels[j].get_block_state(b=self.levels[j+1].b.copy(),
                                                      overlap=self.overlap == "full",
                                                      deg_corr=self.deg_corr == "full")[0]

            S1 = bg_state.entropy(dense=True and self.deg_corr != "full", multigraph=True)
            S2 = self.levels[j+1].entropy(dense=True and self.deg_corr != "full", multigraph=True)

            if self.levels[j].B != self.levels[j+1].N or S1 != S2:
                self.print_summary()

                from graph_tool.topology import similarity
                print(bg_state)
                print(self.levels[j+1])
                print("N, B:", bg_state.N, bg_state.B)
                print("N, B:", self.levels[j + 1].N, self.levels[j + 1].B)
                print("similarity:", similarity(bg_state.g, self.levels[j+1].g))
                print("b:", bg_state.b.a)
                print("b:", self.levels[j+1].b.a)

                print("wr:", bg_state.wr.a)
                print("wr:", self.levels[j+1].wr.a)

                print("mrs:", bg_state.mrs.a)
                print("mrs:", self.levels[j+1].mrs.a)

                print("eweight:", bg_state.eweight.a)
                print("eweight:", self.levels[j+1].eweight.a)

                print("vweight:", bg_state.vweight.a)
                print("vweight:", self.levels[j+1].vweight.a)


            assert abs(S1 - S2) < 1e-6 and not isnan(S2) and not isnan(S1), "inconsistency at level %d after %s of level %d, different entropies (%g, %g)" % (j, op, l, S1, S2)
            assert self.levels[j].B == self.levels[j+1].N, "inconsistency  at level %d after %s of level %d, different sizes" % (j + 1, op, l)


275
276
277
278
279
280
281
282
283
284
285
286
287
            # verify hierarchy / clabel consistency
            clabel = self.__project_partition(0, j)
            self.levels[0].clabel.a = self.clabel.a
            assert self.levels[0]._BlockState__check_clabel(),  "inconsistency at level %d after %s of level %d, clabel invalidated" % (j + 1, op, l)
            self.levels[0].clabel.a = 0

            # verify hierarchy consistency
            clabel = self.__project_partition(j, j + 1)
            self.levels[0].clabel.a = self.clabel.a
            assert self.levels[0]._BlockState__check_clabel(),  "inconsistency at level %d after %s of level %d, partition not compatible with upper level" % (j + 1, op, l)
            self.levels[0].clabel.a = 0


288
289
290
291
    def __rebuild_level(self, l, b, clabel=None):
        r"""Replace level ``l`` given the new partition ``b``, and the
        projected upper level partition clabel."""

292
        if _bm_test():
293
294
295
296
297
298
299
300
301
            assert clabel is not None or l == len(self.levels) - 1, "clabel not given for intermediary level"

        if clabel is None:
            clabel = self.levels[l].g.new_vertex_property("int")

        old_b = b.copy()

        state = self.levels[l].copy(b=b, clabel=clabel.a)
        self.levels[l] = state
302
303
        if l == 0:
            self.clabel = state.g.own_property(self.clabel)  # for CovariateBlockState
304
305
306
307
308
309
310
311
312
313

        # upper level
        bclabel = state.get_bclabel()
        bstate = self.levels[l].get_block_state(b=bclabel,
                                                overlap=self.overlap == "full",
                                                deg_corr=self.deg_corr == "full")[0]
        if l == len(self.levels) - 1:
            self.levels.append(None)
        self.levels[l + 1] = bstate

314
315
316
317
318
        self.levels[l].clabel.a = 0
        self.levels[l + 1].clabel.a = 0

        # if l + 1 < len(self.levels) - 1:
        #     self.levels[l + 1].clabel = self.__project_partition(l + 1, l + 2)
319
320
321
322
323
324
325


        if l + 1 < len(self.levels) - 1:
            bstate = self.levels[l + 1].get_block_state(b=self.levels[l + 2].b,
                                                        overlap=self.overlap == "full",
                                                        deg_corr=self.deg_corr == "full")[0]

326
            if _bm_test():
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
                from graph_tool.topology import similarity
                print("- similarity:", similarity(bstate.g, self.levels[l + 2].g))


                if abs(bstate.entropy() - self.levels[l + 2].entropy()) > 1e-6:
                    print("********************** inconsistent rebuild! **************************")

                    print(bstate.b.a)
                    print(self.levels[l + 2].b.a)

                    print(bstate.wr.a)
                    print(self.levels[l + 2].wr.a)

                    print(bstate.eweight.a)
                    print(self.levels[l + 2].eweight.a)

                    nclabel = self.__project_partition(l, l + 1)
                    print(nclabel.a)
                    print(clabel.a)
                    print(self.levels[l].b.a)
                    print(self.levels[l+1].b.a)
                    print(self.levels[l+2].b.a)
                    print(bstate.b.a)
                print ("DS", l, l + 1, bstate.entropy(), self.levels[l + 2].entropy())

        B = self.levels[l].B

354
        if _bm_test():
355
            self.__consistency_check("rebuild", l)
356
357
358
359

    def __delete_level(self, l):
        if l == 0:
            raise ValueError("cannot delete level l=0")
360
361
362
363
364
365
        b = self.__project_partition(l - 1, l)
        if l < len(self.levels) - 1:
            clabel = self.__project_partition(l - 1, l + 1)
        else:
            clabel = None

366
367
        del self.levels[l]

368
369
        self.__rebuild_level(l - 1, b=b, clabel=clabel)

370
        if _bm_test():
371
            self.__consistency_check("delete", l)
372

373
    def __duplicate_level(self, l):
374
        assert l > 0, "attempted to duplicate level 0"
375
376
377
        if not self.levels[l].overlap:
            bstate = self.levels[l].copy(b=self.levels[l].g.vertex_index.copy("int"))
        else:
378
            bstate = self.levels[l].copy(b=arange(self.levels[l].g.num_vertices()))
379
        self.levels.insert(l, bstate)
380

381
        if _bm_test():
382
            self.__consistency_check("duplicate", l)
383
384


385
386
    def level_entropy(self, l, complete=True, dense=False, multigraph=True,
                      norm=True, dl_ent=False):
387
388
389
390
391
392
393
394
395
396
397
        r"""Compute the description length of hierarchy level l.

        Parameters
        ----------
        l : ``int``
            Hierarchy level.
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
398
399
400
401
402
403
404
405
        multigraph : ``bool`` (optional, default: ``True``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.
406
407
408
409
        """

        bstate = self.levels[l]

410
        S = bstate.entropy(dl=True, edges_dl=False,
411
                           dense=dense or (l > 0 and self.deg_corr != "full"),
412
                           multigraph=multigraph or l > 0,
413
                           complete=complete or (l > 0 and self.deg_corr == "full"),
414
                           norm=norm, dl_ent=dl_ent)
415
416
        return S

417
    def entropy(self, complete=True, dense=False, multigraph=True, norm=False,
418
                dl_ent=False):
419
420
421
422
423
424
425
426
427
        r"""Compute the description length of the entire hierarchy.

        Parameters
        ----------
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
428
429
430
431
432
433
434
435
        multigraph : ``bool`` (optional, default: ``True``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.
436
437
438
439
        """

        S = 0
        for l in range(len(self.levels)):
440
441
442
            S += self.level_entropy(l, complete=complete, dense=dense,
                                    multigraph=multigraph, norm=norm,
                                    dl_ent=dl_ent)
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
        return S

    def get_bstack(self):
        r"""Return the nested levels as individual graphs.

        This returns a list of :class:`~graph_tool.Graph` instances
        representing the inferred hierarchy at each level. Each graph has two
        internal vertex and edge property maps named "count" which correspond to
        the vertex and edge counts at the lower level, respectively. Additionally,
        an internal vertex property map named "b" specifies the block partition.
        """

        bstack = []
        for l, bstate in enumerate(self.levels):
            cg = bstate.g
            if l == 0:
                cg = GraphView(cg, skip_properties=True)
            cg.vp["b"] = bstate.b.copy()
            cg.ep["count"] = bstate.eweight
462
            if bstate.overlap:
463
464
465
466
                if self.ec is None:
                    cg.vp["node_index"] = bstate.node_index.copy()
                else:
                    cg.vp["node_index"] = bstate.total_state.node_index.copy()
467

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
            bstack.append(cg)
            if bstate.N == 1:
                break
        if bstack[-1].num_vertices() > 1:
            cg = Graph(directed=bstack[-1].is_directed())
            cg.add_vertex()
            cg.vp["b"] = cg.new_vertex_property("int")
            e = cg.add_edge(0, 0)
            ew = cg.new_edge_property("int")
            ew[e] = self.levels[-1].E
            cg.ep["count"] = ew
            bstack.append(cg)
        return bstack


    def project_level(self, l):
484
485
486
        r"""Project the partition at level ``l`` onto the lowest level, and return the
        corresponding :class:`~graph_tool.community.BlockState` (or
        :class:`~graph_tool.community.OverlapBlockState`).  """
487
        if self.overlap != "full":
488
489
490
491
492
493
494
495
            clabel = b = self.levels[l].b.copy()
            while l - 1 >= 0:
                clabel = b
                b = self.levels[l - 1].b.copy()
                pmap(b, clabel)
                l -= 1
        else:
            b = self.levels[l].b.copy()
496
        state = self.levels[0].copy(b=b.a)
497
        return state
498

499
500
501
502
503
504
505
506
507
508
509
510
511
    def merge_layers(self, l_src, l_tgt, revert=False):
        ctxs = []
        for state in self.levels:
            ctxs.append(state.merge_layers(l_src, l_tgt, revert))
        if revert:
            if hasattr(contextlib, "nested"):
                return contextlib.nested(*ctxs)
            else:
                with contextlib.ExitStack() as stack:
                    for ctx in ctxs:
                        stack.enter_context(ctx)
                    return stack.pop_all()

512
513
514
    def print_summary(self):
        for l, state in enumerate(self.levels):
            print("l: %d, N: %d, B: %d" % (l, state.N, state.B))
515

516

517
518
def nested_mcmc_sweep(state, beta=1., c=1., dl=False, sequential=True,
                      parallel=False, verbose=False):
519
520
521
522
523
524
525
526
527
528
529
530
531
    r"""Performs a Markov chain Monte Carlo sweep on all levels of the hierarchy.

    Parameters
    ----------
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
    beta : `float` (optional, default: `1.0`)
        The inverse temperature parameter :math:`\beta`.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
532
533
534
    dl : ``bool`` (optional, default: ``False``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    dS_moves : list of (``float``, ``int``) tuples
       The entropy difference (per edge) and number of accepted block membership
       moves after a full sweep for each level.


    Notes
    -----

    This algorithm performs a Markov chain Monte Carlo sweep on each level of the
    network, via the function :func:`~graph_tool.community.mcmc_sweep`.

    This algorithm has a worse-case complexity of :math:`O(E \times L)`, where
    :math:`E` is the number of edges in the network, and :math:`L` is the depth
    of the hierarchy.

    Examples
    --------
    .. testsetup:: nested_mcmc

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: nested_mcmc

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.NestedBlockState(g, Bs=[10, 5, 3, 2, 1], deg_corr=True)
       >>> ret = gt.nested_mcmc_sweep(state)
       >>> print(ret)
574
       [(0.0, 0), (0.0, 0), (0.0, 0), (0.0, 0), (0.0, 0)]
575
576
577
578

    References
    ----------

579
580
581
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
582
583
584
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
585
    .. [peixoto-model-2015] Tiago P. Peixoto, "Model selection and hypothesis
586
       testing for large-scale network models with overlapping groups",
587
       Phys. Rev. X 5, 011033 (2015), :doi:`10.1103/PhysRevX.5.011033`,
588
       :arxiv:`1409.3059`.
589
590
591
592
593
594
    """

    rets = []
    for l, bstate in enumerate(state.levels):
        if verbose:
            print("Level:", l, "N:", bstate.N, "B:", bstate.B)
595
596
597
598
599
600
601
602
603
604
605
606
607
608

        # constraint partitions not to invalidate upper layers
        if l < len(state.levels) - 1:
            clabel = state._NestedBlockState__project_partition(l, l + 1)
        else:
            clabel = bstate.g.new_vertex_property("int")

        # propagate externally imposed clabel at the bottom
        cclabel = state._NestedBlockState__propagate_clabel(l)
        cclabel.a += clabel.a * (cclabel.a.max() + 1)
        continuous_map(cclabel)

        bstate.clabel = cclabel

609
610
        ret = mcmc_sweep(bstate, beta=beta, c=c, dl=dl,
                         dense = l > 0 and state.deg_corr != "full",
611
                         multigraph = l > 0,
612
613
                         sequential=sequential, parallel=parallel,
                         verbose=verbose)
614

615
        bstate.clabel.a = 0
616

617
618
619
        rets.append(ret)
    return rets

620
def replace_level(l, state, min_B=None, max_B=None, max_b=None, nsweeps=10,
621
622
623
624
625
                  nmerge_sweeps=10, adaptive_sweeps=True, r=2, c=0, epsilon=0.,
                  sequential=True, parallel=False, dl=False, dense=False,
                  multigraph=True, sparse_thresh=100, verbose=False,
                  checkpoint=None, minimize_state=None, dl_ent=False,
                  confine_layers=False):
626
    r"""Replaces level l with another state with a possibly different number of
627
628
629
    groups. This may change not only the state at level l, but also the one at
    level l + 1, which needs to be 'rebuilt' because of the label changes at
    level l."""
630

631
    if _bm_test():
632
633
        state._NestedBlockState__consistency_check("(before) replace level", l)

634
635
    bstate = state.levels[l]
    g = bstate.g
636
637
    base_g = g if not bstate.overlap else bstate.base_g
    eweight = bstate.eweight if not bstate.overlap else None
638
639
640
641
642
643
    ec = None
    if state.ec is not None:
        if bstate.overlap:
            ec = bstate.base_ec
        else:
            ec = bstate.ec
644

645
646
647
648
649
650
651
    if l > 0 or min_B is None:
        if l + 1 < len(state.levels):
            min_B = state.levels[l + 1].B
        else:
            min_B = 1
    if l > 0 or max_B is None:
        max_B = bstate.N
652

653
654
655
    min_B = max(min_B, state.clabel.a.max() + 1)

    # constraint partitions not to invalidate upper layers
656
657
    if l < len(state.levels) - 1:
        clabel = state._NestedBlockState__project_partition(l, l + 1)
658
    else:
659
660
        clabel = bstate.g.new_vertex_property("int")

661
662
    assert min_B <= max_B, (min_B, max_B, bstate.B, bstate.N, g.num_vertices(),
                            state.clabel.a.max() + 1, clabel.a.max() + 1, l)
663

664
    # propagate externally imposed clabel at the bottom
665
    cclabel = state._NestedBlockState__propagate_clabel(l)
666
    assert cclabel.a.max() <= state.clabel.a.max(),  (cclabel.a.max(), state.clabel.a.max())
667
668
    cclabel.a += clabel.a * (cclabel.a.max() + 1)
    continuous_map(cclabel)
669

670
671
    min_B = max(min_B, cclabel.a.max() + 1)

672
673
    assert min_B <= max_B, (min_B, max_B, bstate.B, bstate.N, g.num_vertices(),
                            cclabel.a.max() + 1, state.clabel.a.max() + 1, clabel.a.max() + 1, l)
674

675
    if _bm_test():
676
        assert bstate._BlockState__check_clabel(), "invalid clabel before minimize!"
677
678
679

    nested_dl = l < len(state.levels) - 1

680
681
    state.levels[l].clabel.a = cclabel.a

682
683
684
685
686
687
688
    Sb = get_b_dl(state.levels[l],
                  dense=(l > 0 and state.deg_corr != "full") or dense,
                  multigraph=l > 0 or multigraph,
                  nested_dl=nested_dl,
                  nested_overlap=state.overlap == "full",
                  dl_ent=dl_ent)

689
690
    state.levels[l].clabel.a = 0

691
    if _bm_test():
692
693
694
        assert clabel.a.max() + 1 <= min_B

    if l == 0 and max_b is not None:
695
696
        istate = state.levels[0].copy(b=max_b.copy(),
                                      clabel=cclabel.a if state.overlap else cclabel)
697
698
        if _bm_test():
            assert istate._BlockState__check_clabel(), "invalid clabel!"
699
700
        if istate.B > max_B:
            istate = multilevel_minimize(istate, B=max_B, nsweeps=nsweeps,
701
702
                                         nmerge_sweeps=nmerge_sweeps,
                                         adaptive_sweeps=adaptive_sweeps, c=c,
703
                                         r=r, dl=dl, sequential=sequential,
704
705
706
                                         parallel=parallel,
                                         greedy_cooling=True, epsilon=epsilon,
                                         confine_layers=confine_layers,
707
                                         verbose=verbose=="full")
708
709
            if _bm_test():
                assert istate._BlockState__check_clabel(), "invalid clabel!"
710
711
712
        init_states = [istate]
    else:
        init_states = None
713

714
715
716
717
718
    res = minimize_blockmodel_dl(base_g,
                                 ec=ec,
                                 layers=state.layers,
                                 confine_layers=confine_layers,
                                 eweight=eweight,
719
720
721
                                 deg_corr=bstate.deg_corr,
                                 nsweeps=nsweeps,
                                 nmerge_sweeps=nmerge_sweeps,
722
                                 adaptive_sweeps=adaptive_sweeps,
723
                                 c=c, r=r, sequential=sequential,
724
725
                                 parallel=parallel,
                                 greedy_cooling=True,
726
                                 epsilon=epsilon,
727
728
                                 max_B=max_B,
                                 min_B=min_B,
729
                                 clabel=cclabel if not bstate.overlap else cclabel.a,
730
                                 max_BE=bstate.max_BE,
731
732
733
734
                                 dl=dl,
                                 dense=(l > 0 and state.deg_corr != "full") or dense,
                                 multigraph=l > 0 or multigraph,
                                 sparse_heuristic=base_g.num_vertices() > sparse_thresh,
735
                                 nested_dl=nested_dl,
736
737
                                 overlap=bstate.overlap,
                                 nested_overlap=state.overlap == "full",
Tiago Peixoto's avatar
Tiago Peixoto committed
738
                                 nonoverlap_compare=False,
739
740
741
                                 nonoverlap_init=False,
                                 init_states=init_states,
                                 verbose=verbose=="full",
742
                                 ##exaustive=g.num_vertices() <= 100,
743
                                 #minimize_state=minimize_state.minimize_state, >>>>>> HERE <<<<<
744
745
746
                                 checkpoint=checkpoint,
                                 dl_ent=dl_ent)

747
    if _bm_test():
748
        assert (res.clabel.a == cclabel.a).all(), (res.clabel.a, cclabel.a)
749
        assert res._BlockState__check_clabel(), "invalid clabel after minimize!"
750

751
752
753
754
755
756
    Sf = get_b_dl(res,
                  dense=(l > 0 and state.deg_corr != "full") or dense,
                  multigraph=l > 0 or multigraph,
                  nested_dl=nested_dl,
                  nested_overlap=state.overlap == "full",
                  dl_ent=dl_ent)
757

758
759
760
761
762
763
    res.clabel.a = 0
    if state.ec is not None:
        for s in res.states:
            s.clabel.a = 0
    b = res.b

764
765
766
767
768
    kept = False
    if Sf - Sb >= -1e-10:
        kept = True
    if Sf - Sb == 0 and bstate.B != state.levels[l].B:
        kept = False
769
770
    if res.B == res.N:
        kept = True
771
772

    if kept:
773
        Sf_rej = Sf
774
775
        Sf = Sb
    else:
776
777
        # rebuild current level
        state._NestedBlockState__rebuild_level(l, b=b, clabel=clabel)
778
779

    if verbose:
780
781
782
783
784
        print("level", l, ": resizing", bstate.B, "->", state.levels[l].B, ", dS:", Sf - Sb, end="")
        if kept:
            print(" [kept, rejected (%d, %g) vs (%d, %g)]" % (res.B, Sf_rej, bstate.B, Sb))
        else:
            print()
785

786
    if _bm_test():
787
788
        state._NestedBlockState__consistency_check("replace level", l)

789
790
791
792
793
794
795
796
797
798
799
    dS = Sf - Sb
    return dS, kept


class NestedMinimizeState(object):
    r"""This object stores information regarding the current entropy minimization
    state, so that the algorithms can resume previously started runs.
    This object can be saved to disk via the :mod:`pickle` interface."""

    def __init__(self):
        self.minimize_state = MinimizeState()
800
        self.l = 0
801
802
        self.bs = []
        self.done = []
803
        self.init = True
804
805
806

    def clear(self):
        self.minimize_state.clear()
807
        self.l = 0
808
809
        del self.bs[:]
        del self.done[:]
810
811
812
813

    def sync(self, state):
        if len(self.bs) == 0:
            for s in state.levels:
814
                self.bs.append(array(s.b.fa))
815
816
817
818
819
820
821
822
823
        while len(self.done) < len(state.levels):
            self.done.append(False)

    def delete(self, l):
        del self.done[l]
        del self.bs[l]

    def insert(self, l, state):
        self.done.insert(l + 1, False)
824
825
        ba = array(state.levels[l].b.fa)
        self.bs.insert(l + 1, ba)
826
827
828

    def mark_level(self, l, done, state):
        while len(state.levels) > len(self.bs):
829
830
            ba = array(state.levels[len(self.bs)].b.fa)
            self.bs.append(ba)
831
832
833
834
835
836
837
            self.done.append(False)

        self.done[l] = done
        if done:
            self.bs[l] = array(state.levels[l].b.fa)

    def clear_mstate(self):
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
        self.minimize_state.clear()

def get_checkpoint_wrap(checkpoint, state, minimize_state, dl_ent):
    S_total = state.entropy(complete=True, dl_ent=dl_ent)

    if checkpoint is not None:
        def check_wrap(bstate, Sb, delta, nmoves, ms):
            l = minimize_state.l
            bstate = state.levels[l]
            S_l = bstate.entropy()
            S = S_total - S_l + Sb
            if bstate is None:
                checkpoint(None, S, delta, nmoves, minimize_state)
            else:
                checkpoint(state, S, delta, nmoves, minimize_state)
        chkp = check_wrap
    else:
        chkp = None

    return chkp
858

859
860

def nested_tree_sweep(state, min_B=None, max_B=None, max_b=None, nsweeps=10,
861
862
863
864
865
                      epsilon=0., r=2., nmerge_sweeps=10, adaptive_sweeps=True,
                      c=0, dl=False, dense=False, multigraph=True,
                      sequential=True, parallel=False, sparse_thresh=100,
                      checkpoint=None, minimize_state=None, frozen_levels=None,
                      confine_layers=False, verbose=False, **kwargs):
866
867
868
869
870
871
872
    r"""Performs one greedy sweep in the entire hierarchy tree, attempting to
    decrease its description length.

    Parameters
    ----------
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
873
874
875
876
877
878
879
    min_B : ``int`` (optional, default: ``None``)
        Minimum number of blocks at the lowest level.
    max_B : ``int`` (optional, default: ``None``)
        Maximum number of blocks at the lowest level.
    max_b : ``int`` (optional, default: ``None``)
        Block partition used for the maximum number of blocks at the lowest
        level.
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    epsilon : ``float`` (optional, default: ``0``)
        Converge criterion for ``adaptive_sweeps``.
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    sparse_thresh : ``int`` (optional, default: ``100``)
        If the number of nodes in the higher level multigraphs exceeds this
        number, the sparse entropy will be used to find the best partition,
        but the dense entropy will be used to compare different partitions.
    checkpoint : function (optional, default: ``None``)
        If provided, this function will be called after each call to
        :func:`mcmc_sweep`. This can be used to store the current state, so it
        can be continued later. The function must have the following signature:

        .. code-block:: python

            def checkpoint(state, S, delta, nmoves, minimize_state):
                ...

        where `state` is either a :class:`~graph_tool.community.NestedBlockState`
        instance or ``None``, `S` is the current entropy value, `delta` is
        the entropy difference in the last MCMC sweep, and `nmoves` is the
        number of accepted block membership moves. The ``minimize_state``
        argument is a :class:`NestedMinimizeState` instance which specifies
        the current state of the algorithm, which can be stored via :mod:`pickle`,
        and supplied via the ``minimize_state`` option below to continue from an
        interrupted run.

        This function will also be called when the MCMC has finished for the
        current value of :math:`B`, in which case ``state == None``, and the
        remaining parameters will be zero, except the last.
    minimize_state : :class:`NestedMinimizeState` (optional, default: ``None``)
        If provided, this will specify an exact point of execution from which
        the algorithm will continue. The expected object is a
        :class:`NestedMinimizeState` instance which will be passed to the
        callback of the ``checkpoint`` option above, and  can be stored by
        :mod:`pickle`.
936
937
938
    frozen_levels : :class:`list` (optional, default: ``None``)
        List of levels (``int``s) which will remain unmodified during the
        algorithm.
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    dS : ``float``
       The description length difference (per edge) after the move.


    Notes
    -----

    This algorithm performs a constrained agglomerative heuristic on each level
    of the network, via the function :func:`~graph_tool.community.multilevel_minimize`.

    This algorithm has worst-case complexity of :math:`O(N\ln^2 N \times L)`,
    where  :math:`N` is the number of nodes in the network, and :math:`L` is
    the depth of the hierarchy.

    References
    ----------

962
963
964
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
965
966
967
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
968
    .. [peixoto-model-2015] Tiago P. Peixoto, "Model selection and hypothesis
969
       testing for large-scale network models with overlapping groups",
970
       Phys. Rev. X 5, 011033 (2015), :doi:`10.1103/PhysRevX.5.011033`,
971
       :arxiv:`1409.3059`.
972
973
    """

974
975
    dl_ent = kwargs.get("dl_ent", False)

976
977
978
979
980
    if minimize_state is None:
        minimize_state = NestedMinimizeState()
    mstate = minimize_state
    mstate.sync(state)

981
982
983
984
985
986
987
    args = dict(state=state, nsweeps=nsweeps, nmerge_sweeps=nmerge_sweeps,
                adaptive_sweeps=adaptive_sweeps, r=r, c=c, epsilon=epsilon,
                sequential=sequential, parallel=parallel, dl=dl, dense=dense,
                multigraph=multigraph, sparse_thresh=sparse_thresh, min_B=min_B,
                max_B=max_B, max_b=max_b, checkpoint=checkpoint,
                minimize_state=minimize_state, dl_ent=dl_ent,
                confine_layers=confine_layers)
988
989
990
991

    #_Si = state.entropy(dense=dense, multigraph=dense)
    dS = 0

992
993
994
    if frozen_levels is None:
        frozen_levels = set()

995
996
    while mstate.l >= 0:
        l = mstate.l
997

998
999
1000
1001
1002
1003
        if mstate.done[l]:
            if verbose:
                print("level", l, ": skipping", state.levels[l].B)
            mstate.l -= 1
            continue

1004
        Si = state.entropy(dl_ent=dl_ent)
1005

1006
        kept = True
1007

1008
1009
1010
1011
1012
1013
1014
1015
1016
        if l in frozen_levels:
            kept = False

        # replace level
        if kept:
            ddS, kept = replace_level(l, verbose=verbose, **args)
            dS += ddS
            mstate.clear_mstate()

1017
        if _bm_test():
1018
1019
            if kept:
                assert abs(state.entropy(dl_ent=dl_ent) - Si) < 1e-6, "inconsistent replace at level %d (%g, %g)" % (l, state.entropy(), Si)
1020
            state._NestedBlockState__consistency_check("replace level", l)
1021
1022

        # delete level
1023
1024
1025
        if (kept and l > 0 and l < len(state.levels) - 1 and
            not (min_B is not None and l == 1 and state.levels[l].B < min_B)):
            Si = state.entropy(dl_ent=dl_ent)
1026

1027
            bstates = [state.levels[l-1], state.levels[l], state.levels[l + 1]]
1028
1029

            state._NestedBlockState__delete_level(l)
1030
            #replace_level(l, **args)
1031

1032
            Sf = state.entropy(dl_ent=dl_ent)
1033
1034
1035
1036
1037

            mstate.clear_mstate()
            if Sf > Si:
                state.levels[l - 1] = bstates[0]
                state.levels.insert(l, bstates[1])
1038
                state.levels[l + 1] = bstates[2]
1039
1040
1041
1042
1043
1044
1045
            else:
                kept = False
                dS += Sf - Si

                mstate.delete(l)

                if verbose:
1046
                    print("level", l, ": deleted", (bstates[1].N, bstates[1].B), ", dS:", Sf - Si, len(state.levels))
1047

1048
            if _bm_test():
1049
1050
                if kept:
                    assert abs(state.entropy(dl_ent=dl_ent) - Si) < 1e-6, "inconsistent delete at level %d (%g, %g)" % (l, state.entropy(), Si)
1051
                state._NestedBlockState__consistency_check("delete complete", l)
1052

1053
1054
1055
1056
1057
1058
1059
1060
1061
        # insert new level (duplicate and replace)
        if kept and l > 0:
            Si = state.entropy(dl_ent=dl_ent)

            bstates = [state.levels[l].copy()]
            if l < len(state.levels) - 1:
                bstates.append(state.levels[l + 1].copy())
            if l < len(state.levels) - 2:
                bstates.append(state.levels[l + 2].copy())
1062
1063
1064

            state._NestedBlockState__duplicate_level(l)

1065
            replace_level(l + 1, verbose=False, **args)
1066

1067
            Sf = state.entropy(dl_ent=dl_ent)
1068
1069
1070
1071

            mstate.clear_mstate()
            if Sf >= Si:
                del state.levels[l + 1]
1072
1073
1074
1075
                for j in range(len(bstates)):
                    state.levels[l + j] = bstates[j]
                if bstates[-1].B == 1:
                    del state.levels[l + len(bstates):]
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
            else:
                kept = False
                dS += Sf - Si

                mstate.insert(l, state)

                l += 1

                if verbose:
                    print("level", l, ": inserted", state.levels[l].B, ", dS:", Sf - Si)

1087
            if _bm_test():
1088
1089
1090
                state._NestedBlockState__consistency_check("delete", l)
                if kept:
                    assert abs(state.entropy(dl_ent=dl_ent) - Si) < 1e-8, "inconsistent delete at level %d (%g, %g)" % (l, state.entropy(), Si)
1091

1092
        mstate.mark_level(l, done=True, state=state)
1093
1094
1095
1096
1097
1098
1099
        if not kept:
            if l + 1 < len(state.levels):
                mstate.mark_level(l + 1, done=False, state=state)
            if l > 0:
                mstate.mark_level(l - 1, done=False, state=state)
            l += 1
        else:
1100
1101
1102
1103
1104
            if ((l + 1 < len(state.levels) and not mstate.done[l + 1]) or
                (l + 1 == len(state.levels) and state.levels[l].B > 1)):
                l += 1
            else:
                l -= 1
1105
1106
1107
1108

        if l >= len(state.levels):
            l = len(state.levels) - 1

1109
        # create a new level at the top with B=1, if necessary
1110
        if l == len(state.levels) - 1 and state.levels[l].B > 1:
1111
1112
            NB = state.levels[l].B if not state.overlap else 2 * state.levels[l].E
            state._NestedBlockState__rebuild_level(l, b=state.levels[l].g.new_vertex_property("int"))
1113
1114
1115
1116
1117
1118
1119
            mstate.mark_level(l + 1, done=False, state=state)
            l += 1

        mstate.l = l
        if checkpoint is not None:
            checkpoint(None, 0, 0, 0, mstate)

1120
        if _bm_test():
1121
1122
1123
            state._NestedBlockState__consistency_check("tree sweep step", l)

    # _Sf = state.entropy(dense=dense, multigraph=dense, dl_ent=dl_ent)
1124
1125
1126
1127
1128

    return dS



1129
1130
1131
1132
1133
1134
1135
def init_nested_state(g, Bs, ec=None, deg_corr=True, overlap=False,
                      layers=False, confine_layers=False, dl=False, dense=False,
                      multigraph=True, eweight=None, vweight=None, clabel=None,
                      nsweeps=10, epsilon=0., r=2, nmerge_sweeps=10,
                      adaptive_sweeps=True, c=0, sequential=True,
                      parallel=False, sparse_thresh=100, checkpoint=None,
                      minimize_state=None, max_BE=1000, verbose=False, **kwargs):
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
    r"""Initializes a nested block hierarchy with sizes given by ``Bs``.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        The graph being modelled.
    Bs : list of ``int`` (optional, default: ``None``)
        Number of blocks for each hierarchy level.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be used in the bottom level, otherwise the traditional variant will be used.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge weights (i.e. multiplicity).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex weights (i.e. multiplicity).
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    epsilon : ``float`` (optional, default: ``0``)
        Converge criterion for ``adaptive_sweeps``.
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    c : ``float`` (optional, default: ``0.``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    sparse_thresh : ``int`` (optional, default: ``100``)
        If the number of nodes in the higher level multigraphs exceeds this
        number, the sparse entropy will be used to find the best partition,
        but the dense entropy will be used to compare different partitions.
    checkpoint : function (optional, default: ``None``)
        If provided, this function will be called after each call to
        :func:`mcmc_sweep`. This can be used to store the current state, so it
        can be continued later. The function must have the following signature:

        .. code-block:: python

            def checkpoint(state, S, delta, nmoves, minimize_state):
                ...

        where `state` is either a :class:`~graph_tool.community.NestedBlockState`
        instance or ``None``, `S` is the current entropy value, `delta` is
        the entropy difference in the last MCMC sweep, and `nmoves` is the
        number of accepted block membership moves. The ``minimize_state``
        argument is a :class:`NestedMinimizeState` instance which specifies
        the current state of the algorithm, which can be stored via :mod:`pickle`,
        and supplied via the ``minimize_state`` option below to continue from an
        interrupted run.

        This function will also be called when the MCMC has finished for the
        current value of :math:`B`, in which case ``state == None``, and the
        remaining parameters will be zero, except the last.
    minimize_state : :class:`NestedMinimizeState` (optional, default: ``None``)
        If provided, this will specify an exact point of execution from which
        the algorithm will continue. The expected object is a
        :class:`NestedMinimizeState` instance which will be passed to the
        callback of the ``checkpoint`` option above, and  can be stored by
        :mod:`pickle`.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    state : :class:`~graph_tool.community.NestedBlockState`
       The initialized nested state.


    Notes
    -----

    This algorithm performs an agglomerative heuristic on each level  of the
    network, via the function :func:`~graph_tool.community.multilevel_minimize`.

    This algorithm has worst-case complexity of :math:`O(N\ln^2 N \times L)`,
    where  :math:`N` is the number of nodes in the network, and :math:`L` is
    the depth of the hierarchy.

    References
    ----------

1230
1231
1232
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
1233
1234
1235
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
1236
    .. [peixoto-model-2015] Tiago P. Peixoto, "Model selection and hypothesis
1237
       testing for large-scale network models with overlapping groups",
1238
       Phys. Rev. X 5, 011033 (2015), :doi:`10.1103/PhysRevX.5.011033`,
1239
       :arxiv:`1409.3059`.
1240
1241
    """

1242
1243
    dl_ent = kwargs.get("dl_ent", False)

1244
1245
1246
1247
    if minimize_state is None:
        minimize_state = NestedMinimizeState()
    mstate = minimize_state

1248
1249
1250
    state = NestedBlockState(g, ec=ec, layers=layers, eweight=eweight,
                             vweight=vweight, Bs=[1], deg_corr=deg_corr,
                             overlap=overlap, clabel=clabel)
1251

1252
    chkp = get_checkpoint_wrap(checkpoint, state, minimize_state, dl_ent)
1253
1254
1255
1256
1257

    bg = g
    ecount = eweight

    for l, B in enumerate(Bs):
1258
        ba = None
1259
1260
1261
        if l < len(mstate.bs):
            ba = mstate.bs[l]
        else:
1262
            if l == 0:
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
                if ec is None:
                    if state.overlap:
                        bstate = OverlapBlockState(bg, B=bg.num_vertices(), #b=bg.vertex_index.copy("int"),
                                                   vweight=vweight,
                                                   eweight=ecount,
                                                   deg_corr=deg_corr != False,
                                                   #clabel=clabel,
                                                   max_BE=max_BE)
                    else:
                        bstate = BlockState(bg, B=bg.num_vertices(), #b=bg.vertex_index.copy("int"),
                                            vweight=vweight,
                                            eweight=ecount,
                                            deg_corr=deg_corr != False,
                                            #clabel=clabel,
                                            max_BE=max_BE)
1278
                else:
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
                    if overlap:
                        if confine_layers:
                            be = init_layer_confined(bg, ec)
                            B_init = None
                        else:
                            be = None
                            B_init = 2 * g.num_edges()
                    else:
                        be = None
                        B_init = g.num_vertices()

                    bstate = CovariateBlockState(bg, ec=ec,
                                                 lasers=layers,
                                                 B=B_init, #b=bg.vertex_index.copy("int"),
                                                 b=be,
                                                 vweight=vweight,
                                                 eweight=ecount,
                                                 deg_corr=deg_corr != False,
                                                 overlap=overlap,
                                                 #clabel=clabel,
                                                 max_BE=max_BE)

1301
1302
1303
1304
1305
1306
1307
1308
            else:
                bstate = state.levels[l-1].get_block_state(b=ba,
                                                           overlap=overlap == "full",
                                                           deg_corr=deg_corr == "full")[0]

            if B == 1:
                bstate.copy(b=bstate.g.new_vertex_property("int").a)
            else:
1309
1310
                bstate = multilevel_minimize(bstate, B, nsweeps=nsweeps,
                                             epsilon=epsilon,
1311
                                             r=r, nmerge_sweeps=nmerge_sweeps,
1312
                                             adaptive_sweeps=adaptive_sweeps,
1313
1314
1315
                                             greedy=True, c=c, dl=dl,
                                             dense=(l > 0 and g.num_vertices() < sparse_thresh) or dense,
                                             multigraph=(l > 0 and g.num_vertices() < sparse_thresh) or multigraph,
1316
1317
1318
                                             sequential=sequential,
                                             parallel=parallel,
                                             verbose=verbose,
1319
1320
                                             checkpoint=chkp,
                                             minimize_state=minimize_state.minimize_state)
1321
1322
            ba = array(bstate.b.fa)
            mstate.bs.append(ba)
1323
            minimize_state.clear_mstate()
1324

1325
        state._NestedBlockState__rebuild_level(len(state.levels) - 1, b=ba)
1326
1327
1328
1329
1330
        if ec is None:
            bg = state.levels[l].bg
            ecount = state.levels[l].mrs
        else:
            bg, ecount, ec = state.levels[l].get_bg()
1331

1332
1333
1334

    for l, B in enumerate(Bs):
        if l + 1 < len(state.levels):
1335
            assert state.levels[l].B == state.levels[l + 1].N, (state.levels[l].B, state.levels[l + 1].N)
1336

1337
1338