nested_blockmodel.py 67.7 KB
Newer Older
1
2
3
4
5
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2014 Tiago de Paula Peixoto <tiago@skewed.de>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import, print_function
import sys
if sys.version_info < (3,):
    range = xrange

from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap
from .. stats import label_self_loops
import random
from numpy import *
import numpy
from scipy.optimize import fsolve, fminbound
import scipy.special
from collections import defaultdict
import copy
import heapq

from . blockmodel import *
38
from . blockmodel import __test__
39
40
41
42
43
44
45

class NestedBlockState(object):
    r"""This class encapsulates the nested block state of a given graph.

    This must be instantiated and used by functions such as :func:`nested_mcmc_sweep`.

    The instances of this class contain a data member called ``levels``, which
46
47
48
    is a list of :class:`~graph_tool.community.BlockState` (or
    :class:`~graph_tool.community.OverlapBlockState`) instances, containing the
    entire nested hierarchy.
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge weights (i.e. multiplicity).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex weights (i.e. multiplicity).
    bs : list of :class:`~graph_tool.PropertyMap` or :class:`~numpy.ndarray` instances (optional, default: ``None``)
        Initial block labels on the vertices, for each hierarchy level.
    Bs : list of ``int`` (optional, default: ``None``)
        Number of blocks for each hierarchy level.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be used in the bottom level, otherwise the traditional variant will be used.
65
66
67
68
69
70
    overlap : ``bool`` (optional, default: ``False``)
        If ``True``, the mixed-membership version of the blockmodel will be used
        at the lowest level.
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
71
72
73
74
75
76
77
    max_BE : ``int`` (optional, default: ``1000``)
        If the number of blocks exceeds this number, a sparse representation of
        the block graph is used, which is slightly less efficient, but uses less
        memory,
    """

    def __init__(self, g, eweight=None, vweight=None, bs=None,
78
79
                 Bs=None, deg_corr=True, overlap=False, clabel=None,
                 max_BE=1000):
80
        L = len(Bs) if Bs is not None else len(bs)
81
82
83
        self.g = cg = g
        self.vweight = vcount = vweight
        self.eweight = ecount = eweight
84
85

        self.levels = []
86
87
88
        self.overlap = overlap
        self.deg_corr = deg_corr
        self.clabel = clabel if clabel is not None else g.new_vertex_property("int")
89

90
91
        for l in range(L):
            Bl = Bs[l] if Bs is not None else None
92
93
            bl = None
            if bs is not None:
94
95
                if isinstance(bs[l], PropertyMap):
                    bl = cg.own_property(bs[l])
96
                else:
97
                    bl = bs[l]
98

99
100
101
102
103
104
105
106
107
            if l == 0:
                if overlap:
                    state = OverlapBlockState(g, B=Bl, b=bl,
                                              eweight=ecount,
                                              vweight=vcount,
                                              deg_corr=deg_corr != False,
                                              clabel=self.clabel,
                                              max_BE=max_BE)
                    self.clabel = state.clabel.copy()
108
                    state.clabel.a = 0
109
                else:
110
111
112
113
                    state = BlockState(g, B=Bl, b=bl,
                                       eweight=ecount,
                                       vweight=vcount,
                                       deg_corr=deg_corr != False,
114
                                       #clabel=self.clabel,
115
                                       max_BE=max_BE)
116
            else:
117
118
119
120
121
122
123
124
                state = self.levels[-1].get_block_state(b=bl,
                                                        overlap=self.overlap == "full",
                                                        deg_corr=self.deg_corr == "full")[0]
                if __test__:
                    assert not state.deg_corr, "upper levels must be non-deg-corr"

            self.levels.append(state)

125
126
127
        # for l in range(len(self.levels) - 1):
        #     clabel = self.__project_partition(l, l+1)
        #     self.levels[l].clabel = clabel
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184


    def copy(self, deg_corr=None, overlap=None, clabel=None):
        r"""Copies the block state. The parameters override the state properties, and
         have the same meaning as in the constructor.."""

        bs = [s.b.a for s in self.levels]
        if overlap is None:
            overlap = self.overlap
        elif self.overlap and not overlap:
            raise ValueError("Cannot automatically convert overlapping nested state to nonoverlapping")
        elif not self.overlap and overlap:
            s = self.level[0].copy(overlap=True)
            bs[0] = s.b.a
        if deg_corr is None:
            deg_corr = self.deg_corr
        return NestedBlockState(self.g, self.eweight, self.vweight,
                                bs, deg_corr=deg_corr, overlap=overlap,
                                clabel=clabel, max_BE=self.levels[0].max_BE)

    def __getstate__(self):
        state = dict(g=self.g,
                     eweight=self.eweight,
                     vweight=self.vweight,
                     overlap=self.overlap,
                     bs=[array(s.b.a) for s in self.levels],
                     clabel=self.clabel,
                     deg_corr=self.deg_corr,
                     max_BE=self.levels[0].max_BE)
        return state

    def __setstate__(self, state):
        self.__init__(**state)
        return state

    def __project_partition(self, l, j):
        """Project partition of level 'j' onto level 'l'"""
        if self.overlap != "full":
            b = self.levels[l].b.copy()
            for i in range(l + 1, j + 1):
                clabel = self.levels[i].b.copy()
                pmap(b, clabel)
        else:
            b = self.levels[j].b.copy()
        return b

    def __propagate_clabel(self, l):
        clabel = self.clabel.copy()
        for j in range(l):
            bclabel = self.levels[j].bg.new_vertex_property("int")
            reverse_map(self.levels[j].b, bclabel)
            pmap(bclabel, clabel)
            clabel = bclabel
        return clabel

    def __consistency_check(self, op, l):

185
186
        print("consistency check after", op, "at level", l)

187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
        for j in range(len(self.levels)):

            c_state = self.levels[j].copy()
            S1 = self.levels[j].entropy()
            S2 = c_state.entropy()

            assert abs(S1 - S2) < 1e-8 and not isnan(S2) and not isnan(S1), "inconsistency at level %d after %s of level %d, different entropies of copies! (%g, %g)" % (j, op, l, S1, S2)


            if self.levels[j].wr.a.min() == 0:
                print("WARNING: empty blocks at level", j)
            if self.levels[j].b.a.max() + 1 != self.levels[j].B:
                print("WARNING: b.max() + 1 != B at level", j, self.levels[j].b.max() + 1, self.levels[j].B)

        for j in range(len(self.levels) - 1):

            B = self.levels[j].b.a.max() + 1
            bg_state = self.levels[j].get_block_state(b=self.levels[j+1].b.copy(),
                                                      overlap=self.overlap == "full",
                                                      deg_corr=self.deg_corr == "full")[0]

            S1 = bg_state.entropy(dense=True and self.deg_corr != "full", multigraph=True)
            S2 = self.levels[j+1].entropy(dense=True and self.deg_corr != "full", multigraph=True)

            if self.levels[j].B != self.levels[j+1].N or S1 != S2:
                self.print_summary()

                from graph_tool.topology import similarity
                print(bg_state)
                print(self.levels[j+1])
                print("N, B:", bg_state.N, bg_state.B)
                print("N, B:", self.levels[j + 1].N, self.levels[j + 1].B)
                print("similarity:", similarity(bg_state.g, self.levels[j+1].g))
                print("b:", bg_state.b.a)
                print("b:", self.levels[j+1].b.a)

                print("wr:", bg_state.wr.a)
                print("wr:", self.levels[j+1].wr.a)

                print("mrs:", bg_state.mrs.a)
                print("mrs:", self.levels[j+1].mrs.a)

                print("eweight:", bg_state.eweight.a)
                print("eweight:", self.levels[j+1].eweight.a)

                print("vweight:", bg_state.vweight.a)
                print("vweight:", self.levels[j+1].vweight.a)


            assert abs(S1 - S2) < 1e-6 and not isnan(S2) and not isnan(S1), "inconsistency at level %d after %s of level %d, different entropies (%g, %g)" % (j, op, l, S1, S2)
            assert self.levels[j].B == self.levels[j+1].N, "inconsistency  at level %d after %s of level %d, different sizes" % (j + 1, op, l)


240
241
242
243
244
245
246
247
248
249
250
251
252
            # verify hierarchy / clabel consistency
            clabel = self.__project_partition(0, j)
            self.levels[0].clabel.a = self.clabel.a
            assert self.levels[0]._BlockState__check_clabel(),  "inconsistency at level %d after %s of level %d, clabel invalidated" % (j + 1, op, l)
            self.levels[0].clabel.a = 0

            # verify hierarchy consistency
            clabel = self.__project_partition(j, j + 1)
            self.levels[0].clabel.a = self.clabel.a
            assert self.levels[0]._BlockState__check_clabel(),  "inconsistency at level %d after %s of level %d, partition not compatible with upper level" % (j + 1, op, l)
            self.levels[0].clabel.a = 0


253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
    def __rebuild_level(self, l, b, clabel=None):
        r"""Replace level ``l`` given the new partition ``b``, and the
        projected upper level partition clabel."""

        if __test__:
            assert clabel is not None or l == len(self.levels) - 1, "clabel not given for intermediary level"

        if clabel is None:
            clabel = self.levels[l].g.new_vertex_property("int")

        old_b = b.copy()

        state = self.levels[l].copy(b=b, clabel=clabel.a)
        self.levels[l] = state

        # upper level
        bclabel = state.get_bclabel()
        bstate = self.levels[l].get_block_state(b=bclabel,
                                                overlap=self.overlap == "full",
                                                deg_corr=self.deg_corr == "full")[0]
        if l == len(self.levels) - 1:
            self.levels.append(None)
        self.levels[l + 1] = bstate

277
278
279
280
281
        self.levels[l].clabel.a = 0
        self.levels[l + 1].clabel.a = 0

        # if l + 1 < len(self.levels) - 1:
        #     self.levels[l + 1].clabel = self.__project_partition(l + 1, l + 2)
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325


        if l + 1 < len(self.levels) - 1:
            bstate = self.levels[l + 1].get_block_state(b=self.levels[l + 2].b,
                                                        overlap=self.overlap == "full",
                                                        deg_corr=self.deg_corr == "full")[0]

            if __test__:
                from graph_tool.topology import similarity
                print("- similarity:", similarity(bstate.g, self.levels[l + 2].g))
                # if similarity(bstate.g, self.levels[l + 2].g) < 1:
                #     from graph_tool.draw import graph_draw
                #     graph_draw(bstate.g)
                #     graph_draw(self.levels[l + 2].g)


                if abs(bstate.entropy() - self.levels[l + 2].entropy()) > 1e-6:
                    print("********************** inconsistent rebuild! **************************")

                    print(bstate.b.a)
                    print(self.levels[l + 2].b.a)

                    print(bstate.wr.a)
                    print(self.levels[l + 2].wr.a)

                    print(bstate.eweight.a)
                    print(self.levels[l + 2].eweight.a)

                    # from graph_tool.draw import graph_draw, prop_to_size
                    # graph_draw(bstate.g, vertex_fill_color=bstate.b)
                    # graph_draw(self.levels[l + 2].g, vertex_fill_color=self.levels[l + 2].b)
                    nclabel = self.__project_partition(l, l + 1)
                    print(nclabel.a)
                    print(clabel.a)
                    print(self.levels[l].b.a)
                    print(self.levels[l+1].b.a)
                    print(self.levels[l+2].b.a)
                    print(bstate.b.a)
                print ("DS", l, l + 1, bstate.entropy(), self.levels[l + 2].entropy())

        B = self.levels[l].B

        if __test__:
            self.__consistency_check("rebuild", l)
326
327
328
329

    def __delete_level(self, l):
        if l == 0:
            raise ValueError("cannot delete level l=0")
330
331
332
333
334
335
        b = self.__project_partition(l - 1, l)
        if l < len(self.levels) - 1:
            clabel = self.__project_partition(l - 1, l + 1)
        else:
            clabel = None

336
337
        del self.levels[l]

338
339
340
341
        self.__rebuild_level(l - 1, b=b, clabel=clabel)

        if __test__:
            self.__consistency_check("delete", l)
342

343
    def __duplicate_level(self, l):
344
        assert l > 0, "attempted to duplicate level 0"
345
346
347
        if not self.levels[l].overlap:
            bstate = self.levels[l].copy(b=self.levels[l].g.vertex_index.copy("int"))
        else:
348
            bstate = self.levels[l].copy(b=arange(self.levels[l].g.num_vertices()))
349
        self.levels.insert(l, bstate)
350

351
352
        if __test__:
            self.__consistency_check("duplicate", l)
353
354


355
356
    def level_entropy(self, l, complete=True, dense=False, multigraph=True,
                      norm=True, dl_ent=False):
357
358
359
360
361
362
363
364
365
366
367
        r"""Compute the description length of hierarchy level l.

        Parameters
        ----------
        l : ``int``
            Hierarchy level.
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
368
369
370
371
372
373
374
375
        multigraph : ``bool`` (optional, default: ``True``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.
376
377
378
379
        """

        bstate = self.levels[l]

380
381
        S = bstate.entropy(dl=True, partition_dl=True,
                           dense=dense or (l > 0 and self.deg_corr != "full"),
382
                           multigraph=multigraph or l > 0,
383
384
385
                           complete=complete or (l > 0 and self.deg_corr == "full"),
                           norm=norm,
                           dl_ent=dl_ent)
386

387
388
        return S

389
390
    def entropy(self, complete=True, dense=False, multigraph=True, norm=True,
                dl_ent=False):
391
392
393
394
395
396
397
398
399
        r"""Compute the description length of the entire hierarchy.

        Parameters
        ----------
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
400
401
402
403
404
405
406
407
        multigraph : ``bool`` (optional, default: ``True``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.
408
409
410
411
        """

        S = 0
        for l in range(len(self.levels)):
412
413
414
            S += self.level_entropy(l, complete=complete, dense=dense,
                                    multigraph=multigraph, norm=norm,
                                    dl_ent=dl_ent)
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
        return S

    def get_bstack(self):
        r"""Return the nested levels as individual graphs.

        This returns a list of :class:`~graph_tool.Graph` instances
        representing the inferred hierarchy at each level. Each graph has two
        internal vertex and edge property maps named "count" which correspond to
        the vertex and edge counts at the lower level, respectively. Additionally,
        an internal vertex property map named "b" specifies the block partition.
        """

        bstack = []
        for l, bstate in enumerate(self.levels):
            cg = bstate.g
            if l == 0:
                cg = GraphView(cg, skip_properties=True)
            cg.vp["b"] = bstate.b.copy()
            cg.ep["count"] = bstate.eweight
434
435
436
            if bstate.overlap:
                cg.vp["node_index"] = bstate.node_index.copy()

437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
            bstack.append(cg)
            if bstate.N == 1:
                break
        if bstack[-1].num_vertices() > 1:
            cg = Graph(directed=bstack[-1].is_directed())
            cg.add_vertex()
            cg.vp["b"] = cg.new_vertex_property("int")
            e = cg.add_edge(0, 0)
            ew = cg.new_edge_property("int")
            ew[e] = self.levels[-1].E
            cg.ep["count"] = ew
            bstack.append(cg)
        return bstack


    def project_level(self, l):
453
454
455
        r"""Project the partition at level ``l`` onto the lowest level, and return the
        corresponding :class:`~graph_tool.community.BlockState` (or
        :class:`~graph_tool.community.OverlapBlockState`).  """
456
        if self.overlap != "full":
457
458
459
460
461
462
463
464
            clabel = b = self.levels[l].b.copy()
            while l - 1 >= 0:
                clabel = b
                b = self.levels[l - 1].b.copy()
                pmap(b, clabel)
                l -= 1
        else:
            b = self.levels[l].b.copy()
465
        state = self.levels[0].copy(b=b.a)
466
        return state
467

468
469
470
    def print_summary(self):
        for l, state in enumerate(self.levels):
            print("l: %d, N: %d, B: %d" % (l, state.N, state.B))
471

472
473

def nested_mcmc_sweep(state, beta=1., c=1., dl=False, sequential=True, verbose=False):
474
475
476
477
478
479
480
481
482
483
484
485
486
    r"""Performs a Markov chain Monte Carlo sweep on all levels of the hierarchy.

    Parameters
    ----------
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
    beta : `float` (optional, default: `1.0`)
        The inverse temperature parameter :math:`\beta`.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
487
488
489
    dl : ``bool`` (optional, default: ``False``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    dS_moves : list of (``float``, ``int``) tuples
       The entropy difference (per edge) and number of accepted block membership
       moves after a full sweep for each level.


    Notes
    -----

    This algorithm performs a Markov chain Monte Carlo sweep on each level of the
    network, via the function :func:`~graph_tool.community.mcmc_sweep`.

    This algorithm has a worse-case complexity of :math:`O(E \times L)`, where
    :math:`E` is the number of edges in the network, and :math:`L` is the depth
    of the hierarchy.

    Examples
    --------
    .. testsetup:: nested_mcmc

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: nested_mcmc

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.NestedBlockState(g, Bs=[10, 5, 3, 2, 1], deg_corr=True)
       >>> ret = gt.nested_mcmc_sweep(state)
       >>> print(ret)
529
       [(0.0, 0), (0.0, 0), (0.0, 0), (0.0, 0), (0.0, 0)]
530
531
532
533

    References
    ----------

534
535
536
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
537
538
539
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
540
541
542
    .. [peixoto-model-2014] Tiago P. Peixoto, "Model selection and hypothesis
       testing for large-scale network models with overlapping groups",
       :arxiv:`1409.3059`.
543
544
545
546
547
548
    """

    rets = []
    for l, bstate in enumerate(state.levels):
        if verbose:
            print("Level:", l, "N:", bstate.N, "B:", bstate.B)
549
550
551
552
553
554
555
556
557
558
559
560
561
562

        # constraint partitions not to invalidate upper layers
        if l < len(state.levels) - 1:
            clabel = state._NestedBlockState__project_partition(l, l + 1)
        else:
            clabel = bstate.g.new_vertex_property("int")

        # propagate externally imposed clabel at the bottom
        cclabel = state._NestedBlockState__propagate_clabel(l)
        cclabel.a += clabel.a * (cclabel.a.max() + 1)
        continuous_map(cclabel)

        bstate.clabel = cclabel

563
564
        ret = mcmc_sweep(bstate, beta=beta, c=c, dl=dl,
                         dense = l > 0 and state.deg_corr != "full",
565
                         multigraph = l > 0,
566
                         sequential=sequential, verbose=verbose)
567
568
569

        bstate.clable.a = 0

570
571
572
        rets.append(ret)
    return rets

573
574
575
576
577
def replace_level(l, state, min_B=None, max_B=None, max_b=None, nsweeps=10,
                  nmerge_sweeps=10, r=2, c=0, epsilon=0., sequential=True,
                  dl=False, dense=False, multigraph=True, sparse_thresh=100,
                  verbose=False, checkpoint=None, minimize_state=None,
                  dl_ent=False):
578
    r"""Replaces level l with another state with a possibly different number of
579
580
581
    groups. This may change not only the state at level l, but also the one at
    level l + 1, which needs to be 'rebuilt' because of the label changes at
    level l."""
582

583
584
585
    if __test__:
        state._NestedBlockState__consistency_check("(before) replace level", l)

586
587
    bstate = state.levels[l]
    g = bstate.g
588
589
    base_g = g if not bstate.overlap else bstate.base_g
    eweight = bstate.eweight if not bstate.overlap else None
590

591
592
593
594
595
596
597
    if l > 0 or min_B is None:
        if l + 1 < len(state.levels):
            min_B = state.levels[l + 1].B
        else:
            min_B = 1
    if l > 0 or max_B is None:
        max_B = bstate.N
598

599
600
601
    min_B = max(min_B, state.clabel.a.max() + 1)

    # constraint partitions not to invalidate upper layers
602
603
    if l < len(state.levels) - 1:
        clabel = state._NestedBlockState__project_partition(l, l + 1)
604
    else:
605
606
        clabel = bstate.g.new_vertex_property("int")

607
    assert min_B <= max_B, (min_B, max_B, bstate.B, bstate.N, g.num_vertices(), state.clabel.a.max() + 1, clabel.a.max() + 1, l)
608

609
    # propagate externally imposed clabel at the bottom
610
    cclabel = state._NestedBlockState__propagate_clabel(l)
611
    assert cclabel.a.max() <= state.clabel.a.max(),  (cclabel.a.max(), state.clabel.a.max())
612
613
    cclabel.a += clabel.a * (cclabel.a.max() + 1)
    continuous_map(cclabel)
614

615
616
    min_B = max(min_B, cclabel.a.max() + 1)

617
    assert min_B <= max_B, (min_B, max_B, bstate.B, bstate.N, g.num_vertices(), cclabel.a.max() + 1, state.clabel.a.max() + 1, clabel.a.max() + 1, l)
618
619
620

    if __test__:
        assert bstate._BlockState__check_clabel(), "invalid clabel before minimize!"
621
622
623

    nested_dl = l < len(state.levels) - 1

624
625
626
627
628
629
630
631
632
633
634
    Sb = get_b_dl(state.levels[l],
                  dense=(l > 0 and state.deg_corr != "full") or dense,
                  multigraph=l > 0 or multigraph,
                  nested_dl=nested_dl,
                  nested_overlap=state.overlap == "full",
                  dl_ent=dl_ent)

    if __test__:
        assert clabel.a.max() + 1 <= min_B

    if l == 0 and max_b is not None:
635
636
637
        istate = state.levels[0].copy(b=max_b.copy(),
                                      clabel=cclabel.a if state.overlap else cclabel)
        assert istate._BlockState__check_clabel(), "invalid clabel!"
638
639
640
641
642
643
644
645
646
647
648
        if istate.B > max_B:
            istate = multilevel_minimize(istate, B=max_B, nsweeps=nsweeps,
                                         nmerge_sweeps=nmerge_sweeps, c=c,
                                         r=r, dl=dl, sequential=sequential,
                                         adaptive_sweeps=True,
                                         greedy_cooling=True,
                                         epsilon=epsilon,
                                         verbose=verbose=="full")
        init_states = [istate]
    else:
        init_states = None
649

650
651
652
653
654
    res = minimize_blockmodel_dl(base_g, eweight=eweight,
                                 deg_corr=bstate.deg_corr,
                                 nsweeps=nsweeps,
                                 nmerge_sweeps=nmerge_sweeps,
                                 c=c, r=r, sequential=sequential,
655
                                 adaptive_sweeps=True, greedy_cooling=True,
656
                                 epsilon=epsilon,
657
658
                                 max_B=max_B,
                                 min_B=min_B,
659
                                 clabel=cclabel if not bstate.overlap else cclabel.a,
660
                                 max_BE=bstate.max_BE,
661
662
663
664
                                 dl=dl,
                                 dense=(l > 0 and state.deg_corr != "full") or dense,
                                 multigraph=l > 0 or multigraph,
                                 sparse_heuristic=base_g.num_vertices() > sparse_thresh,
665
                                 nested_dl=nested_dl,
666
667
668
669
670
671
                                 overlap=bstate.overlap,
                                 nested_overlap=state.overlap == "full",
                                 nonoverlap_compare=init_states is None,
                                 nonoverlap_init=False,
                                 init_states=init_states,
                                 verbose=verbose=="full",
672
                                 ##exaustive=g.num_vertices() <= 100,
673
                                 #minimize_state=minimize_state.minimize_state, >>>>>> HERE <<<<<
674
675
676
677
                                 checkpoint=checkpoint,
                                 dl_ent=dl_ent)

    if __test__:
678
        assert (res.clabel.a == cclabel.a).all(), (res.clabel.a, cclabel.a)
679
        assert res._BlockState__check_clabel(), "invalid clabel after minimize!"
680
681
682

    res.clabel.a = 0
    b = res.b
683
684
685
686
687
688
689

    Sf = get_b_dl(res,
                  dense=(l > 0 and state.deg_corr != "full") or dense,
                  multigraph=l > 0 or multigraph,
                  nested_dl=nested_dl,
                  nested_overlap=state.overlap == "full",
                  dl_ent=dl_ent)
690
691
692
693
694
695
696
697

    kept = False
    if Sf - Sb >= -1e-10:
        kept = True
    if Sf - Sb == 0 and bstate.B != state.levels[l].B:
        kept = False

    if kept:
698
        Sf_rej = Sf
699
700
        Sf = Sb
    else:
701
702
        # rebuild current level
        state._NestedBlockState__rebuild_level(l, b=b, clabel=clabel)
703
704

    if verbose:
705
706
707
708
709
        print("level", l, ": resizing", bstate.B, "->", state.levels[l].B, ", dS:", Sf - Sb, end="")
        if kept:
            print(" [kept, rejected (%d, %g) vs (%d, %g)]" % (res.B, Sf_rej, bstate.B, Sb))
        else:
            print()
710

711
712
713
    if __test__:
        state._NestedBlockState__consistency_check("replace level", l)

714
715
716
717
718
719
720
721
722
723
724
    dS = Sf - Sb
    return dS, kept


class NestedMinimizeState(object):
    r"""This object stores information regarding the current entropy minimization
    state, so that the algorithms can resume previously started runs.
    This object can be saved to disk via the :mod:`pickle` interface."""

    def __init__(self):
        self.minimize_state = MinimizeState()
725
        self.l = 0
726
727
        self.bs = []
        self.done = []
728
        self.init = True
729
730
731

    def clear(self):
        self.minimize_state.clear()
732
        self.l = 0
733
734
        del self.bs[:]
        del self.done[:]
735
736
737
738

    def sync(self, state):
        if len(self.bs) == 0:
            for s in state.levels:
739
                self.bs.append(array(s.b.fa))
740
741
742
743
744
745
746
747
748
        while len(self.done) < len(state.levels):
            self.done.append(False)

    def delete(self, l):
        del self.done[l]
        del self.bs[l]

    def insert(self, l, state):
        self.done.insert(l + 1, False)
749
750
        ba = array(state.levels[l].b.fa)
        self.bs.insert(l + 1, ba)
751
752
753

    def mark_level(self, l, done, state):
        while len(state.levels) > len(self.bs):
754
755
            ba = array(state.levels[len(self.bs)].b.fa)
            self.bs.append(ba)
756
757
758
759
760
761
762
            self.done.append(False)

        self.done[l] = done
        if done:
            self.bs[l] = array(state.levels[l].b.fa)

    def clear_mstate(self):
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
        self.minimize_state.clear()

def get_checkpoint_wrap(checkpoint, state, minimize_state, dl_ent):
    S_total = state.entropy(complete=True, dl_ent=dl_ent)

    if checkpoint is not None:
        def check_wrap(bstate, Sb, delta, nmoves, ms):
            l = minimize_state.l
            bstate = state.levels[l]
            S_l = bstate.entropy()
            S = S_total - S_l + Sb
            if bstate is None:
                checkpoint(None, S, delta, nmoves, minimize_state)
            else:
                checkpoint(state, S, delta, nmoves, minimize_state)
        chkp = check_wrap
    else:
        chkp = None

    return chkp
783

784
785
786
787
788
789

def nested_tree_sweep(state, min_B=None, max_B=None, max_b=None, nsweeps=10,
                      epsilon=0., r=2., nmerge_sweeps=10, c=0, dl=False,
                      dense=False, multigraph=True, sequential=True,
                      sparse_thresh=100, checkpoint=None, minimize_state=None,
                      frozen_levels=None, verbose=False, **kwargs):
790
791
792
793
794
795
796
    r"""Performs one greedy sweep in the entire hierarchy tree, attempting to
    decrease its description length.

    Parameters
    ----------
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
797
798
799
800
801
802
803
    min_B : ``int`` (optional, default: ``None``)
        Minimum number of blocks at the lowest level.
    max_B : ``int`` (optional, default: ``None``)
        Maximum number of blocks at the lowest level.
    max_b : ``int`` (optional, default: ``None``)
        Block partition used for the maximum number of blocks at the lowest
        level.
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    epsilon : ``float`` (optional, default: ``0``)
        Converge criterion for ``adaptive_sweeps``.
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    sparse_thresh : ``int`` (optional, default: ``100``)
        If the number of nodes in the higher level multigraphs exceeds this
        number, the sparse entropy will be used to find the best partition,
        but the dense entropy will be used to compare different partitions.
    checkpoint : function (optional, default: ``None``)
        If provided, this function will be called after each call to
        :func:`mcmc_sweep`. This can be used to store the current state, so it
        can be continued later. The function must have the following signature:

        .. code-block:: python

            def checkpoint(state, S, delta, nmoves, minimize_state):
                ...

        where `state` is either a :class:`~graph_tool.community.NestedBlockState`
        instance or ``None``, `S` is the current entropy value, `delta` is
        the entropy difference in the last MCMC sweep, and `nmoves` is the
        number of accepted block membership moves. The ``minimize_state``
        argument is a :class:`NestedMinimizeState` instance which specifies
        the current state of the algorithm, which can be stored via :mod:`pickle`,
        and supplied via the ``minimize_state`` option below to continue from an
        interrupted run.

        This function will also be called when the MCMC has finished for the
        current value of :math:`B`, in which case ``state == None``, and the
        remaining parameters will be zero, except the last.
    minimize_state : :class:`NestedMinimizeState` (optional, default: ``None``)
        If provided, this will specify an exact point of execution from which
        the algorithm will continue. The expected object is a
        :class:`NestedMinimizeState` instance which will be passed to the
        callback of the ``checkpoint`` option above, and  can be stored by
        :mod:`pickle`.
860
861
862
    frozen_levels : :class:`list` (optional, default: ``None``)
        List of levels (``int``s) which will remain unmodified during the
        algorithm.
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    dS : ``float``
       The description length difference (per edge) after the move.


    Notes
    -----

    This algorithm performs a constrained agglomerative heuristic on each level
    of the network, via the function :func:`~graph_tool.community.multilevel_minimize`.

    This algorithm has worst-case complexity of :math:`O(N\ln^2 N \times L)`,
    where  :math:`N` is the number of nodes in the network, and :math:`L` is
    the depth of the hierarchy.

    References
    ----------

886
887
888
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
889
890
891
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
892
893
894
    .. [peixoto-model-2014] Tiago P. Peixoto, "Model selection and hypothesis
       testing for large-scale network models with overlapping groups",
       :arxiv:`1409.3059`.
895
896
    """

897
898
    dl_ent = kwargs.get("dl_ent", False)

899
900
901
902
903
904
    if minimize_state is None:
        minimize_state = NestedMinimizeState()
    mstate = minimize_state
    mstate.sync(state)

    args = dict(state=state, nsweeps=nsweeps, nmerge_sweeps=nmerge_sweeps, r=r,
905
906
907
908
                c=c, epsilon=epsilon, sequential=sequential, dl=dl, dense=dense,
                multigraph=multigraph, sparse_thresh=sparse_thresh,
                min_B=min_B, max_B=max_B, max_b=max_b, checkpoint=checkpoint,
                minimize_state=minimize_state, dl_ent=dl_ent)
909
910
911
912

    #_Si = state.entropy(dense=dense, multigraph=dense)
    dS = 0

913
914
915
    if frozen_levels is None:
        frozen_levels = set()

916
917
    while mstate.l >= 0:
        l = mstate.l
918

919
920
921
922
923
924
        if mstate.done[l]:
            if verbose:
                print("level", l, ": skipping", state.levels[l].B)
            mstate.l -= 1
            continue

925
        Si = state.entropy(dl_ent=dl_ent)
926

927
        kept = True
928

929
930
931
932
933
934
935
936
937
938
939
940
        if l in frozen_levels:
            kept = False

        # replace level
        if kept:
            ddS, kept = replace_level(l, verbose=verbose, **args)
            dS += ddS
            mstate.clear_mstate()

        if __test__:
            if kept:
                assert abs(state.entropy(dl_ent=dl_ent) - Si) < 1e-6, "inconsistent replace at level %d (%g, %g)" % (l, state.entropy(), Si)
941
            state._NestedBlockState__consistency_check("replace level", l)
942
943

        # delete level
944
945
946
        if (kept and l > 0 and l < len(state.levels) - 1 and
            not (min_B is not None and l == 1 and state.levels[l].B < min_B)):
            Si = state.entropy(dl_ent=dl_ent)
947

948
            bstates = [state.levels[l-1], state.levels[l], state.levels[l + 1]]
949
950

            state._NestedBlockState__delete_level(l)
951
            #replace_level(l, **args)
952

953
            Sf = state.entropy(dl_ent=dl_ent)
954
955
956
957
958

            mstate.clear_mstate()
            if Sf > Si:
                state.levels[l - 1] = bstates[0]
                state.levels.insert(l, bstates[1])
959
                state.levels[l + 1] = bstates[2]
960
961
962
963
964
965
966
            else:
                kept = False
                dS += Sf - Si

                mstate.delete(l)

                if verbose:
967
                    print("level", l, ": deleted", (bstates[1].N, bstates[1].B), ", dS:", Sf - Si, len(state.levels))
968

969
970
971
            if __test__:
                if kept:
                    assert abs(state.entropy(dl_ent=dl_ent) - Si) < 1e-6, "inconsistent delete at level %d (%g, %g)" % (l, state.entropy(), Si)
972
                state._NestedBlockState__consistency_check("delete complete", l)
973

974
975
976
977
978
979
980
981
982
        # insert new level (duplicate and replace)
        if kept and l > 0:
            Si = state.entropy(dl_ent=dl_ent)

            bstates = [state.levels[l].copy()]
            if l < len(state.levels) - 1:
                bstates.append(state.levels[l + 1].copy())
            if l < len(state.levels) - 2:
                bstates.append(state.levels[l + 2].copy())
983
984
985

            state._NestedBlockState__duplicate_level(l)

986
            replace_level(l + 1, verbose=False, **args)
987

988
            Sf = state.entropy(dl_ent=dl_ent)
989
990
991
992

            mstate.clear_mstate()
            if Sf >= Si:
                del state.levels[l + 1]
993
994
995
996
                for j in range(len(bstates)):
                    state.levels[l + j] = bstates[j]
                if bstates[-1].B == 1:
                    del state.levels[l + len(bstates):]
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
            else:
                kept = False
                dS += Sf - Si

                mstate.insert(l, state)

                l += 1

                if verbose:
                    print("level", l, ": inserted", state.levels[l].B, ", dS:", Sf - Si)

1008
1009
1010
1011
            if __test__:
                state._NestedBlockState__consistency_check("delete", l)
                if kept:
                    assert abs(state.entropy(dl_ent=dl_ent) - Si) < 1e-8, "inconsistent delete at level %d (%g, %g)" % (l, state.entropy(), Si)
1012

1013
        mstate.mark_level(l, done=True, state=state)
1014
1015
1016
1017
1018
1019
1020
        if not kept:
            if l + 1 < len(state.levels):
                mstate.mark_level(l + 1, done=False, state=state)
            if l > 0:
                mstate.mark_level(l - 1, done=False, state=state)
            l += 1
        else:
1021
1022
1023
1024
1025
            if ((l + 1 < len(state.levels) and not mstate.done[l + 1]) or
                (l + 1 == len(state.levels) and state.levels[l].B > 1)):
                l += 1
            else:
                l -= 1
1026
1027
1028
1029

        if l >= len(state.levels):
            l = len(state.levels) - 1

1030
        # create a new level at the top with B=1, if necessary
1031
        if l == len(state.levels) - 1 and state.levels[l].B > 1:
1032
1033
            NB = state.levels[l].B if not state.overlap else 2 * state.levels[l].E
            state._NestedBlockState__rebuild_level(l, b=state.levels[l].g.new_vertex_property("int"))
1034
1035
1036
1037
1038
1039
1040
            mstate.mark_level(l + 1, done=False, state=state)
            l += 1

        mstate.l = l
        if checkpoint is not None:
            checkpoint(None, 0, 0, 0, mstate)

1041
1042
1043
1044
        if __test__:
            state._NestedBlockState__consistency_check("tree sweep step", l)

    # _Sf = state.entropy(dense=dense, multigraph=dense, dl_ent=dl_ent)
1045
1046
1047
1048
1049

    return dS



1050
1051
1052
1053
1054
1055
def init_nested_state(g, Bs, deg_corr=True, overlap=False, dl=False, dense=False,
                      multigraph=True, eweight=None, vweight=None,
                      clabel=None, nsweeps=10, epsilon=0., r=2, nmerge_sweeps=10,
                      c=0, sequential=True, sparse_thresh=100,
                      checkpoint=None, minimize_state=None, max_BE=1000,
                      verbose=False, **kwargs):
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
    r"""Initializes a nested block hierarchy with sizes given by ``Bs``.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        The graph being modelled.
    Bs : list of ``int`` (optional, default: ``None``)
        Number of blocks for each hierarchy level.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be used in the bottom level, otherwise the traditional variant will be used.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge weights (i.e. multiplicity).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex weights (i.e. multiplicity).
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    epsilon : ``float`` (optional, default: ``0``)
        Converge criterion for ``adaptive_sweeps``.
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    c : ``float`` (optional, default: ``0.``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    sparse_thresh : ``int`` (optional, default: ``100``)
        If the number of nodes in the higher level multigraphs exceeds this
        number, the sparse entropy will be used to find the best partition,
        but the dense entropy will be used to compare different partitions.
    checkpoint : function (optional, default: ``None``)
        If provided, this function will be called after each call to
        :func:`mcmc_sweep`. This can be used to store the current state, so it
        can be continued later. The function must have the following signature:

        .. code-block:: python

            def checkpoint(state, S, delta, nmoves, minimize_state):
                ...

        where `state` is either a :class:`~graph_tool.community.NestedBlockState`
        instance or ``None``, `S` is the current entropy value, `delta` is
        the entropy difference in the last MCMC sweep, and `nmoves` is the
        number of accepted block membership moves. The ``minimize_state``
        argument is a :class:`NestedMinimizeState` instance which specifies
        the current state of the algorithm, which can be stored via :mod:`pickle`,
        and supplied via the ``minimize_state`` option below to continue from an
        interrupted run.

        This function will also be called when the MCMC has finished for the
        current value of :math:`B`, in which case ``state == None``, and the
        remaining parameters will be zero, except the last.
    minimize_state : :class:`NestedMinimizeState` (optional, default: ``None``)
        If provided, this will specify an exact point of execution from which
        the algorithm will continue. The expected object is a
        :class:`NestedMinimizeState` instance which will be passed to the
        callback of the ``checkpoint`` option above, and  can be stored by
        :mod:`pickle`.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    state : :class:`~graph_tool.community.NestedBlockState`
       The initialized nested state.


    Notes
    -----

    This algorithm performs an agglomerative heuristic on each level  of the
    network, via the function :func:`~graph_tool.community.multilevel_minimize`.

    This algorithm has worst-case complexity of :math:`O(N\ln^2 N \times L)`,
    where  :math:`N` is the number of nodes in the network, and :math:`L` is
    the depth of the hierarchy.

    References
    ----------

1150
1151
1152
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
1153
1154
1155
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
1156
1157
1158
    .. [peixoto-model-2014] Tiago P. Peixoto, "Model selection and hypothesis
       testing for large-scale network models with overlapping groups",
       :arxiv:`1409.3059`.
1159
1160
    """

1161
1162
    dl_ent = kwargs.get("dl_ent", False)

1163
1164
1165
1166
1167
    if minimize_state is None:
        minimize_state = NestedMinimizeState()
    mstate = minimize_state

    state = NestedBlockState(g, eweight=eweight, vweight=vweight, Bs=[1],
1168
                             deg_corr=deg_corr, overlap=overlap, clabel=clabel)
1169

1170
    chkp = get_checkpoint_wrap(checkpoint, state, minimize_state, dl_ent)
1171
1172
1173
1174
1175

    bg = g
    ecount = eweight

    for l, B in enumerate(Bs):
1176
        ba = None
1177
1178
1179
        if l < len(mstate.bs):
            ba = mstate.bs[l]
        else:
1180
1181
1182
1183
1184
1185
            if l == 0:
                if state.overlap:
                    bstate = OverlapBlockState(bg, B=bg.num_vertices(), #b=bg.vertex_index.copy("int"),
                                               vweight=vweight,
                                               eweight=ecount,
                                               deg_corr=deg_corr != False,
1186
                                               #clabel=clabel,
1187
1188
1189
1190
1191
1192
                                               max_BE=max_BE)
                else:
                    bstate = BlockState(bg, B=bg.num_vertices(), #b=bg.vertex_index.copy("int"),
                                        vweight=vweight,
                                        eweight=ecount,
                                        deg_corr=deg_corr != False,
1193
                                        #clabel=clabel,
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
                                        max_BE=max_BE)
            else:
                bstate = state.levels[l-1].get_block_state(b=ba,
                                                           overlap=overlap == "full",
                                                           deg_corr=deg_corr == "full")[0]

            if B == 1:
                bstate.copy(b=bstate.g.new_vertex_property("int").a)
            else:
                bstate = multilevel_minimize(bstate, B, nsweeps=nsweeps, epsilon=epsilon,
                                             r=r, nmerge_sweeps=nmerge_sweeps,
                                             greedy=True, c=c, dl=dl,
                                             dense=(l > 0 and g.num_vertices() < sparse_thresh) or dense,
                                             multigraph=(l > 0 and g.num_vertices() < sparse_thresh) or multigraph,
                                             sequential=sequential, verbose=verbose,
                                             checkpoint=chkp,
                                             minimize_state=minimize_state.minimize_state)
1211
1212
            ba = array(bstate.b.fa)
            mstate.bs.append(ba)
1213
            minimize_state.clear_mstate()
1214

1215
        state._NestedBlockState__rebuild_level(len(state.levels) - 1, b=ba)
1216
1217
1218
        bg = state.levels[l].bg
        ecount = state.levels[l].mrs

1219
1220
1221
1222
1223

    for l, B in enumerate(Bs):
        if l + 1 < len(state.levels):
            assert state.levels[l].B == state.levels[l + 1].N

1224
1225
1226
1227
1228
1229
1230
    minimize_state.clear()
    mstate.sync(state)
    if checkpoint is not None:
        checkpoint(None, 0, 0, 0, mstate)

    return state

1231
1232
1233
1234
1235
1236
def minimize_nested_blockmodel_dl(g, Bs=None, bs=None, min_B=None, max_B=None,
                                  max_b=None, deg_corr=True, overlap=False,
                                  nonoverlap_init=True, dl=True,
                                  multigraph=True, dense=False, eweight=None,
                                  vweight=None, clabel=None, frozen_levels=None,
                                  nsweeps=10, epsilon=1e-3, c=0,
1237
1238
                                  nmerge_sweeps=10, r=2, sparse_thresh=100,
                                  sequential=True, verbose=False,
1239
1240
                                  checkpoint=None, minimize_state=None,
                                  **kwargs):
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
    r"""Find the block hierarchy of an unspecified size which minimizes the description
    length of the network, according to the nested stochastic blockmodel ensemble which
    best describes it.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph being used.
    Bs : list of ``int`` (optional, default: ``None``)
        Initial number of blocks for each hierarchy level.
    bs : list of :class:`~graph_tool.PropertyMap` or :class:`~numpy.ndarray` instances (optional, default: ``None``)
        Initial block labels on the vertices, for each hierarchy level.
1253
1254
1255
1256
1257
1258
1259
    min_B : ``int`` (optional, default: ``None``)
        Minimum number of blocks at the lowest level.
    max_B : ``int`` (optional, default: ``None``)
        Maximum number of blocks at the lowest level.
    max_b : ``int`` (optional, default: ``None``)
        Block partition used for the maximum number of blocks at the lowest
        level.
1260
    deg_corr : ``bool`` (optional, default: ``True``)
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
        If ``True``, the degree-corrected version of the blockmodel ensemble
        will be used in the bottom level, otherwise the traditional variant will
        be used.
    overlap : ``bool`` (optional, default: ``False``)
        If ``True``, the mixed-membership version of the blockmodel will be used.
    nonoverlap_init : ``bool`` (optional, default: ``True``)
        If ``True``, and `´overlap == True``, the minimization starts by first
        fitting the non-overlapping model, and using that as a starting state.
    dl : ``bool`` (optional, default: ``True``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
    multigraph : ``bool`` (optional, default: ``False``)
            If ``True``, the multigraph entropy will be used.
1274
1275
1276
1277
1278
1279
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge weights (i.e. multiplicity).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex weights (i.e. multiplicity).
1280
1281
1282
1283
1284
1285
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
    frozen_levels : :class:`list` (optional, default: ``None``)
        List of levels (``int``s) which will remain unmodified during the
        algorithm.
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    epsilon : ``float`` (optional, default: ``0``)
        The number of sweeps necessary for the local minimum will
        be estimated to be enough so that no more than ``epsilon * N`` nodes
        changes their states in the last ``nsweeps`` sweeps.
    c : ``float`` (optional, default: ``0.``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    sparse_thresh : ``int`` (optional, default: ``100``)
        If the number of blocks at some level is larger than this value, the
        sparse entropy will be used to find the best partition, but the dense
        entropy will be used to compare different partitions.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    checkpoint : function (optional, default: ``None``)
        If provided, this function will be called after each call to
        :func:`mcmc_sweep`. This can be used to store the current state, so it
        can be continued later. The function must have the following signature:

        .. code-block:: python

            def checkpoint(state, L, delta, nmoves, minimize_state):
                ...

        where `state` is either a :class:`~graph_tool.community.NestedBlockState`
        instance or ``None``, `L` is the current description length, `delta` is
        the entropy difference in the last MCMC sweep, and `nmoves` is the
        number of accepted block membership moves. The ``minimize_state``
        argument is a :class:`~graph_tool.community.NestedMinimizeState`
        instance which specifies the current state of the algorithm, which can
        be stored via :mod:`pickle`, and supplied via the ``minimize_state``
        option below to continue from an interrupted run.

        This function will also be called when the MCMC has finished for the
        current value of :math:`B`, in which case ``state == None``, and the
        remaining parameters will be zero, except the last.
    minimize_state : :class:`MinimizeState` (optional, default: ``None``)
        If provided, this will specify an exact point of execution from which
        the algorithm will continue. The expected object is a
        :class:`~graph_tool.community.NestedMinimizeState`
        instance which will be passed to the callback of the ``checkpoint``
        option above, and  can be stored by :mod:`pickle`.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------
1347
1348
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376

    Notes
    -----

    This algorithm attempts to find a block partition hierarchy of an unspecified size
    which minimizes the description length of the network,

    .. math::

       \Sigma = \mathcal{L}_{t/c} + \mathcal{S}_n,

    where :math:`\mathcal{S}_{n}` is the nested blockmodel entropy given by

    .. math::

       \mathcal{S}_n  = \mathcal{S}_{t/c}(\{e^0_{rs}\}, \{n^0_r\}) + \sum_{l=1}^LS_m(\{e^l_{rs}\}, \{n^l_r\}).

    with :math:`\mathcal{S}_{t/c}` and :math:`\mathcal{S}_{m}` described in the docstring of
    :meth:`~graph_tool.community.BlockState.entropy`, and :math:`\{e^l_{rs}\}`
    and :math:`\{n^l_r\}` are the edge and node counts at hierarchical level :math:`l`.
    Additionally :math:`\mathcal{L}_{t/c}` is the information necessary to
    describe the block partitions, i.e. :math:`\mathcal{L}_t=\sum_{l=0}^L\mathcal{L}^l_t`, with

    .. math::

        \mathcal{L}^l_t = \ln\left(\!\!{B_l\choose B_{l-1}}\!\!\right) + \ln B_{l-1}! - \sum_r \ln n_r^l!.


Tiago Peixoto's avatar
Tiago Peixoto committed
1377
    See [peixoto-hierarchical-2014]_ for details on the algorithm.
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390

    This algorithm has a complexity of :math:`O(N \ln^2 N)`, where :math:`N`
    is the number of nodes in the network.

    Examples
    --------
    .. testsetup:: nested_mdl

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: nested_mdl

Tiago Peixoto's avatar
Tiago Peixoto committed
1391
       >>> g = gt.collection.data["power"]
1392
1393
1394
1395
       >>> state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
       >>>
       >>> # route edges according to the hierarchy
       >>> bstack = state.get_bstack()
1396
1397
1398
1399
       >>> t = gt.get_hierarchy_tree(bstack)[0]
       >>> tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
       >>> cts = gt.get_hierarchy_control_points(g, t, tpos)
       >>> pos = g.own_property(tpos)
1400
       >>>
1401
1402
       >>> b = bstack[0].vp["b"]
       >>> gt.graph_draw(g, pos=pos, vertex_fill_color=b, vertex_shape=b, edge_control_points=cts,
Tiago Peixoto's avatar
Tiago Peixoto committed
1403
       ...               edge_color=[0, 0, 0, 0.3], vertex_anchor=0, output="power_nested_mdl.pdf")
1404
1405
1406
1407
       <...>

    .. testcleanup:: nested_mdl

Tiago Peixoto's avatar
Tiago Peixoto committed
1408
       gt.graph_draw(g, pos=pos, vertex_fill_color=b, vertex_shape=b, edge_control_points=cts, edge_color=[0, 0, 0, 0.3], vertex_anchor=0, output="power_nested_mdl.png")
1409

Tiago Peixoto's avatar
Tiago Peixoto committed
1410
    .. figure:: power_nested_mdl.*
1411
1412
       :align: center

Tiago Peixoto's avatar
Tiago Peixoto committed
1413
       Block partition of a power-grid network, which minimizes the description
1414
1415
       length of the network according to the nested (degree-corrected) stochastic blockmodel.

1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462

    .. doctest:: nested_mdl_overlap

       >>> g = gt.collection.data["power"]
       >>> state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True, overlap=True, dl=False)
       >>> bv, *rest, bc = state.levels[0].get_overlap_blocks()
       >>>
       >>> # get the dominant group for each node for the layout
       >>> n_r = np.zeros(state.levels[0].B)
       >>> b = g.new_vertex_property("int")
       >>> d = g.new_vertex_property("int")
       >>> for v in g.vertices():
       ...     i = bc[v].a.argmax()
       ...     b[v] = bv[v][i]
       ...     n_r[b[v]] += 1
       ...     d[v] = len(bv[v])
       >>> g.vp["b"] = b
       >>> bstack = state.get_bstack()
       >>> bstack[0] = g
       >>>
       >>> # route edges according to the hierarchy
       >>> t = gt.get_hierarchy_tree(bstack)[0]
       >>> tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
       >>> cts = gt.get_hierarchy_control_points(g, t, tpos)
       >>> pos = g.own_property(tpos)
       >>>
       >>> eg = gt.get_block_edge_gradient(g, state.levels[0].get_edge_blocks())
       >>> gt.graph_draw(g, pos=pos, vertex_pie_fractions=bc,
       ...               vertex_pie_colors=bv, vertex_shape="pie",
       ...               vertex_size=10, vertex_anchor=0, vorder=d,
       ...               edge_gradient=eg, edge_control_points=cts,
       ...               output="power_nested_mdl_overlap.pdf")
       <...>

    .. testcleanup:: nested_mdl_overlap

       gt.graph_draw(g, pos=pos, vertex_pie_fractions=bc, vertex_pie_colors=bv, vertex_shape="pie", edge_gradient=eg, edge_control_points=cts, vertex_size=10, vertex_anchor=0, vorder=d, output="power_nested_mdl_overlap.png")

    .. figure:: power_nested_mdl_overlap.*
       :align: center

       Overlapping block partition of a power-grid network, which minimizes the
       description length of the network according to the nested
       (degree-corrected, overlapping) stochastic blockmodel.



1463
1464
    References
    ----------
Tiago Peixoto's avatar
Tiago Peixoto committed
1465
1466
1467
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
1468
1469
1470
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
1471
1472
1473
    .. [peixoto-model-2014] Tiago P. Peixoto, "Model selection and hypothesis
       testing for large-scale network models with overlapping groups",
       :arxiv:`1409.3059`.
1474
1475
    """

1476
    dl_ent = kwargs.get("dl_ent", False)
1477

1478
1479
1480
1481
1482
1483
    if minimize_state is None:
        minimize_state = NestedMinimizeState()

    if overlap and nonoverlap_init and minimize_state.init and bs is None:
        if verbose:
            print("Non-overlapping initialization...")
1484
1485
        state = minimize_nested_blockmodel_dl(g, Bs=Bs, bs=bs,
                                              min_B=min_B,
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
                                              max_B=max_B,
                                              deg_corr=deg_corr, overlap=False,
                                              dl=dl, dense=dense,
                                              multigraph=multigraph,
                                              eweight=eweight,
                                              vweight=vweight,
                                              clabel=clabel if isinstance(clabel, PropertyMap) else None,
                                              nsweeps=nsweeps,
                                              epsilon=epsilon, c=c,
                                              nmerge_sweeps=nmerge_sweeps, r=r,
                                              sparse_thresh=sparse_thresh,
                                              sequential=sequential,
                                              verbose=verbose,
                                              checkpoint=checkpoint,
                                              minimize_state=minimize_state,
                                              dl_ent=dl_ent)
        if overlap != "full":
            if clabel is not None:
                bstate = state.levels[0].copy(overlap=True, clabel=clabel)
            else:
                bstate = state.levels[0].copy(overlap=True,
                                              clabel=g.new_vertex_property("int"))
            unilevel_minimize(bstate, nsweeps=nsweeps,
                              epsilon=epsilon, c=c,
                              nmerge_sweeps=nmerge_sweeps,
                              dl=dl,
                              sequential=sequential)
            bs = [s.b.a for s in state.levels]
            bs[0] = bstate.b.a
1515

1516
1517
1518
1519
1520
        else:
            bstates = [bstate.copy(overlap=True) for bstate in state.levels]
            bs = [s.b.a for s in bstates]
        if nonoverlap_init != "full":
            bs = [bs[0], zeros(bs[0].max() + 1, dtype=bs[0].dtype)]
1521

1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
        Bs = [b.max() + 1 for b in bs]
        max_B = Bs[0]
        max_b = bs[0].copy()

        minimize_state.clear()
        minimize_state.init = False

        if verbose:
            print("Overlapping minimization starting from:")
            state.print_summary()

    if Bs is None:
1534
1535
        if minimize_state is not None:
            Bs = [ba.max() + 1 for ba in minimize_state.bs]
1536
1537
1538
1539
            if len(Bs) == 0:
                Bs = [1]
        else:
            Bs = [1]
1540
1541
1542
1543
1544
1545
1546
1547
1548

    if bs is not None:
        Bs = [ba.max() + 1 for ba in bs]

    if Bs[-1] > 1:
        Bs += [1]

    if bs is None:
        state = init_nested_state(g, Bs=Bs, deg_corr=deg_corr,
1549
1550
1551
1552
1553
1554
                                  overlap=overlap, eweight=eweight,
                                  vweight=vweight, clabel=clabel,
                                  verbose=verbose,
                                  nsweeps=nsweeps, nmerge_sweeps=nmerge_sweeps,
                                  dl=dl, dense=dense, multigraph=multigraph,
                                  epsilon=epsilon, sparse_thresh=sparse_thresh,
1555
                                  checkpoint=checkpoint,
1556
1557
                                  minimize_state=minimize_state,
                                  dl_ent=dl_ent)
1558
    else:
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
        state = NestedBlockState(g, bs=bs, deg_corr=deg_corr, overlap=overlap,
                                 eweight=eweight, vweight=vweight, clabel=clabel)

    minimize_state.sync(state)

    chkp = get_checkpoint_wrap(checkpoint, state, minimize_state, dl_ent)

    dS = nested_tree_sweep(state,
                           min_B=min_B,
                           max_B=max_B,
                           max_b=max_b,
                           verbose=verbose,
                           nsweeps=nsweeps,
                           nmerge_sweeps=nmerge_sweeps,
                           r=r, epsilon=epsilon,
                           dense=dense, dl=dl,
                           multigraph=multigraph,
                           sparse_thresh=sparse_thresh,
                           checkpoint=chkp,
                           minimize_state=minimize_state,
                           frozen_levels=frozen_levels,
                           dl_ent=dl_ent)
1581

1582