nested_blockmodel.py 76.4 KB
Newer Older
1
2
3
4
5
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2015 Tiago de Paula Peixoto <tiago@skewed.de>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import, print_function
import sys
if sys.version_info < (3,):
    range = xrange

26
27
from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap, \
    infect_vertex_property
28
from .. stats import label_self_loops
29
30
from .. generation import graph_union
from .. topology import shortest_path
31
32
33
34
35
36
37
38
39
import random
from numpy import *
import numpy
from scipy.optimize import fsolve, fminbound
import scipy.special
from collections import defaultdict
import copy

from . blockmodel import *
40
from . blockmodel import _bm_test
41
42
43
44
45
46
47

class NestedBlockState(object):
    r"""This class encapsulates the nested block state of a given graph.

    This must be instantiated and used by functions such as :func:`nested_mcmc_sweep`.

    The instances of this class contain a data member called ``levels``, which
48
49
50
    is a list of :class:`~graph_tool.community.BlockState` (or
    :class:`~graph_tool.community.OverlapBlockState`) instances, containing the
    entire nested hierarchy.
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge weights (i.e. multiplicity).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex weights (i.e. multiplicity).
    bs : list of :class:`~graph_tool.PropertyMap` or :class:`~numpy.ndarray` instances (optional, default: ``None``)
        Initial block labels on the vertices, for each hierarchy level.
    Bs : list of ``int`` (optional, default: ``None``)
        Number of blocks for each hierarchy level.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be used in the bottom level, otherwise the traditional variant will be used.
67
68
69
70
71
72
    overlap : ``bool`` (optional, default: ``False``)
        If ``True``, the mixed-membership version of the blockmodel will be used
        at the lowest level.
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
73
74
75
76
77
78
    max_BE : ``int`` (optional, default: ``1000``)
        If the number of blocks exceeds this number, a sparse representation of
        the block graph is used, which is slightly less efficient, but uses less
        memory,
    """

79
80
    def __init__(self, g, eweight=None, vweight=None, ec=None, bs=None, Bs=None,
                 deg_corr=True, overlap=False, layers=False, clabel=None,
81
                 max_BE=1000, **kwargs):
82
        L = len(Bs) if Bs is not None else len(bs)
83
84
85
        self.g = cg = g
        self.vweight = vcount = vweight
        self.eweight = ecount = eweight
86
87
        self.ec = ec
        self.layers = layers
88
89

        self.levels = []
90
91
92
        self.overlap = overlap
        self.deg_corr = deg_corr
        self.clabel = clabel if clabel is not None else g.new_vertex_property("int")
93
        self.ignore_degrees = kwargs.get("ignore_degrees", None)
94

95
96
        for l in range(L):
            Bl = Bs[l] if Bs is not None else None
97
98
            bl = None
            if bs is not None:
99
100
                if isinstance(bs[l], PropertyMap):
                    bl = cg.own_property(bs[l])
101
                else:
102
                    bl = bs[l]
103

104
            if l == 0:
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
                if ec is None:
                    if overlap:
                        state = OverlapBlockState(g, B=Bl, b=bl,
                                                  eweight=ecount,
                                                  vweight=vcount,
                                                  deg_corr=deg_corr != False,
                                                  clabel=self.clabel,
                                                  max_BE=max_BE)
                        self.clabel = state.clabel.copy()
                        state.clabel.a = 0
                    else:
                        state = BlockState(g, B=Bl, b=bl,
                                           eweight=ecount,
                                           vweight=vcount,
                                           deg_corr=deg_corr != False,
                                           #clabel=self.clabel,
121
122
                                           max_BE=max_BE,
                                           ignore_degrees=self.ignore_degrees)
123
                else:
124
125
126
127
128
129
130
131
132
133
134
135
136
137
                    state = CovariateBlockState(g, B=Bl, b=bl,
                                                ec=ec,
                                                layers=layers,
                                                eweight=ecount,
                                                vweight=vcount,
                                                deg_corr=deg_corr != False,
                                                clabel=self.clabel,
                                                overlap=overlap,
                                                max_BE=max_BE)
                    if overlap:
                        self.clabel = state.clabel.copy()
                        state.clabel.a = 0


138
            else:
139
140
141
                state = self.levels[-1].get_block_state(b=bl,
                                                        overlap=self.overlap == "full",
                                                        deg_corr=self.deg_corr == "full")[0]
142
                if _bm_test():
143
144
145
146
                    assert not state.deg_corr, "upper levels must be non-deg-corr"

            self.levels.append(state)

147
148
149
        # for l in range(len(self.levels) - 1):
        #     clabel = self.__project_partition(l, l+1)
        #     self.levels[l].clabel = clabel
150

151
152
153
154
155
156
157
158
159
        if ec is not None:
            self.ec = self.levels[0].ec

    def __repr__(self):
        return "<NestedBlockState object with %d %sblocks,%s%s for graph %s, with %d levels of sizes %s at 0x%x>" % \
            (self.levels[0].B, "overlapping " if self.overlap else "",
             " with %d %s," % (self.levels[0].C, "layers" if self.layers else "covariates") if self.ec is not None else "",
             " degree corrected," if self.deg_corr else "",
             str(self.g), len(self.levels), str([(s.N, s.B) for s in self.levels]), id(self))
160

161
162
163
164
165
166
167
168
169
170
171
172
173
174

    def __copy__(self):
        return self.copy()

    def __deepcopy__(self, memo):
        g = self.g.copy()
        eweight = g.own_property(self.eweight.copy()) if self.eweight is not None else None
        vweight = g.own_property(self.vweight.copy()) if self.vweight is not None else None
        clabel = g.own_property(self.clabel.copy())  if self.clabel is not None else None
        ec = g.own_property(self.ec.copy()) if self.ec is not None else None
        bstack = self.get_bstack()
        return self.copy(g=g, eweight=eweight, vweight=vweight, clabel=clabel,
                         ec=ec, bs=[s.vp.b.a for s in bstack])

175
    def copy(self, g=None, eweight=None, vweight=None, bs=None, ec=None,
176
             layers=None, deg_corr=None, overlap=None, clabel=None, **kwargs):
177
178
179
        r"""Copies the block state. The parameters override the state properties, and
         have the same meaning as in the constructor.."""

180
181
        if bs is None:
            bs = [s.b.a for s in self.levels]
182
183
184
185
186
        if overlap is None:
            overlap = self.overlap
        elif self.overlap and not overlap:
            raise ValueError("Cannot automatically convert overlapping nested state to nonoverlapping")
        elif not self.overlap and overlap:
187
            s = self.levels[0].copy(overlap=True)
188
189
190
            bs[0] = s.b.a
        if deg_corr is None:
            deg_corr = self.deg_corr
191
192
        if layers is None:
            layers = self.layers
193
194
195
196
        return NestedBlockState(self.g if g is None else g,
                                self.eweight if eweight is None else eweight,
                                self.vweight if vweight is None else vweight,
                                self.ec if ec is None else ec, bs,
197
198
                                layers=layers, deg_corr=deg_corr,
                                overlap=overlap, clabel=clabel,
199
200
201
                                max_BE=self.levels[0].max_BE,
                                ignore_degrees=kwargs.pop("ignore_degrees", self.ignore_degrees),
                                **kwargs)
202
203
204

    def __getstate__(self):
        state = dict(g=self.g,
205
                     ec=self.ec,
206
207
208
209
210
211
                     eweight=self.eweight,
                     vweight=self.vweight,
                     overlap=self.overlap,
                     bs=[array(s.b.a) for s in self.levels],
                     clabel=self.clabel,
                     deg_corr=self.deg_corr,
212
213
                     max_BE=self.levels[0].max_BE,
                     ignore_degrees=self.ignore_degrees)
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
        return state

    def __setstate__(self, state):
        self.__init__(**state)
        return state

    def __project_partition(self, l, j):
        """Project partition of level 'j' onto level 'l'"""
        if self.overlap != "full":
            b = self.levels[l].b.copy()
            for i in range(l + 1, j + 1):
                clabel = self.levels[i].b.copy()
                pmap(b, clabel)
        else:
            b = self.levels[j].b.copy()
        return b

    def __propagate_clabel(self, l):
        clabel = self.clabel.copy()
        for j in range(l):
234
235
            bg = self.levels[j].bg
            bclabel = bg.new_vertex_property("int")
236
237
238
239
240
241
242
            reverse_map(self.levels[j].b, bclabel)
            pmap(bclabel, clabel)
            clabel = bclabel
        return clabel

    def __consistency_check(self, op, l):

243
244
        print("consistency check after", op, "at level", l)

245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
        for j in range(len(self.levels)):

            c_state = self.levels[j].copy()
            S1 = self.levels[j].entropy()
            S2 = c_state.entropy()

            assert abs(S1 - S2) < 1e-8 and not isnan(S2) and not isnan(S1), "inconsistency at level %d after %s of level %d, different entropies of copies! (%g, %g)" % (j, op, l, S1, S2)


            if self.levels[j].wr.a.min() == 0:
                print("WARNING: empty blocks at level", j)
            if self.levels[j].b.a.max() + 1 != self.levels[j].B:
                print("WARNING: b.max() + 1 != B at level", j, self.levels[j].b.max() + 1, self.levels[j].B)

        for j in range(len(self.levels) - 1):

            B = self.levels[j].b.a.max() + 1
            bg_state = self.levels[j].get_block_state(b=self.levels[j+1].b.copy(),
                                                      overlap=self.overlap == "full",
                                                      deg_corr=self.deg_corr == "full")[0]

            S1 = bg_state.entropy(dense=True and self.deg_corr != "full", multigraph=True)
            S2 = self.levels[j+1].entropy(dense=True and self.deg_corr != "full", multigraph=True)

            if self.levels[j].B != self.levels[j+1].N or S1 != S2:
                self.print_summary()

                from graph_tool.topology import similarity
                print(bg_state)
                print(self.levels[j+1])
                print("N, B:", bg_state.N, bg_state.B)
                print("N, B:", self.levels[j + 1].N, self.levels[j + 1].B)
                print("similarity:", similarity(bg_state.g, self.levels[j+1].g))
                print("b:", bg_state.b.a)
                print("b:", self.levels[j+1].b.a)

                print("wr:", bg_state.wr.a)
                print("wr:", self.levels[j+1].wr.a)

                print("mrs:", bg_state.mrs.a)
                print("mrs:", self.levels[j+1].mrs.a)

                print("eweight:", bg_state.eweight.a)
                print("eweight:", self.levels[j+1].eweight.a)

                print("vweight:", bg_state.vweight.a)
                print("vweight:", self.levels[j+1].vweight.a)


            assert abs(S1 - S2) < 1e-6 and not isnan(S2) and not isnan(S1), "inconsistency at level %d after %s of level %d, different entropies (%g, %g)" % (j, op, l, S1, S2)
            assert self.levels[j].B == self.levels[j+1].N, "inconsistency  at level %d after %s of level %d, different sizes" % (j + 1, op, l)


298
299
300
301
302
303
304
305
306
307
308
309
310
            # verify hierarchy / clabel consistency
            clabel = self.__project_partition(0, j)
            self.levels[0].clabel.a = self.clabel.a
            assert self.levels[0]._BlockState__check_clabel(),  "inconsistency at level %d after %s of level %d, clabel invalidated" % (j + 1, op, l)
            self.levels[0].clabel.a = 0

            # verify hierarchy consistency
            clabel = self.__project_partition(j, j + 1)
            self.levels[0].clabel.a = self.clabel.a
            assert self.levels[0]._BlockState__check_clabel(),  "inconsistency at level %d after %s of level %d, partition not compatible with upper level" % (j + 1, op, l)
            self.levels[0].clabel.a = 0


311
312
313
314
    def __rebuild_level(self, l, b, clabel=None):
        r"""Replace level ``l`` given the new partition ``b``, and the
        projected upper level partition clabel."""

315
        if _bm_test():
316
317
318
319
320
321
322
323
324
            assert clabel is not None or l == len(self.levels) - 1, "clabel not given for intermediary level"

        if clabel is None:
            clabel = self.levels[l].g.new_vertex_property("int")

        old_b = b.copy()

        state = self.levels[l].copy(b=b, clabel=clabel.a)
        self.levels[l] = state
325
326
        if l == 0:
            self.clabel = state.g.own_property(self.clabel)  # for CovariateBlockState
327
328
329
330
331
332
333
334
335
336

        # upper level
        bclabel = state.get_bclabel()
        bstate = self.levels[l].get_block_state(b=bclabel,
                                                overlap=self.overlap == "full",
                                                deg_corr=self.deg_corr == "full")[0]
        if l == len(self.levels) - 1:
            self.levels.append(None)
        self.levels[l + 1] = bstate

337
338
339
340
341
        self.levels[l].clabel.a = 0
        self.levels[l + 1].clabel.a = 0

        # if l + 1 < len(self.levels) - 1:
        #     self.levels[l + 1].clabel = self.__project_partition(l + 1, l + 2)
342
343
344
345
346
347
348


        if l + 1 < len(self.levels) - 1:
            bstate = self.levels[l + 1].get_block_state(b=self.levels[l + 2].b,
                                                        overlap=self.overlap == "full",
                                                        deg_corr=self.deg_corr == "full")[0]

349
            if _bm_test():
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
                from graph_tool.topology import similarity
                print("- similarity:", similarity(bstate.g, self.levels[l + 2].g))


                if abs(bstate.entropy() - self.levels[l + 2].entropy()) > 1e-6:
                    print("********************** inconsistent rebuild! **************************")

                    print(bstate.b.a)
                    print(self.levels[l + 2].b.a)

                    print(bstate.wr.a)
                    print(self.levels[l + 2].wr.a)

                    print(bstate.eweight.a)
                    print(self.levels[l + 2].eweight.a)

                    nclabel = self.__project_partition(l, l + 1)
                    print(nclabel.a)
                    print(clabel.a)
                    print(self.levels[l].b.a)
                    print(self.levels[l+1].b.a)
                    print(self.levels[l+2].b.a)
                    print(bstate.b.a)
                print ("DS", l, l + 1, bstate.entropy(), self.levels[l + 2].entropy())

        B = self.levels[l].B

377
        if _bm_test():
378
            self.__consistency_check("rebuild", l)
379
380
381
382

    def __delete_level(self, l):
        if l == 0:
            raise ValueError("cannot delete level l=0")
383
384
385
386
387
388
        b = self.__project_partition(l - 1, l)
        if l < len(self.levels) - 1:
            clabel = self.__project_partition(l - 1, l + 1)
        else:
            clabel = None

389
390
        del self.levels[l]

391
392
        self.__rebuild_level(l - 1, b=b, clabel=clabel)

393
        if _bm_test():
394
            self.__consistency_check("delete", l)
395

396
    def __duplicate_level(self, l):
397
        assert l > 0, "attempted to duplicate level 0"
398
399
400
        if not self.levels[l].overlap:
            bstate = self.levels[l].copy(b=self.levels[l].g.vertex_index.copy("int"))
        else:
401
            bstate = self.levels[l].copy(b=arange(self.levels[l].g.num_vertices()))
402
        self.levels.insert(l, bstate)
403

404
        if _bm_test():
405
            self.__consistency_check("duplicate", l)
406
407


408
409
    def level_entropy(self, l, complete=True, dense=False, multigraph=True,
                      norm=True, dl_ent=False):
410
411
412
413
414
415
416
417
418
419
420
        r"""Compute the description length of hierarchy level l.

        Parameters
        ----------
        l : ``int``
            Hierarchy level.
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
421
422
423
424
425
426
427
428
        multigraph : ``bool`` (optional, default: ``True``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.
429
430
431
432
        """

        bstate = self.levels[l]

433
        S = bstate.entropy(dl=True, edges_dl=False,
434
                           dense=dense or (l > 0 and self.deg_corr != "full"),
435
                           multigraph=multigraph or l > 0,
436
                           complete=complete or (l > 0 and self.deg_corr == "full"),
437
                           norm=norm, dl_ent=dl_ent)
438
439
        return S

440
    def entropy(self, complete=True, dense=False, multigraph=True, norm=False,
441
                dl_ent=False):
442
443
444
445
446
447
448
449
450
        r"""Compute the description length of the entire hierarchy.

        Parameters
        ----------
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
451
452
453
454
455
456
457
458
        multigraph : ``bool`` (optional, default: ``True``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.
459
460
461
462
        """

        S = 0
        for l in range(len(self.levels)):
463
464
465
            S += self.level_entropy(l, complete=complete, dense=dense,
                                    multigraph=multigraph, norm=norm,
                                    dl_ent=dl_ent)
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
        return S

    def get_bstack(self):
        r"""Return the nested levels as individual graphs.

        This returns a list of :class:`~graph_tool.Graph` instances
        representing the inferred hierarchy at each level. Each graph has two
        internal vertex and edge property maps named "count" which correspond to
        the vertex and edge counts at the lower level, respectively. Additionally,
        an internal vertex property map named "b" specifies the block partition.
        """

        bstack = []
        for l, bstate in enumerate(self.levels):
            cg = bstate.g
            if l == 0:
                cg = GraphView(cg, skip_properties=True)
            cg.vp["b"] = bstate.b.copy()
            cg.ep["count"] = bstate.eweight
485
            if bstate.overlap:
486
487
488
489
                if self.ec is None:
                    cg.vp["node_index"] = bstate.node_index.copy()
                else:
                    cg.vp["node_index"] = bstate.total_state.node_index.copy()
490

491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
            bstack.append(cg)
            if bstate.N == 1:
                break
        if bstack[-1].num_vertices() > 1:
            cg = Graph(directed=bstack[-1].is_directed())
            cg.add_vertex()
            cg.vp["b"] = cg.new_vertex_property("int")
            e = cg.add_edge(0, 0)
            ew = cg.new_edge_property("int")
            ew[e] = self.levels[-1].E
            cg.ep["count"] = ew
            bstack.append(cg)
        return bstack


    def project_level(self, l):
507
508
509
        r"""Project the partition at level ``l`` onto the lowest level, and return the
        corresponding :class:`~graph_tool.community.BlockState` (or
        :class:`~graph_tool.community.OverlapBlockState`).  """
510
        if self.overlap != "full":
511
512
513
514
515
516
517
518
            clabel = b = self.levels[l].b.copy()
            while l - 1 >= 0:
                clabel = b
                b = self.levels[l - 1].b.copy()
                pmap(b, clabel)
                l -= 1
        else:
            b = self.levels[l].b.copy()
519
        state = self.levels[0].copy(b=b.a)
520
        return state
521

522
523
524
525
526
527
528
529
530
531
532
533
534
    def merge_layers(self, l_src, l_tgt, revert=False):
        ctxs = []
        for state in self.levels:
            ctxs.append(state.merge_layers(l_src, l_tgt, revert))
        if revert:
            if hasattr(contextlib, "nested"):
                return contextlib.nested(*ctxs)
            else:
                with contextlib.ExitStack() as stack:
                    for ctx in ctxs:
                        stack.enter_context(ctx)
                    return stack.pop_all()

535
536
537
    def print_summary(self):
        for l, state in enumerate(self.levels):
            print("l: %d, N: %d, B: %d" % (l, state.N, state.B))
538

539

540
541
def nested_mcmc_sweep(state, beta=1., c=1., dl=False, sequential=True,
                      parallel=False, verbose=False):
542
543
544
545
546
547
548
549
550
551
552
553
554
    r"""Performs a Markov chain Monte Carlo sweep on all levels of the hierarchy.

    Parameters
    ----------
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
    beta : `float` (optional, default: `1.0`)
        The inverse temperature parameter :math:`\beta`.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
555
556
557
    dl : ``bool`` (optional, default: ``False``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    dS_moves : list of (``float``, ``int``) tuples
       The entropy difference (per edge) and number of accepted block membership
       moves after a full sweep for each level.


    Notes
    -----

    This algorithm performs a Markov chain Monte Carlo sweep on each level of the
    network, via the function :func:`~graph_tool.community.mcmc_sweep`.

    This algorithm has a worse-case complexity of :math:`O(E \times L)`, where
    :math:`E` is the number of edges in the network, and :math:`L` is the depth
    of the hierarchy.

    Examples
    --------
    .. testsetup:: nested_mcmc

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: nested_mcmc

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.NestedBlockState(g, Bs=[10, 5, 3, 2, 1], deg_corr=True)
       >>> ret = gt.nested_mcmc_sweep(state)
       >>> print(ret)
597
       [(0.0, 0), (0.0, 0), (0.0, 0), (0.0, 0), (0.0, 0)]
598
599
600
601

    References
    ----------

602
603
604
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
605
606
607
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
608
    .. [peixoto-model-2015] Tiago P. Peixoto, "Model selection and hypothesis
609
       testing for large-scale network models with overlapping groups",
610
       Phys. Rev. X 5, 011033 (2015), :doi:`10.1103/PhysRevX.5.011033`,
611
       :arxiv:`1409.3059`.
612
613
614
615
616
617
    """

    rets = []
    for l, bstate in enumerate(state.levels):
        if verbose:
            print("Level:", l, "N:", bstate.N, "B:", bstate.B)
618
619
620
621
622
623
624
625
626
627
628
629
630
631

        # constraint partitions not to invalidate upper layers
        if l < len(state.levels) - 1:
            clabel = state._NestedBlockState__project_partition(l, l + 1)
        else:
            clabel = bstate.g.new_vertex_property("int")

        # propagate externally imposed clabel at the bottom
        cclabel = state._NestedBlockState__propagate_clabel(l)
        cclabel.a += clabel.a * (cclabel.a.max() + 1)
        continuous_map(cclabel)

        bstate.clabel = cclabel

632
633
        ret = mcmc_sweep(bstate, beta=beta, c=c, dl=dl,
                         dense = l > 0 and state.deg_corr != "full",
634
                         multigraph = l > 0,
635
636
                         sequential=sequential, parallel=parallel,
                         verbose=verbose)
637

638
        bstate.clabel.a = 0
639

640
641
642
        rets.append(ret)
    return rets

643
def replace_level(l, state, min_B=None, max_B=None, max_b=None, nsweeps=10,
644
645
646
647
648
                  nmerge_sweeps=10, adaptive_sweeps=True, r=2, c=0, epsilon=0.,
                  sequential=True, parallel=False, dl=False, dense=False,
                  multigraph=True, sparse_thresh=100, verbose=False,
                  checkpoint=None, minimize_state=None, dl_ent=False,
                  confine_layers=False):
649
    r"""Replaces level l with another state with a possibly different number of
650
651
652
    groups. This may change not only the state at level l, but also the one at
    level l + 1, which needs to be 'rebuilt' because of the label changes at
    level l."""
653

654
    if _bm_test():
655
656
        state._NestedBlockState__consistency_check("(before) replace level", l)

657
658
    bstate = state.levels[l]
    g = bstate.g
659
660
    base_g = g if not bstate.overlap else bstate.base_g
    eweight = bstate.eweight if not bstate.overlap else None
661
662
663
664
665
666
    ec = None
    if state.ec is not None:
        if bstate.overlap:
            ec = bstate.base_ec
        else:
            ec = bstate.ec
667

668
669
670
671
672
673
674
    if l > 0 or min_B is None:
        if l + 1 < len(state.levels):
            min_B = state.levels[l + 1].B
        else:
            min_B = 1
    if l > 0 or max_B is None:
        max_B = bstate.N
675

676
677
678
    min_B = max(min_B, state.clabel.a.max() + 1)

    # constraint partitions not to invalidate upper layers
679
680
    if l < len(state.levels) - 1:
        clabel = state._NestedBlockState__project_partition(l, l + 1)
681
    else:
682
683
        clabel = bstate.g.new_vertex_property("int")

684
685
    assert min_B <= max_B, (min_B, max_B, bstate.B, bstate.N, g.num_vertices(),
                            state.clabel.a.max() + 1, clabel.a.max() + 1, l)
686

687
    # propagate externally imposed clabel at the bottom
688
    cclabel = state._NestedBlockState__propagate_clabel(l)
689
    assert cclabel.a.max() <= state.clabel.a.max(),  (cclabel.a.max(), state.clabel.a.max())
690
691
    cclabel.a += clabel.a * (cclabel.a.max() + 1)
    continuous_map(cclabel)
692

693
694
    min_B = max(min_B, cclabel.a.max() + 1)

695
696
    assert min_B <= max_B, (min_B, max_B, bstate.B, bstate.N, g.num_vertices(),
                            cclabel.a.max() + 1, state.clabel.a.max() + 1, clabel.a.max() + 1, l)
697

698
    if _bm_test():
699
        assert bstate._BlockState__check_clabel(), "invalid clabel before minimize!"
700
701
702

    nested_dl = l < len(state.levels) - 1

703
704
    state.levels[l].clabel.a = cclabel.a

705
706
707
708
709
710
711
    Sb = get_b_dl(state.levels[l],
                  dense=(l > 0 and state.deg_corr != "full") or dense,
                  multigraph=l > 0 or multigraph,
                  nested_dl=nested_dl,
                  nested_overlap=state.overlap == "full",
                  dl_ent=dl_ent)

712
713
    state.levels[l].clabel.a = 0

714
    if _bm_test():
715
716
717
        assert clabel.a.max() + 1 <= min_B

    if l == 0 and max_b is not None:
718
719
        istate = state.levels[0].copy(b=max_b.copy(),
                                      clabel=cclabel.a if state.overlap else cclabel)
720
721
        if _bm_test():
            assert istate._BlockState__check_clabel(), "invalid clabel!"
722
723
        if istate.B > max_B:
            istate = multilevel_minimize(istate, B=max_B, nsweeps=nsweeps,
724
725
                                         nmerge_sweeps=nmerge_sweeps,
                                         adaptive_sweeps=adaptive_sweeps, c=c,
726
                                         r=r, dl=dl, sequential=sequential,
727
728
729
                                         parallel=parallel,
                                         greedy_cooling=True, epsilon=epsilon,
                                         confine_layers=confine_layers,
730
                                         verbose=verbose=="full")
731
732
            if _bm_test():
                assert istate._BlockState__check_clabel(), "invalid clabel!"
733
734
735
        init_states = [istate]
    else:
        init_states = None
736

737
738
739
740
741
    res = minimize_blockmodel_dl(base_g,
                                 ec=ec,
                                 layers=state.layers,
                                 confine_layers=confine_layers,
                                 eweight=eweight,
742
743
744
                                 deg_corr=bstate.deg_corr,
                                 nsweeps=nsweeps,
                                 nmerge_sweeps=nmerge_sweeps,
745
                                 adaptive_sweeps=adaptive_sweeps,
746
                                 c=c, r=r, sequential=sequential,
747
748
                                 parallel=parallel,
                                 greedy_cooling=True,
749
                                 epsilon=epsilon,
750
751
                                 max_B=max_B,
                                 min_B=min_B,
752
                                 clabel=cclabel if not bstate.overlap else cclabel.a,
753
                                 max_BE=bstate.max_BE,
754
755
756
757
                                 dl=dl,
                                 dense=(l > 0 and state.deg_corr != "full") or dense,
                                 multigraph=l > 0 or multigraph,
                                 sparse_heuristic=base_g.num_vertices() > sparse_thresh,
758
                                 nested_dl=nested_dl,
759
760
                                 overlap=bstate.overlap,
                                 nested_overlap=state.overlap == "full",
Tiago Peixoto's avatar
Tiago Peixoto committed
761
                                 nonoverlap_compare=False,
762
763
764
                                 nonoverlap_init=False,
                                 init_states=init_states,
                                 verbose=verbose=="full",
765
                                 ##exaustive=g.num_vertices() <= 100,
766
                                 #minimize_state=minimize_state.minimize_state, >>>>>> HERE <<<<<
767
                                 checkpoint=checkpoint,
768
769
                                 dl_ent=dl_ent,
                                 ignore_degrees=state.ignore_degrees if l == 0 else None)
770

771
    if _bm_test():
772
        assert (res.clabel.a == cclabel.a).all(), (res.clabel.a, cclabel.a)
773
        assert res._BlockState__check_clabel(), "invalid clabel after minimize!"
774

775
776
777
778
779
780
    Sf = get_b_dl(res,
                  dense=(l > 0 and state.deg_corr != "full") or dense,
                  multigraph=l > 0 or multigraph,
                  nested_dl=nested_dl,
                  nested_overlap=state.overlap == "full",
                  dl_ent=dl_ent)
781

782
783
784
785
786
787
    res.clabel.a = 0
    if state.ec is not None:
        for s in res.states:
            s.clabel.a = 0
    b = res.b

788
789
790
791
792
    kept = False
    if Sf - Sb >= -1e-10:
        kept = True
    if Sf - Sb == 0 and bstate.B != state.levels[l].B:
        kept = False
793
794
    if res.B == res.N:
        kept = True
795
796

    if kept:
797
        Sf_rej = Sf
798
799
        Sf = Sb
    else:
800
801
        # rebuild current level
        state._NestedBlockState__rebuild_level(l, b=b, clabel=clabel)
802
803

    if verbose:
804
805
806
807
808
        print("level", l, ": resizing", bstate.B, "->", state.levels[l].B, ", dS:", Sf - Sb, end="")
        if kept:
            print(" [kept, rejected (%d, %g) vs (%d, %g)]" % (res.B, Sf_rej, bstate.B, Sb))
        else:
            print()
809

810
    if _bm_test():
811
812
        state._NestedBlockState__consistency_check("replace level", l)

813
814
815
816
817
818
819
820
821
822
823
    dS = Sf - Sb
    return dS, kept


class NestedMinimizeState(object):
    r"""This object stores information regarding the current entropy minimization
    state, so that the algorithms can resume previously started runs.
    This object can be saved to disk via the :mod:`pickle` interface."""

    def __init__(self):
        self.minimize_state = MinimizeState()
824
        self.l = 0
825
826
        self.bs = []
        self.done = []
827
        self.init = True
828
829
830

    def clear(self):
        self.minimize_state.clear()
831
        self.l = 0
832
833
        del self.bs[:]
        del self.done[:]
834
835
836
837

    def sync(self, state):
        if len(self.bs) == 0:
            for s in state.levels:
838
                self.bs.append(array(s.b.fa))
839
840
841
842
843
844
845
846
847
        while len(self.done) < len(state.levels):
            self.done.append(False)

    def delete(self, l):
        del self.done[l]
        del self.bs[l]

    def insert(self, l, state):
        self.done.insert(l + 1, False)
848
849
        ba = array(state.levels[l].b.fa)
        self.bs.insert(l + 1, ba)
850
851
852

    def mark_level(self, l, done, state):
        while len(state.levels) > len(self.bs):
853
854
            ba = array(state.levels[len(self.bs)].b.fa)
            self.bs.append(ba)
855
856
857
858
859
860
861
            self.done.append(False)

        self.done[l] = done
        if done:
            self.bs[l] = array(state.levels[l].b.fa)

    def clear_mstate(self):
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
        self.minimize_state.clear()

def get_checkpoint_wrap(checkpoint, state, minimize_state, dl_ent):
    S_total = state.entropy(complete=True, dl_ent=dl_ent)

    if checkpoint is not None:
        def check_wrap(bstate, Sb, delta, nmoves, ms):
            l = minimize_state.l
            bstate = state.levels[l]
            S_l = bstate.entropy()
            S = S_total - S_l + Sb
            if bstate is None:
                checkpoint(None, S, delta, nmoves, minimize_state)
            else:
                checkpoint(state, S, delta, nmoves, minimize_state)
        chkp = check_wrap
    else:
        chkp = None

    return chkp
882

883
884

def nested_tree_sweep(state, min_B=None, max_B=None, max_b=None, nsweeps=10,
885
886
887
888
889
                      epsilon=0., r=2., nmerge_sweeps=10, adaptive_sweeps=True,
                      c=0, dl=False, dense=False, multigraph=True,
                      sequential=True, parallel=False, sparse_thresh=100,
                      checkpoint=None, minimize_state=None, frozen_levels=None,
                      confine_layers=False, verbose=False, **kwargs):
890
891
892
893
894
895
896
    r"""Performs one greedy sweep in the entire hierarchy tree, attempting to
    decrease its description length.

    Parameters
    ----------
    state : :class:`~graph_tool.community.NestedBlockState`
        The nested block state.
897
898
899
900
901
902
903
    min_B : ``int`` (optional, default: ``None``)
        Minimum number of blocks at the lowest level.
    max_B : ``int`` (optional, default: ``None``)
        Maximum number of blocks at the lowest level.
    max_b : ``int`` (optional, default: ``None``)
        Block partition used for the maximum number of blocks at the lowest
        level.
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    epsilon : ``float`` (optional, default: ``0``)
        Converge criterion for ``adaptive_sweeps``.
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    sparse_thresh : ``int`` (optional, default: ``100``)
        If the number of nodes in the higher level multigraphs exceeds this
        number, the sparse entropy will be used to find the best partition,
        but the dense entropy will be used to compare different partitions.
    checkpoint : function (optional, default: ``None``)
        If provided, this function will be called after each call to
        :func:`mcmc_sweep`. This can be used to store the current state, so it
        can be continued later. The function must have the following signature:

        .. code-block:: python

            def checkpoint(state, S, delta, nmoves, minimize_state):
                ...

        where `state` is either a :class:`~graph_tool.community.NestedBlockState`
        instance or ``None``, `S` is the current entropy value, `delta` is
        the entropy difference in the last MCMC sweep, and `nmoves` is the
        number of accepted block membership moves. The ``minimize_state``
        argument is a :class:`NestedMinimizeState` instance which specifies
        the current state of the algorithm, which can be stored via :mod:`pickle`,
        and supplied via the ``minimize_state`` option below to continue from an
        interrupted run.

        This function will also be called when the MCMC has finished for the
        current value of :math:`B`, in which case ``state == None``, and the
        remaining parameters will be zero, except the last.
    minimize_state : :class:`NestedMinimizeState` (optional, default: ``None``)
        If provided, this will specify an exact point of execution from which
        the algorithm will continue. The expected object is a
        :class:`NestedMinimizeState` instance which will be passed to the
        callback of the ``checkpoint`` option above, and  can be stored by
        :mod:`pickle`.
960
961
962
    frozen_levels : :class:`list` (optional, default: ``None``)
        List of levels (``int``s) which will remain unmodified during the
        algorithm.
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    dS : ``float``
       The description length difference (per edge) after the move.


    Notes
    -----

    This algorithm performs a constrained agglomerative heuristic on each level
    of the network, via the function :func:`~graph_tool.community.multilevel_minimize`.

    This algorithm has worst-case complexity of :math:`O(N\ln^2 N \times L)`,
    where  :math:`N` is the number of nodes in the network, and :math:`L` is
    the depth of the hierarchy.

    References
    ----------

986
987
988
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
Tiago Peixoto's avatar
Tiago Peixoto committed
989
990
991
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.
992
    .. [peixoto-model-2015] Tiago P. Peixoto, "Model selection and hypothesis
993
       testing for large-scale network models with overlapping groups",
994
       Phys. Rev. X 5, 011033 (2015), :doi:`10.1103/PhysRevX.5.011033`,
995
       :arxiv:`1409.3059`.
996
997
    """

998
999
    dl_ent = kwargs.get("dl_ent", False)

1000
    if minimize_state is None:
For faster browsing, not all history is shown. View entire blame