blockmodel.py 93.7 KB
Newer Older
1 2 3 4 5
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2017 Tiago de Paula Peixoto <tiago@skewed.de>
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import, print_function
import sys
if sys.version_info < (3,):
    range = xrange

from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap, \
27 28
    conv_pickle_state, Vector_size_t, Vector_double, group_vector_property, \
    perfect_prop_hash
29 30
from .. generation import condensation_graph, random_rewire, generate_sbm
from .. stats import label_self_loops, remove_parallel_edges, remove_self_loops
31 32 33 34 35
from .. spectral import adjacency
import random
from numpy import *
import numpy
import copy
36
import collections
37
from collections import OrderedDict
38
import itertools
39
import warnings
40 41 42 43 44 45

from . util import *

from .. dl_import import dl_import
dl_import("from . import libgraph_tool_inference as libinference")

46 47
from . libgraph_tool_inference import PartitionHist, BlockPairHist

48 49 50 51 52 53 54 55 56 57
__test__ = False

def set_test(test):
    global __test__
    __test__ = test

def _bm_test():
    global __test__
    return __test__

58
def get_block_graph(g, B, b, vcount=None, ecount=None, rec=None, drec=None):
59 60 61 62
    if isinstance(ecount, libinference.unity_eprop_t):
        ecount = None
    if isinstance(vcount, libinference.unity_vprop_t):
        vcount = None
63 64 65
    avprops = []
    if vcount is not None:
        avprops.append(vcount)
66
    aeprops = []
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
    if ecount is not None:
        aeprops.append(ecount)
    if rec is not None:
        aeprops.append(rec)
    if drec is not None:
        aeprops.append(drec)
    cg, br, vc, ec, av, ae = condensation_graph(g, b,
                                                avprops=avprops,
                                                aeprops=aeprops,
                                                self_loops=True)
    if vcount is not None:
        vcount = av[0]
        del av[0]
    else:
        vcount = vc
82
    cg.vp.count = vcount
83 84 85 86 87 88

    if ecount is not None:
        ecount = ae[0]
        del ae[0]
    else:
        ecount = ec
89
    cg.ep.count = ecount
90 91 92 93 94 95 96

    if rec is not None:
        cg.ep.rec = ae[0]
        del ae[0]

    if drec is not None:
        cg.ep.drec = ae[0]
97 98
        del ae[0]

99 100 101 102 103
    cg = Graph(cg, vorder=br)

    cg.add_vertex(B - cg.num_vertices())
    return cg

Tiago Peixoto's avatar
Tiago Peixoto committed
104 105 106
def get_entropy_args(kargs):
    kargs = kargs.copy()
    args = DictState(kargs)
107
    deg_dl_kind = args.degree_dl_kind
Tiago Peixoto's avatar
Tiago Peixoto committed
108
    del kargs["degree_dl_kind"]
109 110 111 112 113 114 115 116 117 118 119
    if deg_dl_kind == "entropy":
        kind = libinference.deg_dl_kind.ent
    elif deg_dl_kind == "uniform":
        kind = libinference.deg_dl_kind.uniform
    elif deg_dl_kind == "distributed":
        kind = libinference.deg_dl_kind.dist
    ea = libinference.entropy_args()
    ea.exact = args.exact
    ea.dense = args.dense
    ea.multigraph = args.multigraph
    ea.adjacency = args.adjacency
120
    ea.recs = args.recs
Tiago Peixoto's avatar
Tiago Peixoto committed
121 122 123 124
    del kargs["exact"]
    del kargs["dense"]
    del kargs["multigraph"]
    del kargs["adjacency"]
125
    del kargs["recs"]
126 127 128 129 130 131 132 133 134
    if args.dl:
        ea.partition_dl = args.partition_dl
        ea.degree_dl = args.degree_dl
        ea.edges_dl = args.edges_dl
    else:
        ea.partition_dl = False
        ea.degree_dl = False
        ea.edges_dl = False
    ea.degree_dl_kind = kind
Tiago Peixoto's avatar
Tiago Peixoto committed
135 136 137 138
    del kargs["dl"]
    del kargs["partition_dl"]
    del kargs["degree_dl"]
    del kargs["edges_dl"]
139
    kargs.pop("callback", None)
Tiago Peixoto's avatar
Tiago Peixoto committed
140 141 142
    if len(kargs) > 0:
        raise ValueError("unrecognized entropy arguments: " +
                         str(list(kargs.keys())))
143 144 145 146 147 148 149 150
    return ea

_q_cache_max_n = 10000
def init_q_cache(max_n=None):
    if max_n is None:
        max_n = _q_cache_max_n
    libinference.init_q_cache(min(_q_cache_max_n, max_n))

151 152 153 154 155 156 157 158 159 160 161 162 163
class BlockState(object):
    r"""The stochastic block model state of a given graph.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be modelled.
    b : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Initial block labels on the vertices. If not supplied, it will be
        randomly sampled.
    B : ``int`` (optional, default: ``None``)
        Number of blocks (or vertex groups). If not supplied it will be obtained
        from the parameter ``b``.
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge multiplicities (for multigraphs or block graphs).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex multiplicities (for block graphs).
    recs : list of :class:`~graph_tool.PropertyMap` instances (optional, default: ``[]``)
        List of real or discrete-valued edge covariates.
    rec_types : list of edge covariate types (optional, default: ``[]``)
        List of types of edge covariates. The possible types are:
        ``"real-exponential"``, ``"real-normal"``, ``"discrete-geometric"``,
        ``"discrete-poisson"`` or ``"discrete-binomial"``.
    rec_params : list of ``dict`` (optional, default: ``[]``)
        Model hyperparameters for edge covariates. This should a list of
        ``dict`` instances. The keys depend on the type of edge covariate:

        ``"real-exponential"`` or ``"discrete-poisson"``
            The parameter list is ``["r", "theta"]``, corresponding to the
            parameters of the `Gamma
            <https://en.wikipedia.org/wiki/Gamma_distribution>`_ prior
            distribution.  If unspecified, the default is the "empirical Bayes"
            choice: ``r = 1.0`` and ``theta`` is the global average of the edge
            covariate.

        ``"discrete-geometric"``
            The parameter list is ``["alpha", "beta"]``, corresponding to the
            parameters of the `Beta
            <https://en.wikipedia.org/wiki/Beta_distribution>`_ prior
            distribution. If unspecified, the default is the noninformative
            choice: ``alpha = beta = 1.0``

        ``"discrete-binomial"``
            The parameter list is ``["N", "alpha", "beta"]``, corresponding to
            the number of trials ``N`` and the parameters of the `Beta
            <https://en.wikipedia.org/wiki/Beta_distribution>`_ prior
            distribution. If unspecified, the default is the noninformative
            choice, ``alpha = beta = 1.0``, and ``N`` is taken to be the maximum
            edge covarite value.

        ``"real-normal"``
            The parameter list is ``["m0", "k0", "v0", "nu0"]`` corresponding to
            the `normal
            <https://en.wikipedia.org/wiki/Normal_distribution>`_-`inverse-chi-squared
            <https://en.wikipedia.org/wiki/Inverse-chi-squared_distribution>`_
            prior. If unspecified, the defaults are: ``m0 = rec.fa.mean()``,
            ``k0 = 1``, ``v0 = rec.fa.std() ** 2``, and ``nu0 = 3``, where
            ``rec`` is the corresponding edge covariate property map.

210 211 212 213 214 215 216 217 218 219
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
    pclabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Partition constraint labels on the vertices. This has the same
        interpretation as ``clabel``, but will be used to compute the partition
        description length.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be assumed, otherwise the traditional variant will be used.
220 221 222
    allow_empty : ``bool`` (optional, default: ``True``)
        If ``True``, partition description length computed will allow for empty
        groups.
223 224 225
    max_BE : ``int`` (optional, default: ``1000``)
        If the number of blocks exceeds this value, a sparse matrix is used for
        the block graph. Otherwise a dense matrix will be used.
226

227 228
    """

229 230
    def __init__(self, g, b=None, B=None, eweight=None, vweight=None, recs=[],
                 rec_types=[], rec_params=[], clabel=None, pclabel=None,
231
                 deg_corr=True, allow_empty=False, max_BE=1000, **kwargs):
232
        kwargs = kwargs.copy()
233 234

        # initialize weights to unity, if necessary
235 236 237
        if eweight == "unity":
            eweight = g.new_ep("int", 1)
        elif eweight is None or isinstance(eweight, libinference.unity_eprop_t):
238
            eweight = libinference.unity_eprop_t()
239
        elif eweight.value_type() != "int32_t":
240 241
            eweight = g.own_property(eweight.copy(value_type="int32_t"))
        else:
242 243 244 245 246 247
            eweight = g.own_property(eweight)
        if vweight == "unity":
            vweight = g.new_vp("int", 1)
        elif vweight is None or isinstance(vweight, libinference.unity_vprop_t):
            vweight = libinference.unity_vprop_t()
        elif vweight.value_type() != "int32_t":
248
            vweight = g.own_property(vweight.copy(value_type="int32_t"))
249 250
        else:
            vweight = g.own_property(vweight)
251 252 253
        self.eweight = eweight
        self.vweight = vweight

254 255 256 257 258 259 260
        is_edge_weighted = not isinstance(eweight, libinference.unity_eprop_t)
        is_vertex_weighted = not isinstance(vweight, libinference.unity_vprop_t)
        if not is_edge_weighted and is_vertex_weighted:
            self.eweight = g.new_ep("int", 1)
        if not is_vertex_weighted and is_edge_weighted:
            self.vweight = g.new_vp("int", 1)
        self.is_weighted = is_edge_weighted or is_vertex_weighted
261 262 263 264 265 266

        # configure the main graph and block model parameters
        self.g = g

        self.deg_corr = deg_corr
        self.overlap = False
267
        self.degs = kwargs.pop("degs", libinference.simple_degs_t())
268 269
        if self.degs is None:
            self.degs = libinference.simple_degs_t()
270 271 272 273 274 275 276
        elif self.degs == "weighted":
            idx_ = self.g.vertex_index.copy("int")
            self.degs = libinference.get_block_degs(self.g._Graph__graph,
                                                    _prop("v", self.g, idx_),
                                                    self.eweight._get_any(),
                                                    self.g.num_vertices(True))

277 278 279 280 281 282 283 284

        # ensure we have at most as many blocks as nodes
        if B is not None and b is None:
            B = min(B, self.g.num_vertices())

        if b is None:
            # create a random partition into B blocks.
            if B is None:
285
                raise ValueError("either 'b' or 'B' must be specified")
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
            B = min(B, self.g.num_vertices())
            ba = random.randint(0, B, self.g.num_vertices())
            ba[:B] = arange(B)        # avoid empty blocks
            if B < self.g.num_vertices():
                random.shuffle(ba)
            b = g.new_vp("int")
            b.fa = ba
            self.b = b
        else:
            # if a partition is available, we will incorporate it.
            if isinstance(b, numpy.ndarray):
                self.b = g.new_vp("int")
                self.b.fa = b
            else:
                self.b = b = g.own_property(b.copy(value_type="int32_t"))
            if B is None:
                B = int(self.b.fa.max()) + 1

        if self.b.fa.max() >= B:
            raise ValueError("Maximum value of b is larger or equal to B! (%d vs %d)" %
                             (self.b.fa.max(), B))

308 309 310
        self.recs = recs
        if len(recs) == 0:
            self.rec = self.g.new_ep("vector<double>")
311
        else:
312 313
            recs = [x.copy("double") for x in recs]
            self.rec = group_vector_property(recs)
314
        self.drec = kwargs.pop("drec", None)
315
        if self.drec is None:
316 317 318 319 320 321 322
            if len(self.recs) == 0:
                self.drec = self.g.new_ep("vector<double>")
            else:
                drecs = [x.copy("double") for x in self.recs]
                for r in drecs:
                    r.fa **= 2
                self.drec = group_vector_property(drecs)
323
        else:
324
            self.drec = self.drec.copy()
325

326
        # Construct block-graph
327
        self.bg = get_block_graph(g, B, self.b, self.vweight, self.eweight,
328
                                  rec=self.rec, drec=self.drec)
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
        self.bg.set_fast_edge_removal()

        self.mrs = self.bg.ep["count"]
        self.wr = self.bg.vp["count"]

        self.mrp = self.bg.degree_property_map("out", weight=self.mrs)

        if g.is_directed():
            self.mrm = self.bg.degree_property_map("in", weight=self.mrs)
        else:
            self.mrm = self.mrp

        self.B = B

        if pclabel is not None:
            if isinstance(pclabel, PropertyMap):
                self.pclabel = self.g.own_property(pclabel).copy("int")
            else:
                self.pclabel = self.g.new_vp("int")
                self.pclabel.fa = pclabel
        else:
            self.pclabel = self.g.new_vp("int")

        if clabel is not None:
            if isinstance(clabel, PropertyMap):
                self.clabel = self.g.own_property(clabel).copy("int")
            else:
                self.clabel = self.g.new_vp("int")
                self.clabel.fa = clabel
        elif self.pclabel.fa.max() > 0:
            self.clabel = self.pclabel
        else:
            self.clabel = self.g.new_vp("int")

        if not self._check_clabel():
            raise ValueError("provided clabel is inconsistent with node partition")
365 366 367 368
        if not self._check_clabel(clabel=self.pclabel):
            raise ValueError("provided pclabel is inconsistent with node partition")
        if not self._check_clabel(b=self.pclabel, clabel=self.clabel):
            raise ValueError("provided pclabel and clabel are inconsistent")
369 370 371 372 373

        self.bclabel = self.get_bclabel()

        self.max_BE = max_BE

374
        self.use_hash = self.B > self.max_BE
375

376
        self.ignore_degrees = kwargs.pop("ignore_degrees", None)
377
        if self.ignore_degrees is None:
378
            self.ignore_degrees = self.g.new_vp("bool", False)
379
        else:
380
            self.ignore_degrees = self.g.own_property(self.ignore_degrees).copy("bool")
381

382 383
        self.merge_map = kwargs.pop("merge_map",
                                    self.g.vertex_index.copy("int"))
384

385 386 387 388 389
        self.candidate_blocks = Vector_size_t()
        self.candidate_pos = self.bg.new_vp("int")
        self.empty_blocks = Vector_size_t()
        self.empty_pos = self.bg.new_vp("int")

390 391 392 393 394 395 396 397 398
        self._init_recs(recs, rec_types, rec_params)

        self.allow_empty = allow_empty
        self._abg = self.bg._get_any()
        self._avweight = self.vweight._get_any()
        self._aeweight = self.eweight._get_any()
        self._state = libinference.make_block_state(self, _get_rng())

        if deg_corr:
399
            init_q_cache(max(2 * max(self.get_E(), self.get_N()), 100))
400 401 402 403 404 405 406 407 408 409 410

        self._entropy_args = dict(adjacency=True, dl=True, partition_dl=True,
                                  degree_dl=True, degree_dl_kind="distributed",
                                  edges_dl=True, dense=False, multigraph=True,
                                  exact=True, recs=True)

        if len(kwargs) > 0:
            warnings.warn("unrecognized keyword arguments: " +
                          str(list(kwargs.keys())))

    def _init_recs(self, recs, rec_types, rec_params):
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
        if len(rec_types) != len(recs):
            raise ValueError("The size of 'rec_types' (%d) must be the same of 'recs' (%d)" %
                             (len(rec_types), len(recs)))
        self.rec_types = libcore.Vector_int32_t()
        for rec_type in rec_types:
            if rec_type == "real-exponential":
                rt = libinference.rec_type.real_exponential
            elif rec_type == "real-normal":
                rt = libinference.rec_type.real_normal
            elif rec_type == "discrete-geometric":
                rt = libinference.rec_type.discrete_geometric
            elif rec_type == "discrete-poisson":
                rt = libinference.rec_type.discrete_poisson
            elif rec_type == "discrete-binomial":
                rt = libinference.rec_type.discrete_binomial
            elif rec_type == "delta_t":
                rt = libinference.rec_type.delta_t
428
            else:
429 430
                rt = rec_type
            self.rec_types.append(rt)
431

432 433 434
        self.brec = self.bg.ep.rec
        self.bdrec = self.bg.ep.drec

435 436
        if (len(self.rec_types) > 0 and
            self.rec_types[0] == libinference.rec_type.delta_t): # waiting times
437 438
            self.brecsum = self.bg.degree_property_map("out", self.brec)
            mem = self.ignore_degrees.copy()
439 440
            self.bignore_degrees = self.get_bclabel(clabel=mem).copy("bool")
            self.brecsum.a[self.bignore_degrees.a == 0] = 0
441
        else:
442
            self.brecsum = self.bg.new_vp("double")
443
            self.bignore_degrees = self.bg.new_vp("bool")
444

445
        self.rec_params = rec_params = list(rec_params)
446 447 448 449 450
        while len(rec_params) < len(self.rec_types):
            rec_params.append({})
        self.wparams = libcore.Vector_Vector_double()
        for i, rt in enumerate(self.rec_types):
            ps = Vector_double()
451 452 453 454
            if rt in [libinference.rec_type.real_exponential,
                      libinference.rec_type.discrete_poisson]:
                defaults = OrderedDict([("alpha", 1),
                                        ("beta", self.recs[i].fa.mean())])
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
            elif rt == libinference.rec_type.real_normal:
                defaults = OrderedDict([("m0", self.recs[i].fa.mean()),
                                        ("k0", 1),
                                        ("v0", self.recs[i].fa.std() ** 2),
                                        ("nu0", 3)])
            elif rt == libinference.rec_type.discrete_geometric:
                defaults = OrderedDict([("alpha",1),
                                        ("beta",1)])
            elif rt == libinference.rec_type.discrete_binomial:
                defaults = OrderedDict([("N", self.recs[i].fa.max()),
                                        ("alpha", 1),
                                        ("beta", 1)])
            else: # delta_t
                defaults = OrderedDict([("alpha", 1),
                                        ("beta", self.recs[i].fa.mean())])

            ks = list(defaults.keys())
            defaults.update(rec_params[i])
473
            rec_params[i] = defaults.copy()
474 475 476 477 478 479
            for k in ks:
                ps.append(defaults.pop(k))
            if len(defaults) > 0:
                raise ValueError("unknown parameters for weight type: " +
                                 str(list(defaults.keys())))
            self.wparams.append(ps)
480

481
    def __repr__(self):
482 483 484
        return "<BlockState object with %d blocks (%d nonempty),%s%s for graph %s, at 0x%x>" % \
            (self.B, self.get_nonempty_B(),
             " degree-corrected," if self.deg_corr else "",
485 486 487
             ((" with %d edge covariate%s," % (len(self.rec_types),
                                               "s" if len(self.rec_types) > 1 else ""))
              if len(self.rec_types) > 0 else ""),
488
             str(self.g), id(self))
489 490 491 492 493 494 495 496 497

    def __copy__(self):
        return self.copy()

    def __deepcopy__(self, memo):
        g = copy.deepcopy(self.g, memo)
        return self.copy(g=g)

    def copy(self, g=None, eweight=None, vweight=None, b=None, B=None,
498 499
             deg_corr=None, clabel=None, overlap=False, pclabel=None,
             max_BE=None, **kwargs):
500 501 502
        r"""Copies the block state. The parameters override the state properties, and
         have the same meaning as in the constructor."""

503 504
        recs = ungroup_vector_property(self.rec, range(len(self.recs)))

505 506 507 508 509 510 511 512 513 514 515
        if not overlap:
            state = BlockState(self.g if g is None else g,
                               eweight=self.eweight if eweight is None else eweight,
                               vweight=self.vweight if vweight is None else vweight,
                               b=self.b.copy() if b is None else b,
                               B=(self.B if b is None else None) if B is None else B,
                               clabel=self.clabel if clabel is None else clabel,
                               pclabel=self.pclabel if pclabel is None else pclabel,
                               deg_corr=self.deg_corr if deg_corr is None else deg_corr,
                               max_BE=self.max_BE if max_BE is None else max_BE,
                               degs=self.degs.copy(),
516
                               merge_map=kwargs.pop("merge_map",
517
                                                    self.merge_map.copy()),
518
                               recs=kwargs.pop("recs", recs),
519
                               drec=kwargs.pop("drec", self.drec),
520
                               rec_types=kwargs.pop("rec_types", self.rec_types),
521
                               rec_params=kwargs.pop("rec_params",
522
                                                     self.rec_params),
523 524 525
                               ignore_degrees=kwargs.pop("ignore_degrees",
                                                         self.ignore_degrees),
                               allow_empty=kwargs.pop("allow_empty",
526
                                                      self.allow_empty),
527
                               **kwargs)
528 529 530 531
        else:
            state = OverlapBlockState(self.g if g is None else g,
                                      b=self.b.copy() if b is None else b,
                                      B=(self.B if b is None else None) if B is None else B,
532
                                      recs=kwargs.pop("recs", recs),
533
                                      drec=kwargs.pop("drec", self.drec),
534 535
                                      rec_types=kwargs.pop("rec_types",
                                                           self.rec_types),
536
                                      rec_params=kwargs.pop("rec_params",
537
                                                            self.rec_params),
538 539 540
                                      clabel=self.clabel if clabel is None else clabel,
                                      pclabel=self.pclabel if pclabel is None else pclabel,
                                      deg_corr=self.deg_corr if deg_corr is None else deg_corr,
541
                                      allow_empty=kwargs.pop("allow_empty",
542
                                                             self.allow_empty),
543
                                      max_BE=self.max_BE if max_BE is None else max_BE,
544
                                      **kwargs)
545 546 547 548 549
        return state


    def __getstate__(self):
        state = dict(g=self.g,
550 551
                     eweight=self.eweight if self.is_weighted else None,
                     vweight=self.vweight if self.is_weighted else None,
552 553 554 555 556
                     b=self.b,
                     B=self.B,
                     clabel=self.clabel,
                     pclabel=self.pclabel,
                     deg_corr=self.deg_corr,
557
                     allow_empty=self.allow_empty,
558
                     max_BE=self.max_BE,
559 560
                     recs=ungroup_vector_property(self.rec,
                                                  range(len(self.recs))),
561 562
                     drec=self.drec,
                     rec_types=list(self.rec_types),
563
                     rec_params=self.rec_params,
564
                     ignore_degrees=self.ignore_degrees.copy("int"),
565 566 567 568 569 570 571 572 573
                     degs=self.degs if not isinstance(self.degs,
                                                      libinference.simple_degs_t) else None,
                     merge_map=self.merge_map)
        return state

    def __setstate__(self, state):
        conv_pickle_state(state)
        self.__init__(**state)

574
    def get_block_state(self, b=None, vweight=False, **kwargs):
575 576 577 578 579 580
        r"""Returns a :class:`~graph_tool.community.BlockState`` corresponding to the
        block graph (i.e. the blocks of the current state become the nodes). The
        parameters have the same meaning as the in the constructor. If ``vweight
        == True`` the nodes of the block state are weighted with the node
        counts."""

581 582
        deg_corr = kwargs.pop("deg_corr", self.deg_corr)
        copy_bg = kwargs.pop("copy_bg", True)
583

584 585 586
        if deg_corr and vweight:
            if isinstance(self.degs, libinference.simple_degs_t):
                degs = libinference.get_block_degs(self.g._Graph__graph,
587
                                                   _prop("v", self.g, self.b),
588 589
                                                   self.eweight._get_any(),
                                                   self.get_B())
590 591 592 593
            else:
                degs = libinference.get_weighted_block_degs(self.g._Graph__graph,
                                                            self.degs,
                                                            _prop("v", self.g,
594 595
                                                                  self.b),
                                                            self.get_B())
596 597 598
        else:
            degs = libinference.simple_degs_t()

599 600 601 602 603 604
        if copy_bg:
            bg = self.bg.copy()
            eweight = bg.own_property(self.mrs.copy())
        else:
            bg = self.bg
            eweight = self.mrs
605 606
            if self.g.get_vertex_filter()[0] is not None:
                bg = GraphView(bg, vfilt=numpy.ones(bg.num_vertices()))
607 608

        recs = False
609 610 611 612 613 614 615 616 617
        if vweight == "nonempty":
            vweight = bg.new_vp("int", self.wr.a > 0)
        elif vweight == "unity":
            vweight = bg.new_vp("int", 1)
        elif vweight == True:
            if copy_bg:
                vweight = bg.own_property(self.wr.copy())
            else:
                vweight = self.wr
618
            recs = True
619 620 621 622 623 624
        else:
            vweight = None
        state = BlockState(bg,
                           eweight=eweight,
                           vweight=vweight,
                           b=bg.vertex_index.copy("int") if b is None else b,
625
                           deg_corr=deg_corr,
626
                           allow_empty=kwargs.pop("allow_empty",
627
                                                  self.allow_empty),
628
                           degs=degs,
629
                           rec_types=kwargs.pop("rec_types",
630
                                                self.rec_types if recs else []),
631 632 633
                           recs=kwargs.pop("recs",
                                           ungroup_vector_property(bg.ep.rec,
                                                                   range(len(self.rec_types)))
634
                                           if (recs and len(self.rec_types) > 0)
635 636
                                           else []),
                           drec=kwargs.pop("drec",
637
                                           bg.ep.drec if (recs and len(self.rec_types) > 0)
638
                                           else None),
639 640 641 642 643
                           rec_params=kwargs.pop("rec_params", self.rec_params),
                           ignore_degrees=kwargs.pop("ignore_degrees",
                                                     self.get_bclabel(clabel=self.ignore_degrees)),
                           clabel=kwargs.pop("clabel", self.get_bclabel()),
                           pclabel=kwargs.pop("pclabel", self.get_bpclabel()),
644
                           max_BE=self.max_BE,
645
                           **kwargs)
646 647
        return state

648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663
    def get_E(self):
        r"Returns the total number of edges."
        return int(self.eweight.fa.sum()) if self.is_weighted else self.g.num_edges()

    def get_N(self):
        r"Returns the total number of nodes."
        return int(self.vweight.fa.sum()) if self.is_weighted else self.g.num_vertices()

    def get_B(self):
        r"Returns the total number of blocks."
        return self.bg.num_vertices()

    def get_nonempty_B(self):
        r"Returns the total number of nonempty blocks."
        return int((self.wr.a > 0).sum())

664
    def get_bclabel(self, clabel=None):
Tiago Peixoto's avatar
Tiago Peixoto committed
665
        r"""Returns a :class:`~graph_tool.PropertyMap` corresponding to constraint
666 667 668 669
        labels for the block graph."""

        bclabel = self.bg.new_vertex_property("int")
        reverse_map(self.b, bclabel)
670 671 672
        if clabel is None:
            clabel = self.clabel
        pmap(bclabel, clabel)
673 674 675 676 677 678
        return bclabel

    def get_bpclabel(self):
        r"""Returns a :class:`~graph_tool.PropertyMap`` corresponding to partition
        constraint labels for the block graph."""

679
        return self.get_bclabel(self.pclabel)
680

681 682 683 684 685 686 687 688 689 690
    def _check_clabel(self, clabel=None, b=None):
        if b is None:
            b = self.b
        if clabel is None:
            clabel = self.clabel
        joint = group_vector_property([b, clabel])
        joint = perfect_prop_hash([joint])[0]
        joint = b.fa.copy()
        b = b.fa.copy()
        continuous_map(joint)
691
        continuous_map(b)
692
        return (b == joint).all()
693

694 695 696 697 698 699
    def _couple_state(self, state, entropy_args):
        if state is None:
            self._state.decouple_state()
        else:
            self._state.couple_state(state._state, entropy_args)

700 701 702 703
    def get_blocks(self):
        r"""Returns the property map which contains the block labels for each vertex."""
        return self.b

704 705 706 707 708 709
    def set_blocks(self, b):
        r"""Sets the internal partition of the state."""
        if b.value_type() != "int32_t":
            b = b.copy("int32_t")
        self._state.set_partition(_prop("v", self.g, b))

710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
    def get_bg(self):
        r"""Returns the block graph."""
        return self.bg

    def get_ers(self):
        r"""Returns the edge property map of the block graph which contains the
        :math:`e_{rs}` matrix entries.  For undirected graphs, the diagonal
        values (self-loops) contain :math:`e_{rr}/2`."""
        return self.mrs

    def get_er(self):
        r"""Returns the vertex property map of the block graph which contains the number
        :math:`e_r` of half-edges incident on block :math:`r`. If the graph is
        directed, a pair of property maps is returned, with the number of
        out-edges :math:`e^+_r` and in-edges :math:`e^-_r`, respectively."""
        if self.bg.is_directed():
            return self.mrp, self.mrm
        else:
            return self.mrp

    def get_nr(self):
        r"""Returns the vertex property map of the block graph which contains the block
        sizes :math:`n_r`."""
        return self.wr

735 736
    def entropy(self, adjacency=True, dl=True, partition_dl=True,
                degree_dl=True, degree_dl_kind="distributed", edges_dl=True,
737 738
                dense=False, multigraph=True, deg_entropy=True, recs=True,
                exact=True, **kwargs):
Tiago Peixoto's avatar
Tiago Peixoto committed
739 740
        r"""Calculate the entropy (a.k.a. negative log-likelihood) associated
        with the current block partition.
741 742 743

        Parameters
        ----------
744 745 746 747 748 749
        adjacency : ``bool`` (optional, default: ``True``)
            If ``True``, the adjacency term of the description length will be
            included.
        dl : ``bool`` (optional, default: ``True``)
            If ``True``, the description length for the parameters will be
            included.
750 751
        partition_dl : ``bool`` (optional, default: ``True``)
            If ``True``, and ``dl == True`` the partition description length
752
            will be included.
753 754
        degree_dl : ``bool`` (optional, default: ``True``)
            If ``True``, and ``dl == True`` the degree sequence description
755 756 757 758
            length will be included (for degree-corrected models).
        degree_dl_kind : ``str`` (optional, default: ``"distributed"``)
            This specifies the prior used for the degree sequence. It must be
            one of: ``"uniform"``, ``"distributed"`` (default) or ``"entropy"``.
759 760
        edges_dl : ``bool`` (optional, default: ``True``)
            If ``True``, and ``dl == True`` the edge matrix description length
761
            will be included.
762 763
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
764
        multigraph : ``bool`` (optional, default: ``True``)
765 766 767
            If ``True``, the multigraph entropy will be used.
        deg_entropy : ``bool`` (optional, default: ``True``)
            If ``True``, the degree entropy term that is independent of the
768
            network partition will be included (for degree-corrected models).
769 770 771
        recs : ``bool`` (optional, default: ``True``)
            If ``True``, the likelihood for real or discrete-valued edge
            covariates is computed.
772 773 774
        exact : ``bool`` (optional, default: ``True``)
            If ``True``, the exact expressions will be used. Otherwise,
            Stirling's factorial approximation will be used for some terms.
775 776 777

        Notes
        -----
778

Tiago Peixoto's avatar
Tiago Peixoto committed
779
        The "entropy" of the state is the negative log-likelihood of the
780
        microcanonical SBM, that includes the generated graph
Tiago Peixoto's avatar
Tiago Peixoto committed
781 782
        :math:`\boldsymbol{A}` and the model parameters
        :math:`\boldsymbol{\theta}`,
783 784 785

        .. math::

Tiago Peixoto's avatar
Tiago Peixoto committed
786 787
           \Sigma &= - \ln P(\boldsymbol{A},\boldsymbol{\theta}) \\
                  &= - \ln P(\boldsymbol{A}|\boldsymbol{\theta}) - \ln P(\boldsymbol{\theta}).
788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807

        This value is also called the `description length
        <https://en.wikipedia.org/wiki/Minimum_description_length>`_ of the data,
        and it corresponds to the amount of information required to describe it
        (in `nats <https://en.wikipedia.org/wiki/Nat_(unit)>`_).

        For the traditional blockmodel (``deg_corr == False``), the model
        parameters are :math:`\boldsymbol{\theta} = \{\boldsymbol{e},
        \boldsymbol{b}\}`, where :math:`\boldsymbol{e}` is the matrix of edge
        counts between blocks, and :math:`\boldsymbol{b}` is the partition of the
        nodes into blocks. For the degree-corrected blockmodel (``deg_corr ==
        True``), we have an additional set of parameters, namely the degree
        sequence :math:`\boldsymbol{k}`.

        For the traditional blockmodel, the model likelihood is

        .. math::

            P(\boldsymbol{A}|\boldsymbol{e},\boldsymbol{b}) &= \frac{\prod_{r<s}e_{rs}!\prod_re_{rr}!!}{\prod_rn_r^{e_r}}\times \frac{1}{\prod_{i<j}A_{ij}!\prod_iA_{ii}!!},\\
            P(\boldsymbol{A}|\boldsymbol{e},\boldsymbol{b}) &= \frac{\prod_{rs}e_{rs}!}{\prod_rn_r^{e_r}}\times \frac{1}{\prod_{ij}A_{ij}!},
808 809 810 811 812 813

        for undirected and directed graphs, respectively, where :math:`e_{rs}`
        is the number of edges from block :math:`r` to :math:`s` (or the number
        of half-edges for the undirected case when :math:`r=s`), and :math:`n_r`
        is the number of vertices in block :math:`r` .

814
        For the degree-corrected variant the equivalent expressions are
815 816 817

        .. math::

818 819
            P(\boldsymbol{A}|\boldsymbol{e},\boldsymbol{b},\boldsymbol{k}) &= \frac{\prod_{r<s}e_{rs}!\prod_re_{rr}!!}{\prod_re_r!}\times \frac{\prod_ik_i!}{\prod_{i<j}A_{ij}!\prod_iA_{ii}!!},\\
            P(\boldsymbol{A}|\boldsymbol{e},\boldsymbol{b},\boldsymbol{k}) &= \frac{\prod_{rs}e_{rs}!}{\prod_re_r^+!\prod_re_r^-!}\times \frac{\prod_ik_i^+!\prod_ik_i^-!}{\prod_{ij}A_{ij}!},
820 821 822 823 824 825

        where :math:`e_r = \sum_se_{rs}` is the number of half-edges incident on
        block :math:`r`, and :math:`e^+_r = \sum_se_{rs}` and :math:`e^-_r =
        \sum_se_{sr}` are the numbers of out- and in-edges adjacent to block
        :math:`r`, respectively.

826 827 828
        If ``exact == False``, `Stirling's approximation
        <https://en.wikipedia.org/wiki/Stirling%27s_approximation>`_ is used in
        the above expression.
829

830 831
        If ``dense == True``, the likelihood for the non-degree-corrected model
        becomes instead
832

833
        .. math::
834

835 836
            P(\boldsymbol{A}|\boldsymbol{e},\boldsymbol{b})^{-1} &= \prod_{r<s}{n_rn_s\choose e_{rs}}\prod_r{{n_r\choose 2}\choose e_{rr}/2},\\
            P(\boldsymbol{A}|\boldsymbol{e},\boldsymbol{b})^{-1} &= \prod_{rs}{n_rn_s\choose e_{rs}}
837

838 839
        if ``multigraph == False``, otherwise we replace :math:`{n\choose
        m}\to\left(\!\!{n\choose m}\!\!\right)` above, where
Tiago Peixoto's avatar
Tiago Peixoto committed
840
        :math:`\left(\!\!{n\choose m}\!\!\right) = {n+m-1\choose m}`.  A "dense"
841 842
        entropy for the degree-corrected model is not available, and if
        requested will raise a :exc:`NotImplementedError`.
843

844 845 846 847
        If ``dl == True``, the description length :math:`\mathcal{L} = -\ln
        P(\boldsymbol{\theta})` of the model will be returned as well. The terms
        :math:`P(\boldsymbol{e})` and :math:`P(\boldsymbol{b})` are described in
        described in :func:`model_entropy`.
848

849 850 851
        For the degree-corrected model we need to specify the prior
        :math:`P(\boldsymbol{k})` for the degree sequence as well. Here there
        are three options:
852

853
        1. ``degree_dl_kind == "uniform"``
854

855
            .. math::
856

857
                P(\boldsymbol{k}|\boldsymbol{e},\boldsymbol{b}) = \prod_r\left(\!\!{n_r\choose e_r}\!\!\right)^{-1}.
858

Tiago Peixoto's avatar
Tiago Peixoto committed
859 860 861 862
            This corresponds to a noninformative prior, where the degrees are
            sampled from a uniform distribution.

        2. ``degree_dl_kind == "distributed"`` (default)
863

864
            .. math::
865

866
                P(\boldsymbol{k}|\boldsymbol{e},\boldsymbol{b}) = \prod_r\frac{\prod_k\eta_k^r!}{n_r!} \prod_r q(e_r, n_r)^{-1}
867

868 869 870 871 872
            with :math:`\eta_k^r` being the number of nodes with degree
            :math:`k` in group :math:`r`, and :math:`q(n,m)` being the number of
            `partitions
            <https://en.wikipedia.org/wiki/Partition_(number_theory)>`_ of
            integer :math:`n` into at most :math:`m` parts.
873

Tiago Peixoto's avatar
Tiago Peixoto committed
874 875 876 877
            This corresponds to a prior for the degree sequence conditioned on
            the degree frequencies, which are themselves sampled from a uniform
            hyperprior. This option should be preferred in most cases.

878
        3. ``degree_dl_kind == "entropy"``
879

880
            .. math::
881

882
                P(\boldsymbol{k}|\boldsymbol{e},\boldsymbol{b}) \approx \prod_r\exp\left(-n_rH(\boldsymbol{k}_r)\right)
883

884 885
            where :math:`H(\boldsymbol{k}_r) = -\sum_kp_r(k)\ln p_r(k)` is the
            entropy of the degree distribution inside block :math:`r`.
886

887 888 889
            Note that, differently from the other two choices, this represents
            only an approximation of the description length. It is meant to be
            used only for comparison purposes, and should be avoided in practice.
890

891 892
        For the directed case, the above expressions are duplicated for the in-
        and out-degrees.
893

Tiago Peixoto's avatar
Tiago Peixoto committed
894 895 896 897 898 899 900 901 902 903 904
        References
        ----------

        .. [peixoto-nonparametric-2017] Tiago P. Peixoto, "Nonparametric
           Bayesian inference of the microcanonical stochastic block model",
           Phys. Rev. E 95 012317 (2017), :doi:`10.1103/PhysRevE.95.012317`,
           :arxiv:`1610.02703`
        .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block
           structures and high-resolution model selection in large networks ",
           Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
           :arxiv:`1310.4377`.
905 906 907 908 909 910 911 912
        """

        if _bm_test() and kwargs.get("test", True):
            args = dict(**locals())
            args.update(**kwargs)
            del args["self"]
            del args["kwargs"]

913 914
        kwargs = kwargs.copy()

915 916
        E = self.get_E()
        N = self.get_N()
917

918 919
        S = 0
        if adjacency:
920
            S += self._state.entropy(dense, multigraph, deg_entropy, exact, recs)
921

922 923 924 925 926
            if not dense and not exact:
                if multigraph:
                    S -= E
                else:
                    S += E
927 928 929 930 931 932

        if dl:
            if partition_dl:
                S += self._state.get_partition_dl()

            if edges_dl:
933 934 935
                if self.allow_empty:
                    actual_B = self.B
                else:
936
                    actual_B = self.get_nonempty_B()
937 938 939 940
                S += model_entropy(actual_B, N, E,
                                   directed=self.g.is_directed(), nr=False)

            if self.deg_corr and degree_dl:
941 942 943 944 945 946 947
                if degree_dl_kind == "entropy":
                    kind = libinference.deg_dl_kind.ent
                elif degree_dl_kind == "uniform":
                    kind = libinference.deg_dl_kind.uniform
                elif degree_dl_kind == "distributed":
                    kind = libinference.deg_dl_kind.dist
                S += self._state.get_deg_dl(kind)
948

949
        callback = kwargs.pop("callback", None)
950 951 952
        if callback is not None:
            S += callback(self)

953
        if kwargs.pop("test", True) and _bm_test():
954 955 956 957
            assert not isnan(S) and not isinf(S), \
                "invalid entropy %g (%s) " % (S, str(args))

            args["test"] = False
958 959
            state_copy = self.copy()
            Salt = state_copy.entropy(**args)
960

961 962 963
            assert abs(S - Salt) < 1e-6, \
                "entropy discrepancy after copying (%g %g)" % (S, Salt)

964 965 966
        if len(kwargs) > 0:
            raise ValueError("unrecognized keyword arguments: " +
                             str(list(kwargs.keys())))
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013
        return S

    def get_matrix(self):
        r"""Returns the block matrix (as a sparse :class:`~scipy.sparse.csr_matrix`),
        which contains the number of edges between each block pair.

        .. warning::

           This corresponds to the adjacency matrix of the block graph, which by
           convention includes twice the amount of edges in the diagonal entries
           if the graph is undirected.

        Examples
        --------

        .. testsetup:: get_matrix

           gt.seed_rng(42)
           np.random.seed(42)
           from pylab import *

        .. doctest:: get_matrix

           >>> g = gt.collection.data["polbooks"]
           >>> state = gt.BlockState(g, B=5, deg_corr=True)
           >>> state.mcmc_sweep(niter=1000)
           (...)
           >>> m = state.get_matrix()
           >>> figure()
           <...>
           >>> matshow(m.todense())
           <...>
           >>> savefig("bloc_mat.pdf")

        .. testcleanup:: get_matrix

           savefig("bloc_mat.png")

        .. figure:: bloc_mat.*
           :align: center

           A  5x5 block matrix.

       """

        return adjacency(self.bg, weight=self.mrs)

1014
    def virtual_vertex_move(self, v, s, **kwargs):
1015 1016 1017
        r"""Computes the entropy difference if vertex ``v`` is moved to block ``s``. The
        remaining parameters are the same as in
        :meth:`graph_tool.BlockState.entropy`."""
1018
        return self._state.virtual_move(int(v), self.b[v], s,
1019 1020
                                        get_entropy_args(dict(self._entropy_args,
                                                              **kwargs)))
1021 1022

    def move_vertex(self, v, s):
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
        r"""Move vertex ``v`` to block ``s``.

        This optionally accepts a list of vertices and blocks to move
        simultaneously.
        """
        if not isinstance(v, collections.Iterable):
            self._state.move_vertex(int(v), s)
        else:
            self._state.move_vertices(numpy.asarray(v, dtype="uint64"),
                                      numpy.asarray(s, dtype="uint64"))
1033 1034 1035 1036

    def remove_vertex(self, v):
        r"""Remove vertex ``v`` from its current group.

1037 1038
        This optionally accepts a list of vertices to remove.

1039 1040 1041 1042 1043 1044
        .. warning::

           This will leave the state in an inconsistent state before the vertex
           is returned to some other group, or if the same vertex is removed
           twice.
        """
1045
        if isinstance(v, collections.Iterable):
1046 1047
            if not isinstance(v, numpy.ndarray):
                v = list(v)
1048
            self._state.remove_vertices(numpy.asarray(v, dtype="uint64"))
1049 1050
        else:
            self._state.remove_vertex(int(v))
1051 1052 1053 1054

    def add_vertex(self, v, r):
        r"""Add vertex ``v`` to block ``r``.