#! /usr/bin/env python # -*- coding: utf-8 -*- # # graph_tool -- a general graph manipulation python module # # Copyright (C) 2006-2014 Tiago de Paula Peixoto # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . from __future__ import division, absolute_import, print_function import sys if sys.version_info < (3,): range = xrange from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap from .. stats import label_self_loops import random from numpy import * import numpy from scipy.optimize import fsolve, fminbound import scipy.special from collections import defaultdict import copy import heapq from .. dl_import import dl_import dl_import("from . import libgraph_tool_community as libcommunity") def get_block_graph(g, B, b, vcount, ecount): cg, br, vcount, ecount = condensation_graph(g, b, vweight=vcount, eweight=ecount, self_loops=True)[:4] cg.vp["count"] = vcount cg.ep["count"] = ecount cg = Graph(cg, vorder=br) cg.add_vertex(B - cg.num_vertices()) return cg class BlockState(object): r"""This class encapsulates the block state of a given graph. This must be instantiated and used by functions such as :func:mcmc_sweep. Parameters ---------- g : :class:~graph_tool.Graph Graph to be modelled. eweight : :class:~graph_tool.PropertyMap (optional, default: None) Edge weights (i.e. multiplicity). vweight : :class:~graph_tool.PropertyMap (optional, default: None) Vertex weights (i.e. multiplicity). b : :class:~graph_tool.PropertyMap (optional, default: None) Initial block labels on the vertices. If not supplied, it will be randomly sampled. B : int (optional, default: None) Number of blocks. If not supplied it will be either obtained from the parameter b, or set to the maximum possible value according to the minimum description length. clabel : :class:~graph_tool.PropertyMap (optional, default: None) This parameter provides a constraint label, such that vertices with different labels will not be allowed to belong to the same block. If not given, all labels will be assumed to be the same. deg_corr : bool (optional, default: True) If True, the degree-corrected version of the blockmodel ensemble will be assumed, otherwise the traditional variant will be used. max_BE : int (optional, default: 1000) If the number of blocks exceeds this number, a sparse representation of the block graph is used, which is slightly less efficient, but uses less memory, """ def __init__(self, g, eweight=None, vweight=None, b=None, B=None, clabel=None, deg_corr=True, max_BE=1000): self.g = g if eweight is None: eweight = g.new_edge_property("int") eweight.a = 1 elif eweight.value_type() != "int32_t": eweight = eweight.copy(value_type="int32_t") if vweight is None: vweight = g.new_vertex_property("int") vweight.a = 1 elif vweight.value_type() != "int32_t": vweight = vweight.copy(value_type="int32_t") self.eweight = eweight self.vweight = vweight self.E = int(self.eweight.fa.sum()) self.N = int(self.vweight.fa.sum()) self.deg_corr = deg_corr if b is None: if B is None: B = get_max_B(self.N, self.E, directed=g.is_directed()) ba = random.randint(0, B, g.num_vertices()) ba[:B] = arange(B) # avoid empty blocks random.shuffle(ba) b = g.new_vertex_property("int") b.fa = ba self.b = b else: if B is None: B = int(b.fa.max()) + 1 self.b = b = b.copy(value_type="int32_t") if b.fa.max() >= B: raise ValueError("Maximum value of b is larger or equal to B!") # Construct block-graph self.bg = get_block_graph(g, B, b, vweight, eweight) self.bg.set_fast_edge_removal() self.mrs = self.bg.ep["count"] self.wr = self.bg.vp["count"] del self.bg.ep["count"] del self.bg.vp["count"] self.mrp = self.bg.degree_property_map("out", weight=self.mrs) if g.is_directed(): self.mrm = self.bg.degree_property_map("in", weight=self.mrs) else: self.mrm = self.mrp self.vertices = libcommunity.get_vector(B) self.vertices.a = arange(B) self.B = B self.clabel = clabel if self.clabel is None: self.clabel = self.g.new_vertex_property("int") self.bclabel = self.bg.new_vertex_property("int") libcommunity.vector_rmap(self.b.a, self.bclabel.a) libcommunity.vector_map(self.bclabel.a, self.clabel.a) self.emat = None if max_BE is None: max_BE = 1000 self.max_BE = max_BE # used by mcmc_sweep() self.egroups = None self.nsampler = None self.sweep_vertices = None # used by merge_sweep() self.bnsampler = None libcommunity.init_safelog(int(2 * max(self.E, self.N))) libcommunity.init_xlogx(int(2 * max(self.E, self.N))) libcommunity.init_lgamma(int(3 * max(self.E, self.N))) def __get_emat(self): if self.emat is None: self.__regen_emat() return self.emat def __regen_emat(self): if self.B <= self.max_BE: self.emat = libcommunity.create_emat(self.bg._Graph__graph) else: self.emat = libcommunity.create_ehash(self.bg._Graph__graph) def __build_egroups(self, empty=False): self.esrcpos = self.g.new_edge_property("int") self.etgtpos = self.g.new_edge_property("int") self.is_weighted = True if self.eweight.fa.max() > 1 else False self.egroups = libcommunity.build_egroups(self.g._Graph__graph, self.bg._Graph__graph, _prop("v", self.g, self.b), _prop("e", self.g, self.eweight), _prop("e", self.g, self.esrcpos), _prop("e", self.g, self.etgtpos), self.is_weighted, empty) def __build_nsampler(self): self.nsampler = libcommunity.init_neighbour_sampler(self.g._Graph__graph, _prop("e", self.g, self.eweight)) def __build_bnsampler(self): self.bnsampler = libcommunity.init_neighbour_sampler(self.bg._Graph__graph, _prop("e", self.bg, self.mrs)) def __cleanup_bg(self): emask = self.bg.new_edge_property("bool") emask.a = self.mrs.a[:len(emask.a)] > 0 self.bg.set_edge_filter(emask) self.bg.purge_edges() def get_blocks(self): r"""Returns the property map which contains the block labels for each vertex.""" return self.b def get_bg(self): r"""Returns the block graph.""" return self.bg def get_ers(self): r"""Returns the edge property map of the block graph which contains the :math:e_{rs} matrix entries.""" return self.mrs def get_er(self): r"""Returns the vertex property map of the block graph which contains the number :math:e_r of half-edges incident on block :math:r. If the graph is directed, a pair of property maps is returned, with the number of out-edges :math:e^+_r and in-edges :math:e^-_r, respectively.""" if self.bg.is_directed(): return self.mrp, self.mrm else: return self.mrp def get_nr(self): r"""Returns the vertex property map of the block graph which contains the block sizes :math:n_r.""" return self.wr def get_eweight(self): r"""Returns the block edge counts associated with the block matrix :math:e_{rs}. For directed graphs it is identical to :math:e_{rs}, but for undirected graphs it is identical except for the diagonal, which is :math:e_{rr}/2.""" eweight = self.mrs.copy() if not self.g.is_directed(): sl = label_self_loops(self.bg, mark_only=True) eweight.a[sl.a > 0] /= 2 return eweight def entropy(self, complete=False, random=False, dl=False, dense=False, multigraph=False): r"""Calculate the entropy per edge associated with the current block partition. Parameters ---------- complete : bool (optional, default: False) If True, the complete entropy will be returned, including constant terms not relevant to the block partition. random : bool (optional, default: False) If True, the entropy entropy corresponding to an equivalent random graph (i.e. no block partition) will be returned. dl : bool (optional, default: False) If True, the full description length will be returned. dense : bool (optional, default: False) If True, the "dense" variant of the entropy will be computed. multigraph : bool (optional, default: False) If True, the multigraph entropy will be used. Only has an effect if dense == True. Notes ----- For the traditional blockmodel (deg_corr == False), the entropy is given by .. math:: \mathcal{S}_t &\cong E - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right), \\ \mathcal{S}^d_t &\cong E - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right), for undirected and directed graphs, respectively, where :math:e_{rs} is the number of edges from block :math:r to :math:s (or the number of half-edges for the undirected case when :math:r=s), and :math:n_r is the number of vertices in block :math:r . For the degree-corrected variant with "hard" degree constraints the equivalent expressions are .. math:: \mathcal{S}_c &\cong -E -\sum_kN_k\ln k! - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e_re_s}\right), \\ \mathcal{S}^d_c &\cong -E -\sum_{k^+}N_{k^+}\ln k^+! -\sum_{k^-}N_{k^-}\ln k^-! - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e^+_re^-_s}\right), where :math:e_r = \sum_se_{rs} is the number of half-edges incident on block :math:r, and :math:e^+_r = \sum_se_{rs} and :math:e^-_r = \sum_se_{sr} are the number of out- and in-edges adjacent to block :math:r, respectively. If complete == False only the last term of the equations above will be returned. If random == True it will be assumed that :math:B=1 despite the actual :math:e_{rs} matrix. If dl == True, the description length :math:\mathcal{L}_t of the model will be returned as well, as described in :func:model_entropy. Note that for the degree-corrected version the description length is .. math:: \mathcal{L}_c = \mathcal{L}_t - \sum_rn_r\sum_kp^r_k\ln p^r_k, where :math:p^r_k is the fraction of nodes in block $r$ with degree :math:k. For directed graphs we have instead :math:k \to (k^-, k^+). If the "dense" entropies are requested, they will be computed as .. math:: \mathcal{S}_t &= \sum_{r>s} \ln{\textstyle {n_rn_s \choose e_{rs}}} + \sum_r \ln{\textstyle {{n_r\choose 2}\choose e_{rr}/2}}\\ \mathcal{S}^d_t &= \sum_{rs} \ln{\textstyle {n_rn_s \choose e_{rs}}}, for simple graphs, and .. math:: \mathcal{S}_m &= \sum_{r>s} \ln{\textstyle \left(\!\!{n_rn_s \choose e_{rs}}\!\!\right)} + \sum_r \ln{\textstyle \left(\!\!{\left(\!{n_r\choose 2}\!\right)\choose e_{rr}/2}\!\!\right)}\\ \mathcal{S}^d_m &= \sum_{rs} \ln{\textstyle \left(\!\!{n_rn_s \choose e_{rs}}\!\!\right)}, for multigraphs (i.e. multigraph == True). Note that in all cases the value returned corresponds to the entropy per edge, i.e. :math:(\mathcal{S}_{t/c}\; [\,+\, \mathcal{L}_{t/c}])/ E. """ E = self.E N = self.N if dense: if random: bg = get_block_graph(self.bg, 1, self.bg.new_vertex_property("int"), self.wr, self.mrs) S = libcommunity.entropy_dense(bg._Graph__graph, _prop("e", bg, bg.ep["count"]), _prop("v", bg, bg.vp["count"]), multigraph) else: S = libcommunity.entropy_dense(self.bg._Graph__graph, _prop("e", self.bg, self.mrs), _prop("v", self.bg, self.wr), multigraph) else: if self.deg_corr: if self.g.is_directed(): S_rand = E * log(E) else: S_rand = E * log(2 * E) else: ak = E / float(N) if self.g.is_directed() else 2 * E / float(N) S_rand = E * log (N / ak) if random: S = S_rand else: S = libcommunity.entropy(self.bg._Graph__graph, _prop("e", self.bg, self.mrs), _prop("v", self.bg, self.mrp), _prop("v", self.bg, self.mrm), _prop("v", self.bg, self.wr), self.deg_corr) if complete: if self.deg_corr: S -= E for v in self.g.vertices(): S -= scipy.special.gammaln(v.out_degree() + 1) if self.g.is_directed(): S -= scipy.special.gammaln(v.in_degree() + 1) else: S += E else: S -= S_rand if dl: if random: S += model_entropy(1, N, E, directed=self.g.is_directed()) * E else: S += model_entropy(self.B, N, E, directed=self.g.is_directed(), nr=self.wr.a) * E if self.deg_corr: S_seq = libcommunity.deg_entropy(self.g._Graph__graph, _prop("v", self.g, self.b), self.B) S += S_seq return S / E def remove_vertex(self, v): r"""Remove vertex v from its current block.""" libcommunity.remove_vertex(self.g._Graph__graph, self.bg._Graph__graph, int(v), _prop("e", self.bg, self.mrs), _prop("v", self.bg, self.mrp), _prop("v", self.bg, self.mrm), _prop("v", self.bg, self.wr), _prop("v", self.g, self.b)) self.egroups = None self.nb_list = None self.nb_count = None def add_vertex(self, v, r): r"""Add vertex v to block r.""" libcommunity.add_vertex(v.get_graph()._Graph__graph, self.bg._Graph__graph, int(v), int(r), _prop("e", self.bg, self.mrs), _prop("v", self.bg, self.mrp), _prop("v", self.bg, self.mrm), _prop("v", self.bg, self.wr), _prop("v", self.g, self.b)) self.egroups = None self.nb_list = None self.nb_count = None def move_vertex(self, v, nr): r"""Move vertex v to block nr, and return the entropy difference.""" dS = libcommunity.move_vertex(self.g._Graph__graph, self.bg._Graph__graph, self.__get_emat(), int(v), int(nr), _prop("e", self.bg, self.mrs), _prop("v", self.bg, self.mrp), _prop("v", self.bg, self.mrm), _prop("v", self.bg, self.wr), _prop("v", self.g, self.b), self.deg_corr, _prop("e", self.bg, self.eweight), _prop("v", self.bg, self.vweight)) self.egroups = None self.nb_list = None self.nb_count = None return dS / float(self.E) def get_matrix(self, reorder=False, niter=0, ret_order=False): r"""Returns the block matrix, which contains the number of edges between each block pair. Parameters ---------- reorder : bool (optional, default: False) If True, the matrix is reordered so that blocks which are 'similar' are close together. niter : int (optional, default: 0) Number of iterations performed to obtain the best ordering. If niter == 0 it will automatically determined. Only has effect if reorder == True. ret_order : bool (optional, default: False) If True, the vertex ordering is returned. Only has effect if reorder == True. Examples -------- .. testsetup:: get_matrix gt.seed_rng(42) np.random.seed(42) from pylab import * .. doctest:: get_matrix >>> g = gt.collection.data["polbooks"] >>> state = gt.BlockState(g, B=5, deg_corr=True) >>> for i in range(1000): ... ds, nmoves = gt.mcmc_sweep(state) >>> m = state.get_matrix(reorder=True) >>> figure() <...> >>> matshow(m) <...> >>> savefig("bloc_mat.pdf") .. testcleanup:: get_matrix savefig("bloc_mat.png") .. figure:: bloc_mat.* :align: center A 5x5 block matrix. """ B = self.B vmap = {} for r in range(len(self.vertices)): vmap[self.vertices[r]] = r if reorder: if niter == 0: niter = 10 states = [] label = None states = [self] Bi = self.B // 2 while Bi > 1: state = BlockState(states[-1].bg, b=states[-1].bg.vertex_index.copy("int"), B=states[-1].bg.num_vertices(), clabel=states[-1].bclabel, vweight=states[-1].wr, eweight=states[-1].mrs, deg_corr=states[-1].deg_corr, max_BE=states[-1].max_BE) state = greedy_shrink(state, B=Bi, nsweeps=niter, epsilon=1e-3, c=0, nmerge_sweeps=niter) for i in range(niter): mcmc_sweep(state, c=0, beta=float("inf")) states.append(state) Bi //= 2 if Bi < self.bclabel.a.max() + 1: break vorder = list(range(len(states[-1].vertices))) for state in reversed(states[1:]): norder = [[] for i in range(state.B)] for v in state.g.vertices(): pos = vorder.index(state.b[v]) norder[pos].append(int(v)) vorder = [item for sublist in norder for item in sublist] else: vorder = self.vertices order_map = zeros(B, dtype="int") for i, v in enumerate(vorder): order_map[vmap[v]] = i m = zeros((B, B)) rmap = {} for e in self.bg.edges(): r, s = vmap[int(e.source())], vmap[int(e.target())] r = order_map[r] s = order_map[s] rmap[r] = int(e.source()) rmap[s] = int(e.target()) m[r, s] = self.mrs[e] if not self.bg.is_directed(): m[s, r] = m[r, s] for r in range(B): if r not in rmap: rmap[r] = r if ret_order: return m, rmap else: return m def model_entropy(B, N, E, directed=False, nr=None): r"""Computes the amount of information necessary for the parameters of the traditional blockmodel ensemble, for B blocks, N vertices, E edges, and either a directed or undirected graph. A traditional blockmodel is defined as a set of :math:N vertices which can belong to one of :math:B blocks, and the matrix :math:e_{rs} describes the number of edges from block :math:r to :math:s (or twice that number if :math:r=s and the graph is undirected). For an undirected graph, the number of distinct :math:e_{rs} matrices is given by, .. math:: \Omega_m = \left(\!\!{\left(\!{B \choose 2}\!\right) \choose E}\!\!\right) and for a directed graph, .. math:: \Omega_m = \left(\!\!{B^2 \choose E}\!\!\right) where :math:\left(\!{n \choose k}\!\right) = {n+k-1\choose k} is the number of :math:k combinations with repetitions from a set of size :math:n. The total information necessary to describe the model is then, .. math:: \mathcal{L}_t = \ln\Omega_m + \ln\left(\!\!{B \choose N}\!\!\right) + \ln N! - \sum_r \ln n_r!, where the remaining term is the information necessary to describe the block partition, where :math:n_r is the number of nodes in block :math:r. If nr is None, it is assumed :math:n_r=N/B. The value returned corresponds to the information per edge, i.e. :math:\mathcal{L}_t/E. References ---------- .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks", Phys. Rev. Lett. 110, 148701 (2013), :doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794. .. [peixoto-hierarchical-2013] Tiago P. Peixoto, "Hierarchical block structures and high-resolution model selection in large networks ", :arxiv:1310.4377. """ if directed: x = (B * B); else: x = (B * (B + 1)) / 2; L = lbinom(x + E - 1, E) + partition_entropy(B, N, nr) return L / E def Sdl(B, S, N, E, directed=False): return S + model_entropy(B, N, E, directed) def lbinom(n, k): return scipy.special.gammaln(n + 1) - scipy.special.gammaln(n - k + 1) - scipy.special.gammaln(k + 1) def partition_entropy(B, N, nr=None): if nr is None: S = N * log(B) + log1p(-(1 - 1./B) ** N) else: S = lbinom(B + N - 1, N) + scipy.special.gammaln(N + 1) - scipy.special.gammaln(nr + 1).sum() return S def get_max_B(N, E, directed=False): r"""Return the maximum detectable number of blocks, obtained by minimizing: .. math:: \mathcal{L}_t(B, N, E) - E\ln B where :math:\mathcal{L}_t(B, N, E) is the information necessary to describe a traditional blockmodel with B blocks, N nodes and E edges (see :func:model_entropy). Examples -------- >>> gt.get_max_B(N=1e6, E=5e6) 1572 References ---------- .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks", Phys. Rev. Lett. 110, 148701 (2013), :doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794. """ B = fminbound(lambda B: Sdl(B, -log(B), N, E, directed), 1, E, xtol=1e-6, maxfun=1500, disp=0) if isnan(B): B = 1 return max(int(ceil(B)), 2) def get_akc(B, I, N=float("inf"), directed=False): r"""Return the minimum value of the average degree of the network, so that some block structure with :math:B blocks can be detected, according to the minimum description length criterion. This is obtained by solving .. math:: \Sigma_b = \mathcal{L}_t(B, N, E) - E\mathcal{I}_{t/c} = 0, where :math:\mathcal{L}_{t} is the necessary information to describe the blockmodel parameters (see :func:model_entropy), and :math:\mathcal{I}_{t/c} characterizes the planted block structure, and is given by .. math:: \mathcal{I}_t &= \sum_{rs}m_{rs}\ln\left(\frac{m_{rs}}{w_rw_s}\right),\\ \mathcal{I}_c &= \sum_{rs}m_{rs}\ln\left(\frac{m_{rs}}{m_rm_s}\right), where :math:m_{rs} = e_{rs}/2E (or :math:m_{rs} = e_{rs}/E for directed graphs) and :math:w_r=n_r/N. We note that :math:\mathcal{I}_{t/c}\in[0, \ln B]. In the case where :math:E \gg B^2, this simplifies to .. math:: \left_c &= \frac{2\ln B}{\mathcal{I}_{t/c}},\\ \left_c &= \frac{\ln B}{\mathcal{I}_{t/c}}, for undirected and directed graphs, respectively. This limit is assumed if N == inf. Examples -------- >>> gt.get_akc(10, log(10) / 100, N=100) 2.414413200430159 References ---------- .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks", Phys. Rev. Lett. 110, 148701 (2013), :doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794. """ if N != float("inf"): if directed: get_dl = lambda ak: model_entropy(B, N, N * ak, directed) - N * ak * I else: get_dl = lambda ak: model_entropy(B, N, N * ak / 2., directed) - N * ak * I / 2. ak = fsolve(lambda ak: get_dl(ak), 10) ak = float(ak) else: ak = 2 * log(B) / S if directed: ak /= 2 return ak def mcmc_sweep(state, beta=1., random_move=False, c=1., dense=False, multigraph=False, sequential=True, vertices=None, verbose=False): r"""Performs a Markov chain Monte Carlo sweep on the network, to sample the block partition according to a probability :math:\propto e^{-\beta \mathcal{S}_{t/c}}, where :math:\mathcal{S}_{t/c} is the blockmodel entropy. Parameters ---------- state : :class:~graph_tool.community.BlockState The block state. beta : float (optional, default: 1.0) The inverse temperature parameter :math:\beta. random_move : bool (optional, default: False) If True, the proposed moves will attempt to place the vertices in fully randomly-chosen blocks. If False, the proposed moves will be chosen with a probability depending on the membership of the neighbours and the currently-inferred block structure. c : float (optional, default: 1.0) This parameter specifies how often fully random moves are attempted, instead of more likely moves based on the inferred block partition. For c == 0, no fully random moves are attempted, and for c == inf they are always attempted. dense : bool (optional, default: False) If True, the "dense" variant of the entropy will be computed. multigraph : bool (optional, default: False) If True, the multigraph entropy will be used. Only has an effect if dense == True. sequential : bool (optional, default: True) If True, the move attempts on the vertices are done in sequential random order. Otherwise a total of N moves attempts are made, where N is the number of vertices, where each vertex can be selected with equal probability. vertices : list of ints (optional, default: None) A list of vertices which will be attempted to be moved. If None, all vertices will be attempted. verbose : bool (optional, default: False) If True, verbose information is displayed. Returns ------- dS : float The entropy difference (per edge) after a full sweep. nmoves : int The number of accepted block membership moves. Notes ----- This algorithm performs a Markov chain Monte Carlo sweep on the network, where the block memberships are randomly moved, and either accepted or rejected, so that after sufficiently many sweeps the partitions are sampled with probability proportional to :math:e^{-\beta\mathcal{S}_{t/c}}, where :math:\mathcal{S}_{t/c} is the blockmodel entropy, given by .. math:: \mathcal{S}_t &\cong - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right), \\ \mathcal{S}^d_t &\cong - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right), for undirected and directed traditional blockmodels (deg_corr == False), respectively, where :math:e_{rs} is the number of edges from block :math:r to :math:s (or the number of half-edges for the undirected case when :math:r=s), and :math:n_r is the number of vertices in block :math:r, and constant terms which are independent of the block partition were dropped (see :meth:BlockState.entropy for the complete entropy). For the degree-corrected variant with "hard" degree constraints the equivalent expressions are .. math:: \mathcal{S}_c &\cong - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e_re_s}\right), \\ \mathcal{S}^d_c &\cong - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e^+_re^-_s}\right), where :math:e_r = \sum_se_{rs} is the number of half-edges incident on block :math:r, and :math:e^+_r = \sum_se_{rs} and :math:e^-_r = \sum_se_{sr} are the number of out- and in-edges adjacent to block :math:r, respectively. The Monte Carlo algorithm employed attempts to improve the mixing time of the Markov chain by proposing membership moves :math:r\to s with probability :math:p(r\to s|t) \propto e_{ts} + c, where :math:t is the block label of a random neighbour of the vertex being moved. See [peixoto-efficient-2013]_ for more details. This algorithm has a complexity of :math:O(E), where :math:E is the number of edges in the network. Examples -------- .. testsetup:: mcmc gt.seed_rng(42) np.random.seed(42) .. doctest:: mcmc >>> g = gt.collection.data["polbooks"] >>> state = gt.BlockState(g, B=3, deg_corr=True) >>> pv = None >>> for i in range(1000): # remove part of the transient ... ds, nmoves = gt.mcmc_sweep(state) >>> for i in range(1000): ... ds, nmoves = gt.mcmc_sweep(state) ... pv = gt.collect_vertex_marginals(state, pv) >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft.pdf") <...> .. testcleanup:: mcmc gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft.png") .. figure:: polbooks_blocks_soft.* :align: center "Soft" block partition of a political books network with :math:B=3. References ---------- .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey, Samuel Leinhardt, "Stochastic blockmodels: First steps", Carnegie-Mellon University, Pittsburgh, PA 15213, U.S.A., :doi:10.1016/0378-8733(83)90021-7 .. [faust-blockmodels-1992] Katherine Faust, and Stanley Wasserman. "Blockmodels: Interpretation and Evaluation." Social Networks 14, no. 1-2 (1992): 5-61. :doi:10.1016/0378-8733(92)90013-W .. [karrer-stochastic-2011] Brian Karrer, and M. E. J. Newman. "Stochastic Blockmodels and Community Structure in Networks." Physical Review E 83, no. 1 (2011): 016107. :doi:10.1103/PhysRevE.83.016107. .. [peixoto-entropy-2012] Tiago P. Peixoto "Entropy of Stochastic Blockmodel Ensembles." Physical Review E 85, no. 5 (2012): 056122. :doi:10.1103/PhysRevE.85.056122, :arxiv:1112.6028. .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks", Phys. Rev. Lett. 110, 148701 (2013), :doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794. .. [peixoto-efficient-2013] Tiago P. Peixoto, "Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models", :arxiv:1310.4378. """ if state.B == 1: return 0., 0 if vertices is not None: vlist = libcommunity.get_vector(len(vertices)) vlist.a = vertices vertices = vlist state.sweep_vertices = vertices if state.sweep_vertices is None: vertices = libcommunity.get_vector(state.g.num_vertices()) vertices.a = state.g.vertex_index.copy("int").fa state.sweep_vertices = vertices if random_move: state._BlockState__build_egroups(empty=True) elif state.egroups is None: state._BlockState__build_egroups(empty=False) if state.nsampler is None: state._BlockState__build_nsampler() state.bnsampler = None try: dS, nmoves = libcommunity.move_sweep(state.g._Graph__graph, state.bg._Graph__graph, state._BlockState__get_emat(), state.nsampler, _prop("e", state.bg, state.mrs), _prop("v", state.bg, state.mrp), _prop("v", state.bg, state.mrm), _prop("v", state.bg, state.wr), _prop("v", state.g, state.b), _prop("v", state.bg, state.bclabel), state.sweep_vertices, state.deg_corr, dense, multigraph, _prop("e", state.g, state.eweight), _prop("v", state.g, state.vweight), state.egroups, _prop("e", state.g, state.esrcpos), _prop("e", state.g, state.etgtpos), float(beta), sequential, random_move, c, state.is_weighted, verbose, _get_rng()) finally: if random_move: state.egroups = None return dS / state.E, nmoves def merge_sweep(state, bm, nmerges, nsweeps=10, dense=False, multigraph=False, random_moves=False, verbose=False): if state.B == 1: return 0., 0 if state.bnsampler is None: state._BlockState__build_bnsampler() state.egroups = None state.nsampler = None dS, nmoves = libcommunity.merge_sweep(state.bg._Graph__graph, state._BlockState__get_emat(), state.bnsampler, _prop("e", state.bg, state.mrs), _prop("v", state.bg, state.mrp), _prop("v", state.bg, state.mrm), _prop("v", state.bg, state.wr), _prop("v", state.bg, bm), _prop("v", state.bg, state.bclabel), state.deg_corr, dense, multigraph, nsweeps, nmerges, random_moves, verbose, _get_rng()) return dS / state.E, nmoves def greedy_shrink(state, B, nsweeps=10, adaptive_sweeps=True, nmerge_sweeps=None, epsilon=0, r=2, greedy=True, anneal=(1, 1), c=1, dense=False, multigraph=False, random_move=False, verbose=False, sequential=True): if B > state.B: raise ValueError("Cannot shrink to a larger size!") if nmerge_sweeps is None: nmerge_sweeps = max((2 * state.g.num_edges()) // state.g.num_vertices(), 1) nmerged = 0 state = BlockState(state.g, b=state.b, B=state.B, clabel=state.clabel, vweight=state.vweight, eweight=state.eweight, deg_corr=state.deg_corr, max_BE=state.max_BE) cg = state.bg.copy() cg_vweight = cg.own_property(state.wr.copy()) cg_eweight = cg.own_property(state.mrs.copy()) cg_clabel = cg.own_property(state.bclabel.copy()) # merge according to indirect neighbourhood bm = state.bg.vertex_index.copy("int") random = random_move while nmerged < state.B - B: dS, nmoves = merge_sweep(state, bm, nmerges=state.B - B - nmerged, nsweeps=nmerge_sweeps, random_moves=random) nmerged += nmoves if verbose: print("merging", dS, nmoves, nmerged) if nmoves == 0: random = True if verbose: print("can't merge... switching to random") # Merged block-level state bmap = -ones(len(bm.a), dtype=bm.a.dtype) libcommunity.vector_map(bm.a, bmap) bm = cg.own_property(bm) bg_state = BlockState(cg, b=bm, B=B, clabel=cg_clabel, vweight=cg_vweight, eweight=cg_eweight, deg_corr=state.deg_corr, max_BE=state.max_BE) if bg_state.g.num_vertices() != state.g.num_vertices() and nsweeps > 0: # Perform block-level moves if verbose: print("Performing block-level moves...") multilevel_minimize(bg_state, B=B, nsweeps=nsweeps, adaptive_sweeps=adaptive_sweeps, epsilon=epsilon, r=r, greedy=greedy, anneal=anneal, c=c, dense=dense, multigraph=multigraph, random_move=random_move, sequential=sequential, verbose=verbose) bm = bg_state.b libcommunity.vector_map(state.b.a, bm.a) state = BlockState(state.g, b=state.b, B=B, clabel=state.clabel, vweight=state.vweight, eweight=state.eweight, deg_corr=state.deg_corr, max_BE=state.max_BE) return state class MinimizeState(object): r"""This object stores information regarding the current entropy minimization state, so that the algorithms can resume previously started runs. This object can be saved to disk via the :mod:pickle interface.""" def __init__(self): self.b_cache = {} self.checkpoint_state = defaultdict(dict) def clear(self): self.b_cache.clear() self.checkpoint_state.clear() def multilevel_minimize(state, B, nsweeps=10, adaptive_sweeps=True, epsilon=0, anneal=(1., 1.), r=2., nmerge_sweeps=10, greedy=True, random_move=False, c=1., dense=False, multigraph=False, sequential=True, checkpoint=None, minimize_state=None, verbose=False): r"""Performs an agglomerative heuristic, which progressively merges blocks together (while allowing individual node moves) to achieve a good partition in B blocks. Parameters ---------- state : :class:~graph_tool.community.BlockState The block state. B : int The desired number of blocks. nsweeps : int (optional, default: 10) The number of sweeps done after each merge step to reach the local minimum. adaptive_sweeps : bool (optional, default: True) If True, the number of sweeps necessary for the local minimum will be estimated to be enough so that no more than epsilon * N nodes changes their states in the last nsweeps sweeps. epsilon : float (optional, default: 0) Converge criterion for adaptive_sweeps. anneal : pair of floats (optional, default: (1., 1.)) The first value specifies the starting value for beta of the MCMC steps, and the second value is the factor which is multiplied to beta after each estimated equilibration (according to nsweeps and adaptive_sweeps). r : float (optional, default: 2.) Agglomeration ratio for the merging steps. Each merge step will attempt to find the best partition into :math:B_{i-1} / r blocks, where :math:B_{i-1} is the number of blocks in the last step. nmerge_sweeps : int (optional, default: 10) The number of merge sweeps done, where in each sweep a better merge candidate is searched for every block. greedy : bool (optional, default: True) If True, the value of beta of the MCMC steps are kept at infinity for all steps. Otherwise they change according to the anneal parameter. random_move : bool (optional, default: False) If True, the proposed moves will attempt to place the vertices in fully randomly-chosen blocks. If False, the proposed moves will be chosen with a probability depending on the membership of the neighbours and the currently-inferred block structure. c : float (optional, default: 1.0) This parameter specifies how often fully random moves are attempted, instead of more likely moves based on the inferred block partition. For c == 0, no fully random moves are attempted, and for c == inf they are always attempted. dense : bool (optional, default: False) If True, the "dense" variant of the entropy will be computed. multigraph : bool (optional, default: False) If True, the multigraph entropy will be used. Only has an effect if dense == True. sequential : bool (optional, default: True) If True, the move attempts on the vertices are done in sequential random order. Otherwise a total of N moves attempts are made, where N is the number of vertices, where each vertex can be selected with equal probability. vertices: list of ints (optional, default: None) A list of vertices which will be attempted to be moved. If None, all vertices will be attempted. checkpoint : function (optional, default: None) If provided, this function will be called after each call to :func:mcmc_sweep. This can be used to store the current state, so it can be continued later. The function must have the following signature: .. code-block:: python def checkpoint(state, S, delta, nmoves, minimize_state): ... where state is either a :class:~graph_tool.community.BlockState instance or None, S is the current entropy value, delta is the entropy difference in the last MCMC sweep, and nmoves is the number of accepted block membership moves. The minimize_state argument is a :class:MinimizeState instance which specifies the current state of the algorithm, which can be stored via :mod:pickle, and supplied via the minimize_state option below to continue from an interrupted run. This function will also be called when the MCMC has finished for the current value of :math:B, in which case state == None, and the remaining parameters will be zero, except the last. minimize_state : :class:MinimizeState (optional, default: None) If provided, this will specify an exact point of execution from which the algorithm will continue. The expected object is a :class:MinimizeState instance which will be passed to the callback of the checkpoint option above, and can be stored by :mod:pickle. verbose : bool (optional, default: False) If True, verbose information is displayed. Returns ------- state : :class:~graph_tool.community.BlockState The new :class:~graph_tool.community.BlockState with B blocks. Notes ----- This algorithm performs an agglomerative heuristic on the current block state, where blocks are progressively merged together, using repeated applications of the :func:mcmc_sweep moves, at different scales. See [peixoto-efficient-2013]_ for more details. This algorithm has a complexity of :math:O(N\ln^2 N), where :math:N is the number of nodes in the network. Examples -------- .. testsetup:: multilevel_minimize gt.seed_rng(42) np.random.seed(42) .. doctest:: multilevel_minimize >>> g = gt.collection.data["polblogs"] >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(gt.GraphView(g, directed=False))) >>> state = gt.BlockState(g, B=g.num_vertices(), deg_corr=True) >>> state = gt.multilevel_minimize(state, B=2) >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=state.get_blocks(), output="polblogs_agg.pdf") <...> .. testcleanup:: multilevel_minimize gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=state.get_blocks(), output="polblogs_agg.png") .. figure:: polblogs_agg.* :align: center Block partition of a political blogs network with :math:B=2. References ---------- .. [peixoto-efficient-2013] Tiago P. Peixoto, "Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models", :arxiv:1310.4378. """ if minimize_state is None: minimize_state = MinimizeState() b_cache = minimize_state.b_cache checkpoint_state = minimize_state.checkpoint_state # some trivial boundary conditions if B == 1: bi = state.g.new_vertex_property("int") state = BlockState(state.g, vweight=state.vweight, eweight=state.eweight, b=bi, clabel=state.clabel, deg_corr=state.deg_corr, max_BE=state.max_BE) return state if B == state.g.num_vertices(): bi = state.g.new_vertex_property("int") bi.fa = range(state.g.num_vertices()) state = BlockState(state.g, vweight=state.vweight, eweight=state.eweight, B=state.g.num_vertices(), b=bi, clabel=state.clabel, deg_corr=state.deg_corr, max_BE=state.max_BE) return state Bi = state.B while True: Bi = max(int(round(Bi / r)), B) if Bi == state.B and Bi > B: Bi -= 1 if b_cache is not None and Bi in b_cache: bi = state.g.new_vertex_property("int") bi.fa = b_cache[Bi][1] state = BlockState(state.g, B=Bi, b=bi, vweight=state.vweight, eweight=state.eweight, clabel=state.clabel, deg_corr=state.deg_corr, max_BE=state.max_BE) if Bi < state.B: if verbose: print("Shrinking:", state.B, "->", Bi) state = greedy_shrink(state, B=Bi, nsweeps=nsweeps, epsilon=epsilon, c=c, dense=dense, multigraph=multigraph, nmerge_sweeps=nmerge_sweeps, sequential=sequential, verbose=verbose) if "S" in checkpoint_state[Bi]: S = checkpoint_state[Bi]["S"] niter = checkpoint_state[Bi]["niter"] else: S = state.entropy(dl=True) checkpoint_state[Bi]["S"] = S niter = 0 checkpoint_state[Bi]["niter"] = niter if b_cache is not None and Bi not in b_cache: b_cache[Bi] = [float("inf"), array(state.b.fa), None] if not adaptive_sweeps: ntotal = nsweeps if greedy else 2 * nsweeps if verbose: print("Performing %d sweeps for B=%d..." % (ntotal, Bi)) for i in range(ntotal): if i < niter: continue if i < nsweeps and not greedy: beta = anneal[0] else: beta = float("inf") delta, nmoves = mcmc_sweep(state, beta=beta, c=c, dense=dense, multigraph=multigraph, sequential=sequential, random_move=random_move) S += delta niter += 1 checkpoint_state[Bi]["S"] = S checkpoint_state[Bi]["niter"] = niter if b_cache is not None: b_cache[Bi][1] = array(state.b.fa) if checkpoint is not None: checkpoint(state, S, delta, nmoves, minimize_state) else: # adaptive mode min_dl = checkpoint_state[Bi].get("min_dl", S) max_dl = checkpoint_state[Bi].get("max_dl", S) count = checkpoint_state[Bi].get("count", 0) bump = checkpoint_state[Bi].get("bump", False) beta = checkpoint_state[Bi].get("beta", anneal[0]) last_min = checkpoint_state[Bi].get("last_min", min_dl) greedy_step = checkpoint_state[Bi].get("greedy_step", greedy) total_nmoves = checkpoint_state[Bi].get("total_nmoves", 0) if verbose and not greedy: print("Performing sweeps for beta = %g, B=%d (N=%d)..." % \ (beta, Bi, state.g.num_vertices())) eps = 1e-8 niter = 0 while True: if greedy_step: break if count > nsweeps: if not bump: min_dl = max_dl = S bump = True count = 0 else: if anneal[1] <= 1 or min_dl == last_min: break else: beta *= anneal[1] count = 0 last_min = min_dl if verbose: print("Performing sweeps for beta = %g, B=%d (N=%d)..." % \ (beta, Bi, state.g.num_vertices())) delta, nmoves = mcmc_sweep(state, beta=beta, c=c, dense=dense, multigraph=multigraph, sequential=sequential, random_move=random_move) S += delta niter += 1 total_nmoves += nmoves if S > max_dl + eps: max_dl = S count = 0 elif S < min_dl - eps: min_dl = S count = 0 else: count += 1 checkpoint_state[B]["S"] = S checkpoint_state[B]["niter"] = niter checkpoint_state[B]["min_dl"] = min_dl checkpoint_state[B]["max_dl"] = max_dl checkpoint_state[B]["count"] = count checkpoint_state[B]["bump"] = bump checkpoint_state[B]["total_nmoves"] = total_nmoves if b_cache is not None: b_cache[Bi][0] = float("inf") b_cache[Bi][1] = array(state.b.fa) if checkpoint is not None: checkpoint(state, S, delta, nmoves, minimize_state) if verbose: if not greedy_step: print("... performed %d sweeps with %d vertex moves" % (niter, total_nmoves)) print(u"Performing sweeps for beta = ∞, B=%d (N=%d)..." % \ (Bi, state.g.num_vertices())) if not greedy_step: checkpoint_state[Bi]["greedy_step"] = True min_dl = S count = 0 niter = 0 total_nmoves = 0 while count <= nsweeps: delta, nmoves = mcmc_sweep(state, beta=float("inf"), c=c, dense=dense, multigraph=multigraph, sequential=sequential, random_move=random_move) S += delta niter += 1 total_nmoves += nmoves # if verbose: # print("Moved:", delta, nmoves, # nmoves / state.g.num_vertices(), # epsilon, count) #if nmoves > epsilon * state.g.num_vertices(): if abs(delta) > eps and nmoves / state.g.num_vertices() > epsilon: min_dl = S count = 0 else: count += 1 checkpoint_state[Bi]["S"] = S checkpoint_state[Bi]["min_dl"] = min_dl checkpoint_state[Bi]["count"] = count checkpoint_state[B]["total_nmoves"] = total_nmoves if b_cache is not None: b_cache[Bi][1] = array(state.b.fa) if checkpoint is not None: checkpoint(state, S, delta, nmoves, minimize_state) if verbose: print("... performed %d sweeps with %d vertex moves" % (niter, total_nmoves)) bi = state.b if Bi == B: break return state def get_state_dl(state, dense, nested_dl, clabel=None): if not nested_dl: dl = state.entropy(dense=dense, multigraph=dense, dl=True) else: dl = state.entropy(dense=dense, multigraph=dense, dl=False) + \ partition_entropy(B=state.B, N=state.N, nr=state.wr.a) / state.E if clabel is None: bclabel = state.bclabel else: bclabel = state.bg.new_vertex_property("int") libcommunity.vector_rmap(state.b.a, bclabel.a) libcommunity.vector_map(bclabel.a, clabel.a) bstate = BlockState(state.bg, b=bclabel, eweight=state.mrs, deg_corr=False) dl += bstate.entropy(dl=False, dense=True, multigraph=True) + \ partition_entropy(B=bstate.B, N=bstate.N, nr=bstate.wr.a) / state.E return dl def get_b_dl(g, vweight, eweight, B, nsweeps, adaptive_sweeps, c, random_move, sequential, shrink, r, anneal, greedy, epsilon, nmerge_sweeps, clabel, deg_corr, dense, sparse_heuristic, checkpoint, minimize_state, max_BE, nested_dl, verbose): bs = minimize_state.b_cache checkpoint_state = minimize_state.checkpoint_state previous = None if B in bs and checkpoint_state[B].get("done", False): # A previous finished result is available. Use that and keep going. if verbose: print("(using previous finished result for B=%d)" % B) return bs[B][0] elif B in bs: # A previous unfinished result is available. Use that as the starting point. if verbose: print("(starting from previous result for B=%d)" % B) b = g.new_vertex_property("int") b.fa = bs[B][1] state = BlockState(g, b=b, B=B, vweight=vweight, eweight=eweight, clabel=clabel, deg_corr=deg_corr, max_BE=max_BE) previous = bs[B] else: # No previous result is available. bs_keys = [k for k in bs.keys() if type(k) != str] B_sup = max(max(bs_keys), B) if len(bs_keys) > 0 else B for Bi in bs_keys: if Bi > B and Bi < B_sup: B_sup = Bi if B_sup == B or not shrink: # Start from scratch. bi = g.new_vertex_property("int") bi.fa = range(g.num_vertices()) state = BlockState(g, B=g.num_vertices(), b=bi, vweight=vweight, eweight=eweight, clabel=clabel, deg_corr=deg_corr, max_BE=max_BE) else: # Start from result with B_sup > B, and shrink it. if verbose: print("(shrinking from B=%d to B=%d)" % (B_sup, B)) b = g.new_vertex_property("int") b.fa = bs[B_sup][1] if B > 1: state = BlockState(g, B=B_sup, b=b, vweight=vweight, eweight=eweight, clabel=clabel, deg_corr=deg_corr, max_BE=max_BE) else: bi = g.new_vertex_property("int") bi.fa = range(g.num_vertices()) state = BlockState(g, B=g.num_vertices(), b=bi, vweight=vweight, eweight=eweight, clabel=clabel, deg_corr=deg_corr, max_BE=max_BE) # perform the actual minimization state = multilevel_minimize(state, B, nsweeps=nsweeps, adaptive_sweeps=adaptive_sweeps, epsilon=epsilon, r=r, greedy=greedy, nmerge_sweeps=nmerge_sweeps, anneal=anneal, c=c, random_move=random_move, dense=dense and not sparse_heuristic, multigraph=dense, sequential=sequential, minimize_state=minimize_state, checkpoint=checkpoint, verbose=verbose) dl = get_state_dl(state, dense, nested_dl) if previous is None or dl < previous[0]: # the current result improved the previous one bs[B] = [dl, array(state.b.fa)] if verbose: print("(using new result for B=%d with L=%g)" % (B, dl)) else: # the previous result is better than the current one if verbose: print("(kept old result for B=%d with L=%g [vs L=%g])" % (B, previous[0], dl)) dl = previous[0] checkpoint_state[B]["done"] = True assert(not isinf(dl)) return dl def fibo(n): phi = (1 + sqrt(5)) / 2 return int(round(phi ** n / sqrt(5))) def fibo_n_floor(x): phi = (1 + sqrt(5)) / 2 n = floor(log(x * sqrt(5) + 0.5) / log(phi)) return int(n) def get_mid(a, b): n = fibo_n_floor(b - a) return b - fibo(n - 1) def is_fibo(x): return fibo(fibo_n_floor(x)) == x def minimize_blockmodel_dl(g, eweight=None, vweight=None, deg_corr=True, dense=False, sparse_heuristic=False, random_move=False, c=0, nsweeps=100, adaptive_sweeps=True, epsilon=0., anneal=(1., 1.), greedy_cooling=True, sequential=True, r=2, nmerge_sweeps=10, max_B=None, min_B=1, mid_B=None, clabel=None, checkpoint=None, minimize_state=None, exhaustive=False, max_BE=None, nested_dl=False, verbose=False): r"""Find the block partition of an unspecified size which minimizes the description length of the network, according to the stochastic blockmodel ensemble which best describes it. Parameters ---------- g : :class:~graph_tool.Graph Graph being used. eweight : :class:~graph_tool.PropertyMap (optional, default: None) Edge weights (i.e. multiplicity). vweight : :class:~graph_tool.PropertyMap (optional, default: None) Vertex weights (i.e. multiplicity). deg_corr : bool (optional, default: True) If True, the degree-corrected version of the blockmodel ensemble will be assumed, otherwise the traditional variant will be used. dense : bool (optional, default: False) If True, the "dense" variant of the entropy will be computed. sparse_heuristic : bool (optional, default: False) If True, the sparse entropy will be used to find the best partition, but the dense entropy will be used to compare different partitions. This has an effect only if dense == True. random_move : bool (optional, default: False) If True, the proposed moves will attempt to place the vertices in fully randomly-chosen blocks. If False, the proposed moves will be chosen with a probability depending on the membership of the neighbours and the currently-inferred block structure. c : float (optional, default: 1.0) This parameter specifies how often fully random moves are attempted, instead of more likely moves based on the inferred block partition. For c == 0, no fully random moves are attempted, and for c == inf they are always attempted. nsweeps : int (optional, default: 10) The number of sweeps done after each merge step to reach the local minimum. adaptive_sweeps : bool (optional, default: True) If True, the number of sweeps necessary for the local minimum will be estimated to be enough so that no more than epsilon * N nodes changes their states in the last nsweeps sweeps. epsilon : float (optional, default: 0) Converge criterion for adaptive_sweeps. anneal : pair of floats (optional, default: (1., 1.)) The first value specifies the starting value for beta of the MCMC steps, and the second value is the factor which is multiplied to beta after each estimated equilibration (according to nsweeps and adaptive_sweeps). greedy_cooling : bool (optional, default: True) If True, the value of beta of the MCMC steps are kept at infinity for all steps. Otherwise they change according to the anneal parameter. sequential : bool (optional, default: True) If True, the move attempts on the vertices are done in sequential random order. Otherwise a total of N moves attempts are made, where N is the number of vertices, where each vertex can be selected with equal probability. r : float (optional, default: 2.) Agglomeration ratio for the merging steps. Each merge step will attempt to find the best partition into :math:B_{i-1} / r blocks, where :math:B_{i-1} is the number of blocks in the last step. nmerge_sweeps : int (optional, default: 10) The number of merge sweeps done, where in each sweep a better merge candidate is searched for every block. max_B : int (optional, default: None) Maximum number of blocks tried. If not supplied, it will be automatically determined. min_B : int (optional, default: 1) Minimum number of blocks tried. mid_B : int (optional, default: None) Middle of the range which brackets the minimum. If not supplied, will be automatically determined. clabel : :class:~graph_tool.PropertyMap (optional, default: None) Constraint labels on the vertices, such that vertices with different labels cannot belong to the same block. checkpoint : function (optional, default: None) If provided, this function will be called after each call to :func:mcmc_sweep. This can be used to store the current state, so it can be continued later. The function must have the following signature: .. code-block:: python def checkpoint(state, L, delta, nmoves, minimize_state): ... where state is either a :class:~graph_tool.community.BlockState instance or None, L is the current description length, delta is the entropy difference in the last MCMC sweep, and nmoves is the number of accepted block membership moves. The minimize_state argument is a :class:~graph_tool.community.MinimizeState instance which specifies the current state of the algorithm, which can be stored via :mod:pickle, and supplied via the minimize_state option below to continue from an interrupted run. This function will also be called when the MCMC has finished for the current value of :math:B, in which case state == None, and the remaining parameters will be zero, except the last. minimize_state : :class:~graph_tool.community.MinimizeState (optional, default: None) If provided, this will specify an exact point of execution from which the algorithm will continue. The expected object is a :class:~graph_tool.community.MinimizeState instance which will be passed to the callback of the checkpoint option above, and can be stored by :mod:pickle. exhaustive : bool (optional, default: False) If True, the best value of B will be found by testing all possible values, instead of performing a bisection search. max_BE : int (optional, default: 1000) If the number of blocks exceeds this number, a sparse representation of the block graph is used, which is slightly less efficient, but uses less memory, verbose : bool (optional, default: False) If True, verbose information is displayed. Returns ------- b : :class:~graph_tool.PropertyMap Vertex property map with the best block partition. min_dl : float Minimum value of the description length (in nats _ per edge). Notes ----- This algorithm attempts to find a block partition of an unspecified size which minimizes the description length of the network, .. math:: \Sigma_{t/c} = \mathcal{S}_{t/c} + \mathcal{L}_{t/c}, where :math:\mathcal{S}_{t/c} is the blockmodel entropy (as described in the docstring of :func:mcmc_sweep and :meth:BlockState.entropy) and :math:\mathcal{L}_{t/c} is the information necessary to describe the model (as described in the docstring of :func:model_entropy and :meth:BlockState.entropy). The algorithm works by minimizing the entropy :math:\mathcal{S}_{t/c} for specific values of :math:B via :func:mcmc_sweep (with :math:\beta = 1 and :math:\beta\to\infty), and minimizing :math:\Sigma_{t/c} via an one-dimensional Fibonacci search on :math:B. See [peixoto-parsimonious-2013]_ for more details. This algorithm has a complexity of :math:O(\tau N\ln^2 B_{\text{max}}), where :math:N is the number of nodes in the network, :math:\tau is the mixing time of the MCMC, and :math:B_{\text{max}} is the maximum number of blocks considered. If :math:B_{\text{max}} is not supplied, it is computed as :math:\sim\sqrt{E} via :func:get_max_B, in which case the complexity becomes :math:O(\tau E\ln E). Examples -------- .. testsetup:: mdl gt.seed_rng(42) np.random.seed(42) .. doctest:: mdl >>> g = gt.collection.data["polbooks"] >>> b, mdl = gt.minimize_blockmodel_dl(g) >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=b, vertex_shape=b, output="polbooks_blocks_mdl.pdf") <...> .. testcleanup:: mdl gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=b, vertex_shape=b, output="polbooks_blocks_mdl.png") .. figure:: polbooks_blocks_mdl.* :align: center Block partition of a political books network, which minimizes the description length of the network according to the degree-corrected stochastic blockmodel. References ---------- .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey, Samuel Leinhardt, "Stochastic blockmodels: First steps", Carnegie-Mellon University, Pittsburgh, PA 15213, U.S.A., :doi:10.1016/0378-8733(83)90021-7 .. [faust-blockmodels-1992] Katherine Faust, and Stanley Wasserman. "Blockmodels: Interpretation and Evaluation." Social Networks 14, no. 1-2 (1992): 5-61. :doi:10.1016/0378-8733(92)90013-W .. [karrer-stochastic-2011] Brian Karrer, and M. E. J. Newman. "Stochastic Blockmodels and Community Structure in Networks." Physical Review E 83, no. 1 (2011): 016107. :doi:10.1103/PhysRevE.83.016107. .. [peixoto-entropy-2012] Tiago P. Peixoto "Entropy of Stochastic Blockmodel Ensembles." Physical Review E 85, no. 5 (2012): 056122. :doi:10.1103/PhysRevE.85.056122, :arxiv:1112.6028. .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks", Phys. Rev. Lett. 110, 148701 (2013), :doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794. .. [peixoto-efficient-2013] Tiago P. Peixoto, "Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models", :arxiv:1310.4378. """ if max_B is None: if dense: max_B = max(g.num_vertices(), 1) else: max_B = get_max_B(g.num_vertices(), g.num_edges(), g.is_directed()) if verbose: print("max_B:", max_B) if min_B is None: min_B = 1 if mid_B is None: mid_B = get_mid(min_B, max_B) greedy = greedy_cooling shrink = True if minimize_state is None: minimize_state = MinimizeState() b_cache = minimize_state.b_cache checkpoint_state = minimize_state.checkpoint_state if exhaustive: if max_B not in b_cache: bi = g.new_vertex_property("int") bi.fa = range(g.num_vertices()) state = BlockState(g, B=g.num_vertices(), b=bi, vweight=vweight, eweight=eweight, clabel=clabel, deg_corr=deg_corr, max_BE=max_BE) for B in reversed(range(min_B, max_B + 1)): if B in b_cache: bi = g.new_vertex_property("int") bi.fa = b_cache[B][1] state = BlockState(g, b=bi, vweight=vweight, eweight=eweight, clabel=clabel, deg_corr=deg_corr, max_BE=max_BE) if checkpoint_state[B].get("done", False): continue state = multilevel_minimize(state, B, nsweeps=nsweeps, adaptive_sweeps=adaptive_sweeps, r=r, greedy=greedy, anneal=anneal, c=c, dense=dense and not sparse_heuristic, multigraph=dense, random_move=random_move, sequential=sequential, nmerge_sweeps=nmerge_sweeps, epsilon=epsilon, checkpoint=checkpoint, minimize_state=checkpoint_state, verbose=verbose) dl = get_state_dl(state, dense, nested_dl) b_cache[B] = [dl, array(state.b.fa)] if verbose: print("Result for B=%d: L=%g" % (B, dl)) min_dl = float(inf) best_B = None for Bi in b_cache.keys(): if b_cache[Bi][0] <= min_dl: min_dl = b_cache[Bi][0] best_B = Bi if verbose: print("Best result: B=%d, L=%g" % (best_B, min_dl)) b = g.new_vertex_property("int") b.fa = b_cache[best_B][1] return b, b_cache[best_B][0] args = dict(g=g, vweight=vweight, eweight=eweight, nsweeps=nsweeps, adaptive_sweeps=adaptive_sweeps, c=c, random_move=random_move, sequential=sequential, shrink=shrink, r=r, anneal=anneal, greedy=greedy, epsilon=epsilon, nmerge_sweeps=nmerge_sweeps, clabel=clabel, deg_corr=deg_corr, dense=dense, sparse_heuristic=sparse_heuristic, checkpoint=checkpoint, minimize_state=minimize_state, max_BE=max_BE, nested_dl=nested_dl, verbose=verbose) # Initial bracketing while True: f_max = get_b_dl(B=max_B, **args) f_mid = get_b_dl(B=mid_B, **args) f_min = get_b_dl(B=min_B, **args) if verbose: print("Current bracket:", (min_B, mid_B, max_B), (f_min, f_mid, f_max)) if checkpoint is not None: checkpoint(None, 0, 0, 0, minimize_state) if f_max > f_mid > f_min: max_B = mid_B mid_B = get_mid(min_B, mid_B) elif f_max < f_mid < f_min: min_B = mid_B mid_B = get_mid(mid_B, max_B) else: break # Fibonacci search while True: if max_B - mid_B > mid_B - min_B: x = get_mid(mid_B, max_B) else: x = get_mid(min_B, mid_B) f_x = get_b_dl(B=x, **args) f_mid = get_b_dl(B=mid_B, **args) if verbose: print("Current bracket:", (min_B, mid_B, max_B), (get_b_dl(B=min_B, **args), f_mid, get_b_dl(B=max_B, **args))) print("Bisect at", x, "with L=%g" % f_x) if max_B - mid_B <= 1: min_dl = float(inf) best_B = None for Bi in b_cache.keys(): if b_cache[Bi][0] <= min_dl: min_dl = b_cache[Bi][0] best_B = Bi if verbose: print("Best result: B=%d, L=%g" % (best_B, min_dl)) b = g.new_vertex_property("int") b.fa = b_cache[best_B][1] return b, b_cache[best_B][0] if checkpoint is not None: checkpoint(None, 0, 0, 0, minimize_state) if f_x < f_mid: if max_B - mid_B > mid_B - min_B: min_B = mid_B mid_B = x else: max_B = mid_B mid_B = x else: if max_B - mid_B > mid_B - min_B: max_B = x else: min_B = x def collect_edge_marginals(state, p=None): r"""Collect the edge marginal histogram, which counts the number of times the endpoints of each node have been assigned to a given block pair. This should be called multiple times, after repeated runs of the :func:mcmc_sweep function. Parameters ---------- state : :class:~graph_tool.community.BlockState The block state. p : :class:~graph_tool.PropertyMap (optional, default: None) Edge property map with vector-type values, storing the previous block membership counts. Each vector entry corresponds to b[i] + B * b[j], where b is the block membership and i = min(source(e), target(e)) and j = max(source(e), target(e)). If not provided, an empty histogram will be created. Returns ------- p : :class:~graph_tool.PropertyMap (optional, default: None) Vertex property map with vector-type values, storing the accumulated block membership counts. Examples -------- .. testsetup:: collect_edge_marginals gt.seed_rng(42) np.random.seed(42) .. doctest:: collect_edge_marginals >>> g = gt.collection.data["polbooks"] >>> state = gt.BlockState(g, B=4, deg_corr=True) >>> pe = None >>> for i in range(1000): # remove part of the transient ... ds, nmoves = gt.mcmc_sweep(state) >>> for i in range(1000): ... ds, nmoves = gt.mcmc_sweep(state) ... pe = gt.collect_edge_marginals(state, pe) >>> gt.bethe_entropy(state, pe)[0] 17.443089842818125 """ if p is None: p = state.g.new_edge_property("vector") libcommunity.edge_marginals(state.g._Graph__graph, state.bg._Graph__graph, state.B, _prop("v", state.g, state.b), _prop("e", state.g, p)) return p def collect_vertex_marginals(state, p=None): r"""Collect the vertex marginal histogram, which counts the number of times a node was assigned to a given block. This should be called multiple times, after repeated runs of the :func:mcmc_sweep function. Parameters ---------- state : :class:~graph_tool.community.BlockState The block state. p : :class:~graph_tool.PropertyMap (optional, default: None) Vertex property map with vector-type values, storing the previous block membership counts. If not provided, an empty histogram will be created. Returns ------- p : :class:~graph_tool.PropertyMap (optional, default: None) Vertex property map with vector-type values, storing the accumulated block membership counts. Examples -------- .. testsetup:: cvm gt.seed_rng(42) np.random.seed(42) .. doctest:: cvm >>> g = gt.collection.data["polbooks"] >>> state = gt.BlockState(g, B=4, deg_corr=True) >>> pv = None >>> for i in range(1000): # remove part of the transient ... ds, nmoves = gt.mcmc_sweep(state) >>> for i in range(1000): ... ds, nmoves = gt.mcmc_sweep(state) ... pv = gt.collect_vertex_marginals(state, pv) >>> gt.mf_entropy(state, pv) 19.94955528942717 >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft_B4.pdf") <...> .. testcleanup:: cvm gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft_B4.png") .. figure:: polbooks_blocks_soft_B4.* :align: center "Soft" block partition of a political books network with :math:B=4. """ B = state.B if p is None: p = state.g.new_vertex_property("vector") libcommunity.vertex_marginals(state.g._Graph__graph, _prop("v", state.g, state.b), _prop("v", state.g, p)) return p def bethe_entropy(state, p): r"""Compute the Bethe entropy given the edge block membership marginals. Parameters ---------- state : :class:~graph_tool.community.BlockState The block state. p : :class:~graph_tool.PropertyMap Edge property map with vector-type values, storing the previous block membership counts. Each vector entry corresponds to b[i] + B * b[j], where b is the block membership and i = min(source(e), target(e)) and j = max(source(e), target(e)). Returns ------- H : float The Bethe entropy value (in nats _) Hmf : float The "mean field" entropy value (in nats _), as would be returned by the :func:mf_entropy function. pv : :class:~graph_tool.PropertyMap (optional, default: None) Vertex property map with vector-type values, storing the accumulated block membership counts. These are the node marginals, as would be returned by the :func:collect_vertex_marginals function. Notes ----- The Bethe entropy is defined as, .. math:: H = -\sum_{e,(r,s)}\pi_{(r,s)}^e\ln\pi_{(r,s)}^e - \sum_{v,r}(1-k_i)\pi_r^v\ln\pi_r^v, where :math:\pi_{(r,s)}^e is the marginal probability that the endpoints of the edge :math:e belong to blocks :math:(r,s), and :math:\pi_r^v is the marginal probability that vertex :math:v belongs to block :math:r, and :math:k_i is the degree of vertex :math:v (or total degree for directed graphs). References ---------- .. [mezard-information-2009] Marc Mézard, Andrea Montanari, "Information, Physics, and Computation", Oxford Univ Press, 2009. """ B = state.B H = 0 pv = state.g.new_vertex_property("vector") H, sH, Hmf, sHmf = libcommunity.bethe_entropy(state.g._Graph__graph, state.B, _prop("e", state.g, p), _prop("v", state.g, pv)) return H, Hmf, pv def mf_entropy(state, p): r"""Compute the "mean field" entropy given the vertex block membership marginals. Parameters ---------- state : :class:~graph_tool.community.BlockState The block state. p : :class:~graph_tool.PropertyMap Vertex property map with vector-type values, storing the accumulated block membership counts. Returns ------- Hmf : float The "mean field" entropy value (in nats _). Notes ----- The "mean field" entropy is defined as, .. math:: H = - \sum_{v,r}\pi_r^v\ln\pi_r^v, where :math:\pi_r^v is the marginal probability that vertex :math:v belongs to block :math:r. References ---------- .. [mezard-information-2009] Marc Mézard, Andrea Montanari, "Information, Physics, and Computation", Oxford Univ Press, 2009. """ H = 0 for v in state.g.vertices(): N = p[v].a.sum() if N == 0: continue pvi = asarray(p[v].a, dtype="float") / N pvi = pvi[pvi > 0] H -= (pvi * log(pvi)).sum() return H def condensation_graph(g, prop, vweight=None, eweight=None, avprops=None, aeprops=None, self_loops=False): r""" Obtain the condensation graph, where each vertex with the same 'prop' value is condensed in one vertex. Parameters ---------- g : :class:~graph_tool.Graph Graph to be modelled. prop : :class:~graph_tool.PropertyMap Vertex property map with the community partition. vweight : :class:~graph_tool.PropertyMap (optional, default: None) Vertex property map with the optional vertex weights. eweight : :class:~graph_tool.PropertyMap (optional, default: None) Edge property map with the optional edge weights. avprops : list of :class:~graph_tool.PropertyMap (optional, default: None) If provided, the average value of each property map in this list for each vertex in the condensed graph will be computed and returned. aeprops : list of :class:~graph_tool.PropertyMap (optional, default: None) If provided, the average value of each property map in this list for each edge in the condensed graph will be computed and returned. self_loops : bool (optional, default: False) If True, self-loops due to intra-block edges are also included in the condensation graph. Returns ------- condensation_graph : :class:~graph_tool.Graph The community network prop : :class:~graph_tool.PropertyMap The community values. vcount : :class:~graph_tool.PropertyMap A vertex property map with the vertex count for each community. ecount : :class:~graph_tool.PropertyMap An edge property map with the inter-community edge count for each edge. va : list of :class:~graph_tool.PropertyMap A list of vertex property maps with average values of the properties passed via the avprops parameter. ea : list of :class:~graph_tool.PropertyMap A list of edge property maps with average values of the properties passed via the avprops parameter. See Also -------- community_structure: Obtain the community structure modularity: Calculate the network modularity condensation_graph: Network of communities, or blocks Notes ----- Each vertex in the condensation graph represents one community in the original graph (vertices with the same 'prop' value), and the edges represent existent edges between vertices of the respective communities in the original graph. Examples -------- .. testsetup:: condensation_graph gt.seed_rng(43) np.random.seed(42) Let's first obtain the best block partition with B=5. .. doctest:: condensation_graph >>> g = gt.collection.data["polbooks"] >>> state = gt.BlockState(g, B=5, deg_corr=True) >>> for i in range(1000): # remove part of the transient ... ds, nmoves = gt.mcmc_sweep(state) >>> for i in range(1000): ... ds, nmoves = gt.mcmc_sweep(state, beta=float("inf")) >>> b = state.get_blocks() >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=b, vertex_shape=b, output="polbooks_blocks_B5.pdf") <...> Now we get the condensation graph: .. doctest:: condensation_graph >>> bg, bb, vcount, ecount, avp, aep = gt.condensation_graph(g, b, avprops=[g.vp["pos"]], self_loops=True) >>> gt.graph_draw(bg, pos=avp[0], vertex_fill_color=bb, vertex_shape=bb, ... vertex_size=gt.prop_to_size(vcount, mi=40, ma=100), ... edge_pen_width=gt.prop_to_size(ecount, mi=2, ma=10), ... output="polbooks_blocks_B5_cond.pdf") <...> .. testcleanup:: condensation_graph gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=b, vertex_shape=b, output="polbooks_blocks_B5.png") gt.graph_draw(bg, pos=avp[0], vertex_fill_color=bb, vertex_shape=bb, vertex_size=gt.prop_to_size(vcount, mi=40, ma=100), edge_pen_width=gt.prop_to_size(ecount, mi=2, ma=10), output="polbooks_blocks_B5_cond.png") .. figure:: polbooks_blocks_B5.* :align: left Block partition of a political books network with :math:B=5. .. figure:: polbooks_blocks_B5_cond.* :align: right Condensation graph of the obtained block partition. """ gp = Graph(directed=g.is_directed()) if vweight is None: vcount = gp.new_vertex_property("int32_t") else: vcount = gp.new_vertex_property(vweight.value_type()) if eweight is None: ecount = gp.new_edge_property("int32_t") else: ecount = gp.new_edge_property(eweight.value_type()) if prop is g.vertex_index: prop = prop.copy(value_type="int32_t") cprop = gp.new_vertex_property(prop.value_type()) if avprops is None: avprops = [] avp = [] r_avp = [] for p in avprops: if p is g.vertex_index: p = p.copy(value_type="int") if "string" in p.value_type(): raise ValueError("Cannot compute average of string properties!") temp = g.new_vertex_property(p.value_type()) cp = gp.new_vertex_property(p.value_type()) avp.append((_prop("v", g, p), _prop("v", g, temp), _prop("v", g, cp))) r_avp.append(cp) if aeprops is None: aeprops = [] aep = [] r_aep = [] for p in aeprops: if p is g.edge_index: p = p.copy(value_type="int") if "string" in p.value_type(): raise ValueError("Cannot compute average of string properties!") temp = g.new_edge_property(p.value_type()) cp = gp.new_edge_property(p.value_type()) aep.append((_prop("e", g, p), _prop("e", g, temp), _prop("e", g, cp))) r_aep.append(cp) libcommunity.community_network(g._Graph__graph, gp._Graph__graph, _prop("v", g, prop), _prop("v", gp, cprop), _prop("v", gp, vcount), _prop("e", gp, ecount), _prop("v", g, vweight), _prop("e", g, eweight), self_loops) u = GraphView(g, directed=True) libcommunity.community_network_vavg(u._Graph__graph, gp._Graph__graph, _prop("v", g, prop), _prop("v", gp, cprop), _prop("v", gp, vcount), _prop("v", g, vweight), avp) libcommunity.community_network_eavg(u._Graph__graph, gp._Graph__graph, _prop("v", g, prop), _prop("v", gp, cprop), _prop("e", gp, ecount), _prop("e", g, eweight), aep, self_loops) return gp, cprop, vcount, ecount, r_avp, r_aep