diff --git a/doc/demos/inference/inference.rst b/doc/demos/inference/inference.rst index 45db87a06c41655e5819b63abb00356cd476556f..a7fa12cadebb093f4a930ca93cce149a191c2b04 100644 --- a/doc/demos/inference/inference.rst +++ b/doc/demos/inference/inference.rst @@ -110,7 +110,7 @@ The stochastic block model the simplest generative process based on the notion of groups of nodes [holland-stochastic-1983]_. The microcanonical _ formulation -[peixoto-nonparametric-2016]_ of the basic or "traditional" version takes +[peixoto-nonparametric-2017]_ of the basic or "traditional" version takes as parameters the partition of the nodes into groups :math:\boldsymbol b and a :math:B\times B matrix of edge counts :math:\boldsymbol e, where :math:e_{rs} is the number of edges @@ -182,7 +182,7 @@ degree distributions. A better model for such networks is called the it is defined just like the traditional model, with the addition of the degree sequence :math:\boldsymbol k = \{k_i\} of the graph as an additional set of parameters (assuming again a microcanonical -formulation [peixoto-nonparametric-2016]_). +formulation [peixoto-nonparametric-2017]_). The nested stochastic block model @@ -461,7 +461,7 @@ case of the C. elegans network we have Since it yields the smallest description length, the degree-corrected fit should be preferred. The statistical significance of the choice can be accessed by inspecting the posterior odds ratio -[peixoto-nonparametric-2016]_ +[peixoto-nonparametric-2017]_ .. math:: @@ -918,7 +918,7 @@ Model class selection When averaging over partitions, we may be interested in evaluating which **model class** provides a better fit of the data, considering all possible parameter choices. This is done by evaluating the model -evidence [peixoto-nonparametric-2016]_ +evidence [peixoto-nonparametric-2017]_ .. math:: @@ -1383,8 +1383,9 @@ References blockmodels and community structure in networks", Phys. Rev. 
E 83, 016107 (2011), :doi:10.1103/PhysRevE.83.016107, :arxiv:1008.3926 -.. [peixoto-nonparametric-2016] Tiago P. Peixoto, "Nonparametric - Bayesian inference of the microcanonical stochastic block model" +.. [peixoto-nonparametric-2017] Tiago P. Peixoto, "Nonparametric + Bayesian inference of the microcanonical stochastic block model", + Phys. Rev. E 95, 012317 (2017), :doi:10.1103/PhysRevE.95.012317, :arxiv:1610.02703 .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module diff --git a/src/graph_tool/inference/blockmodel.py b/src/graph_tool/inference/blockmodel.py index 6e415e9ab9633cfa2ff80f89b9d0729aa6cc22fc..a7a39501852a89301e08fa23f9dcc9cf8d85651e 100644 --- a/src/graph_tool/inference/blockmodel.py +++ b/src/graph_tool/inference/blockmodel.py @@ -738,7 +738,8 @@ class BlockState(object): degree_dl=True, degree_dl_kind="distributed", edges_dl=True, dense=False, multigraph=True, deg_entropy=True, recs=True, exact=True, **kwargs): - r"""Calculate the entropy associated with the current block partition. + r"""Calculate the entropy (a.k.a. negative log-likelihood) associated + with the current block partition. Parameters ---------- @@ -777,14 +778,15 @@ class BlockState(object): Notes ----- - The "entropy" of the state is minus the log-likelihood of the + The "entropy" of the state is the negative log-likelihood of the microcanonical SBM, that includes the generated graph - :math:\boldsymbol{A} and the model parameters :math:\boldsymbol{\theta}, + :math:\boldsymbol{A} and the model parameters + :math:\boldsymbol{\theta}, .. math:: - \mathcal{S} &= - \ln P(\boldsymbol{A},\boldsymbol{\theta}) \\ - &= - \ln P(\boldsymbol{A}|\boldsymbol{\theta}) - \ln P(\boldsymbol{\theta}). + \Sigma &= - \ln P(\boldsymbol{A},\boldsymbol{\theta}) \\ + &= - \ln P(\boldsymbol{A}|\boldsymbol{\theta}) - \ln P(\boldsymbol{\theta}). 
This value is also called the description length _ of the data, @@ -837,7 +839,7 @@ class BlockState(object): if multigraph == False, otherwise we replace :math:{n\choose m}\to\left(\!\!{n\choose m}\!\!\right) above, where - :math:\left(\!\!{n\choose m}\!\!\right) = {n+m-1\choose m}. A dense + :math:\left(\!\!{n\choose m}\!\!\right) = {n+m-1\choose m}. A "dense" entropy for the degree-corrected model is not available, and if requested will raise a :exc:NotImplementedError. @@ -856,7 +858,10 @@ class BlockState(object): P(\boldsymbol{k}|\boldsymbol{e},\boldsymbol{b}) = \prod_r\left(\!\!{n_r\choose e_r}\!\!\right)^{-1}. - 2. degree_dl_kind == "distributed" + This corresponds to a noninformative prior, where the degrees are + sampled from a uniform distribution. + + 2. degree_dl_kind == "distributed" (default) .. math:: @@ -868,6 +873,10 @@ class BlockState(object): _ of integer :math:n into at most :math:m parts. + This corresponds to a prior for the degree sequence conditioned on + the degree frequencies, which are themselves sampled from a uniform + hyperprior. This option should be preferred in most cases. + 3. degree_dl_kind == "entropy" .. math:: @@ -881,10 +890,20 @@ class BlockState(object): only an approximation of the description length. It is meant to be used only for comparison purposes, and should be avoided in practice. - For the directed case, the above expressions are duplicated for the in- and out-degrees. + References + ---------- + + .. [peixoto-nonparametric-2017] Tiago P. Peixoto, "Nonparametric + Bayesian inference of the microcanonical stochastic block model", + Phys. Rev. E 95, 012317 (2017), :doi:10.1103/PhysRevE.95.012317, + :arxiv:1610.02703. + .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block + structures and high-resolution model selection in large networks", + Phys. Rev. X 4, 011047 (2014), :doi:10.1103/PhysRevX.4.011047, + :arxiv:1310.4377. 
""" if _bm_test() and kwargs.get("test", True): @@ -1083,7 +1102,7 @@ class BlockState(object): return self._state.get_move_prob(int(v), s, self.b[v], c, True) def get_edges_prob(self, missing, spurious=[], entropy_args={}): - """Compute the joint log-probability of the missing and spurious edges given by + r"""Compute the joint log-probability of the missing and spurious edges given by missing and spurious (a list of (source, target) tuples, or :meth:~graph_tool.Edge instances), together with the observed edges. @@ -2150,15 +2169,10 @@ def model_entropy(B, N, E, directed=False, nr=None, allow_empty=True): .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks", Phys. Rev. Lett. 110, 148701 (2013), :doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794. - .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block - structures and high-resolution model selection in large networks ", - Phys. Rev. X 4, 011047 (2014), :doi:10.1103/PhysRevX.4.011047, - :arxiv:1310.4377. - .. [peixoto-model-2016] Tiago P. Peixoto, "Model selection and hypothesis - testing for large-scale network models with overlapping groups", - Phys. Rev. X 5, 011033 (2016), :doi:10.1103/PhysRevX.5.011033, - :arxiv:1409.3059. - + .. [peixoto-nonparametric-2017] Tiago P. Peixoto, "Nonparametric + Bayesian inference of the microcanonical stochastic block model", + Phys. Rev. E 95, 012317 (2017), :doi:10.1103/PhysRevE.95.012317, + :arxiv:1610.02703. """ if directed: