Commit c0b1794f authored by Tiago Peixoto's avatar Tiago Peixoto

inference: update cookbook and docstrings

parent 100f01d4
......@@ -34,7 +34,7 @@ which yields
<Graph object, undirected, with 115 vertices and 613 edges, 4 internal vertex properties, 2 internal graph properties, at 0x...>
we then fit the degree-corrected model by calling
We then fit the degree-corrected model by calling:
.. testcode:: football
......
......@@ -25,8 +25,8 @@ we have
.. testoutput:: model-selection
:options: +NORMALIZE_WHITESPACE
Non-degree-corrected DL: 8520.825480...
Degree-corrected DL: 8227.987410...
Non-degree-corrected DL: 8553.474528...
Degree-corrected DL: 8266.554118...
Since it yields the smallest description length, the degree-corrected
fit should be preferred. The statistical significance of the choice can
......@@ -52,7 +52,7 @@ fits. In our particular case, we have
.. testoutput:: model-selection
:options: +NORMALIZE_WHITESPACE
ln Λ: -292.838070...
ln Λ: -286.920410...
The precise threshold that should be used to decide when to `reject a
hypothesis <https://en.wikipedia.org/wiki/Hypothesis_testing>`_ is
......@@ -80,11 +80,11 @@ example, for the American football network above, we have:
.. testoutput:: model-selection
:options: +NORMALIZE_WHITESPACE
Non-degree-corrected DL: 1757.843826...
Degree-corrected DL: 1809.861996...
ln Λ: -52.018170...
Non-degree-corrected DL: 1738.138494...
Degree-corrected DL: 1780.576716...
ln Λ: -42.438221...
Hence, with a posterior odds ratio of :math:`\Lambda \approx \mathrm{e}^{-52} \approx
10^{-22}` in favor of the non-degree-corrected model, we conclude that the
Hence, with a posterior odds ratio of :math:`\Lambda \approx \mathrm{e}^{-42} \approx
10^{-19}` in favor of the non-degree-corrected model, we conclude that the
degree-corrected variant is an unnecessarily complex description for
this network.
......@@ -164,15 +164,14 @@ simple example, using
# intervals of 10 sweeps:
u = None # marginal posterior edge probabilities
pv = None # marginal posterior group membership probabilities
bs = [] # partitions
cs = [] # average local clustering coefficient
def collect_marginals(s):
global pv, u, cs
global u, bs, cs
u = s.collect_marginal(u)
bstate = s.get_block_state()
b = gt.perfect_prop_hash([bstate.levels[0].b])[0]
pv = bstate.levels[0].collect_vertex_marginals(pv, b=b)
bs.append(bstate.levels[0].b.a.copy())
cs.append(gt.local_clustering(s.get_graph()).fa.mean())
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
......@@ -223,7 +222,10 @@ reconstructed network:
bstate = state.get_block_state()
bstate = bstate.levels[0].copy(g=u)
pv = u.own_property(pv)
# Disambiguate partitions and obtain marginals
pmode = gt.PartitionModeState(bs, converge=True)
pv = pmode.get_marginal(u)
edash = u.new_ep("vector<double>")
edash[u.edge(15, 73)] = [.1, .1, 0]
bstate.draw(pos=u.own_property(g.vp.pos), vertex_shape="pie", vertex_pie_fractions=pv,
......@@ -293,7 +295,7 @@ with uniform error rates, as we see with the same example:
# intervals of 10 sweeps:
u = None # marginal posterior edge probabilities
pv = None # marginal posterior group membership probabilities
bs = [] # partitions
cs = [] # average local clustering coefficient
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
......@@ -412,15 +414,14 @@ inference:
# intervals of 10 sweeps:
u = None # marginal posterior edge probabilities
pv = None # marginal posterior group membership probabilities
bs = [] # partitions
cs = [] # average local clustering coefficient
def collect_marginals(s):
global pv, u, cs
global bs, u, cs
u = s.collect_marginal(u)
bstate = s.get_block_state()
b = gt.perfect_prop_hash([bstate.levels[0].b])[0]
pv = bstate.levels[0].collect_vertex_marginals(pv, b=b)
bs.append(bstate.levels[0].b.a.copy())
cs.append(gt.local_clustering(s.get_graph()).fa.mean())
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
......@@ -465,7 +466,11 @@ the same measurement probability. The reconstructed network is visualized below.
bstate = state.get_block_state()
bstate = bstate.levels[0].copy(g=u)
pv = u.own_property(pv)
# Disambiguate partitions and obtain marginals
pmode = gt.PartitionModeState(bs, converge=True)
pv = pmode.get_marginal(u)
bstate.draw(pos=u.own_property(g.vp.pos), vertex_shape="pie", vertex_pie_fractions=pv,
edge_color=ecolor, edge_dash_style=edash, edge_gradient=None,
output="lesmis-uncertain-reconstruction-marginals.svg")
......@@ -516,14 +521,13 @@ latent multiedges of a network of political books:
# intervals of 10 sweeps:
u = None # marginal posterior multigraph
pv = None # marginal posterior group membership probabilities
bs = [] # partitions
def collect_marginals(s):
global pv, u
global bs, u
u = s.collect_marginal_multigraph(u)
bstate = state.get_block_state()
b = gt.perfect_prop_hash([bstate.levels[0].b])[0]
pv = bstate.levels[0].collect_vertex_marginals(pv, b=b)
bs.append(bstate.levels[0].b.a.copy())
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
callback=collect_marginals)
......@@ -538,7 +542,11 @@ latent multiedges of a network of political books:
bstate = state.get_block_state()
bstate = bstate.levels[0].copy(g=u)
pv = u.own_property(pv)
# Disambiguate partitions and obtain marginals
pmode = gt.PartitionModeState(bs, converge=True)
pv = pmode.get_marginal(u)
bstate.draw(pos=u.own_property(g.vp.pos), vertex_shape="pie", vertex_pie_fractions=pv,
edge_pen_width=gt.prop_to_size(ew, .1, 8, power=1), edge_gradient=None,
output="polbooks-erased-poisson.svg")
......
......@@ -117,14 +117,13 @@ epidemic process.
# intervals of 10 sweeps:
gm = None
bm = None
bs = []
betas = []
def collect_marginals(s):
global gm, bm
global gm, bs
gm = s.collect_marginal(gm)
b = gt.perfect_prop_hash([s.bstate.b])[0]
bm = s.bstate.collect_vertex_marginals(bm, b=b)
bs.append(s.bstate.b.a.copy())
betas.append(s.params["global_beta"])
gt.mcmc_equilibrate(rstate, force_niter=10000, mcmc_args=dict(niter=10, xstep=0),
......@@ -132,9 +131,13 @@ epidemic process.
print("Posterior similarity: ", gt.similarity(g, gm, g.new_ep("double", 1), gm.ep.eprob))
print("Inferred infection probability: %g ± %g" % (mean(betas), std(betas)))
# Disambiguate partitions and obtain marginals
pmode = gt.PartitionModeState(bs, converge=True)
pv = pmode.get_marginal(gm)
gt.graph_draw(gm, gm.own_property(g.vp.pos), vertex_shape="pie", vertex_color="black",
vertex_pie_fractions=gm.own_property(bm), vertex_pen_width=1,
vertex_pie_fractions=pv, vertex_pen_width=1,
edge_pen_width=gt.prop_to_size(gm.ep.eprob, 0, 5),
eorder=gm.ep.eprob, output="dolphins-posterior.svg")
......
......@@ -81,37 +81,41 @@ Note that the value of ``wait`` above was made purposefully low so that
the output would not be overly long. The most appropriate value requires
experimentation, but a typically good value is ``wait=1000``.
The function :func:`~graph_tool.inference.mcmc.mcmc_equilibrate` accepts a
``callback`` argument that takes an optional function to be invoked
The function :func:`~graph_tool.inference.mcmc.mcmc_equilibrate` accepts
a ``callback`` argument that takes an optional function to be invoked
after each call to
:meth:`~graph_tool.inference.blockmodel.BlockState.multiflip_mcmc_sweep`. This function
should accept a single parameter which will contain the actual
:class:`~graph_tool.inference.blockmodel.BlockState` instance. We will use this in
the example below to collect the posterior vertex marginals (via
:class:`~graph_tool.inference.blockmodel.BlockState.collect_vertex_marginals`),
i.e. the posterior probability that a node belongs to a given group:
:meth:`~graph_tool.inference.blockmodel.BlockState.multiflip_mcmc_sweep`. This
function should accept a single parameter which will contain the actual
:class:`~graph_tool.inference.blockmodel.BlockState` instance. We will
use this in the example below to collect the posterior vertex marginals
(via :class:`~graph_tool.inference.partition_modes.PartitionModeState`,
which disambiguates group labels [peixoto-revealing-2020]_), i.e. the
posterior probability that a node belongs to a given group:
.. testcode:: model-averaging
# We will first equilibrate the Markov chain
gt.mcmc_equilibrate(state, wait=1000, mcmc_args=dict(niter=10))
pv = None
bs = [] # collect some partitions
def collect_marginals(s):
global pv
b = gt.perfect_prop_hash([s.b])[0]
pv = s.collect_vertex_marginals(pv, b=b)
def collect_partitions(s):
global bs
bs.append(s.b.a.copy())
# Now we collect the marginals for exactly 100,000 sweeps, at
# intervals of 10 sweeps:
# Now we collect partitions for exactly 100,000 sweeps, at intervals
# of 10 sweeps:
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
callback=collect_marginals)
callback=collect_partitions)
# Disambiguate partitions and obtain marginals
pmode = gt.PartitionModeState(bs, converge=True)
pv = pmode.get_marginal(g)
# Now the node marginals are stored in property map pv. We can
# visualize them as pie charts on the nodes:
state.draw(pos=g.vp.pos, vertex_shape="pie", vertex_pie_fractions=pv,
edge_gradient=None, output="lesmis-sbm-marginals.svg")
output="lesmis-sbm-marginals.svg")
.. figure:: lesmis-sbm-marginals.*
:align: center
......@@ -135,8 +139,8 @@ itself, as follows.
B = s.get_nonempty_B()
h[B] += 1
# Now we collect the marginals for exactly 100,000 sweeps, at
# intervals of 10 sweeps:
# Now we collect partitions for exactly 100,000 sweeps, at intervals
# of 10 sweeps:
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
callback=collect_num_groups)
......@@ -194,7 +198,6 @@ network as above.
Change in description length: -73.716766...
Number of accepted vertex moves: 366160
.. warning::
When using
......@@ -212,28 +215,34 @@ Similarly to the the non-nested case, we can use
:func:`~graph_tool.inference.mcmc.mcmc_equilibrate` to do most of the boring
work, and we can now obtain vertex marginals on all hierarchical levels:
.. testcode:: nested-model-averaging
# We will first equilibrate the Markov chain
gt.mcmc_equilibrate(state, wait=1000, mcmc_args=dict(niter=10))
pv = [None] * len(state.get_levels())
# collect nested partitions
bs = []
def collect_marginals(s):
global pv
bs = [gt.perfect_prop_hash([s.b])[0] for s in state.get_levels()]
pv = [s.collect_vertex_marginals(pv[l], b=bs[l]) for l, s in enumerate(s.get_levels())]
def collect_partitions(s):
global bs
bs.append(s.get_bs())
# Now we collect the marginals for exactly 100,000 sweeps
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
callback=collect_marginals)
callback=collect_partitions)
# Disambiguate partitions and obtain marginals
pmode = gt.PartitionModeState(bs, nested=True, converge=True)
pv = pmode.get_marginal(g)
# Get consensus estimate
bs = pmode.get_max_nested()
state = state.copy(bs=bs)
# Now the node marginals for all levels are stored in property map
# list pv. We can visualize the first level as pie charts on the nodes:
state_0 = state.get_levels()[0]
state_0.draw(pos=g.vp.pos, vertex_shape="pie", vertex_pie_fractions=pv[0],
edge_gradient=None, output="lesmis-nested-sbm-marginals.svg")
# We can visualize the marginals as pie charts on the nodes:
state.draw(vertex_shape="pie", vertex_pie_fractions=pv,
output="lesmis-nested-sbm-marginals.svg")
.. figure:: lesmis-nested-sbm-marginals.*
:align: center
......@@ -316,3 +325,79 @@ distribution.
:width: 200px
.. image:: lesmis-partition-sample-9.svg
:width: 200px
Characterizing the posterior distribution
+++++++++++++++++++++++++++++++++++++++++
The posterior distribution of partitions can have an elaborate
structure, containing multiple possible explanations for the data. In
order to summarize it, we can infer the modes of the distribution using
:class:`~graph_tool.inference.partition_modes.ModeClusterState`, as
described in [peixoto-revealing-2020]_. This amounts to identifying
clusters of partitions that are very similar to each other, but
sufficiently different from those that belong to other
clusters. Collectively, such "modes" represent the different stories that
the data is telling us through the model. Here is an example using again
the Les Misérables network:
.. testcode:: partition-modes
g = gt.collection.data["lesmis"]
state = gt.NestedBlockState(g)
# Equilibration
gt.mcmc_equilibrate(state, force_niter=1000, mcmc_args=dict(niter=10))
bs = []
def collect_partitions(s):
global bs
bs.append(s.get_bs())
# We will collect only 1000 partitions. For more accurate
# results, this number should be increased.
gt.mcmc_equilibrate(state, force_niter=1000, mcmc_args=dict(niter=10),
callback=collect_partitions)
# Infer partition modes
pmode = gt.ModeClusterState(bs, nested=True)
# Minimize the mode state itself
gt.mcmc_equilibrate(pmode, wait=1, mcmc_args=dict(niter=1, beta=np.inf))
# Get inferred modes
modes = pmode.get_modes()
for i, mode in enumerate(modes):
b = mode.get_max_nested() # mode's maximum
pv = mode.get_marginal(g) # mode's marginal distribution
print(f"Mode {i} with size {mode.get_M()/len(bs)}")
state = state.copy(bs=b)
state.draw(vertex_shape="pie", vertex_pie_fractions=pv,
output="lesmis-partition-mode-%i.svg" % i)
Running the above code gives us the relative size of each mode,
corresponding to their collective posterior probability.
.. testoutput:: partition-modes
Mode 0 with size 0.389389...
Mode 1 with size 0.352352...
Mode 2 with size 0.129129...
Mode 3 with size 0.117117...
Mode 4 with size 0.012012...
Below are the marginal node distributions representing the partitions that belong to each inferred mode:
.. image:: lesmis-partition-mode-0.svg
:width: 200px
.. image:: lesmis-partition-mode-1.svg
:width: 200px
.. image:: lesmis-partition-mode-2.svg
:width: 200px
.. image:: lesmis-partition-mode-3.svg
:width: 200px
.. image:: lesmis-partition-mode-4.svg
:width: 200px
......@@ -78,6 +78,9 @@ References
.. [peixoto-merge-split-2020] Tiago P. Peixoto, "Merge-split Markov
chain Monte Carlo for community detection", :arxiv:`2003.07070`
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
.. [peixoto-reconstructing-2018] Tiago P. Peixoto, "Reconstructing
networks with unknown and heterogeneous errors", Phys. Rev. X 8
041011 (2018). :doi:`10.1103/PhysRevX.8.041011`, :arxiv:`1806.07956`
......
......@@ -85,6 +85,7 @@ void export_partition_mode()
})
.def("align_mode", &PartitionModeState::align_mode)
.def("get_B", &PartitionModeState::get_B)
.def("get_M", &PartitionModeState::get_M)
.def("get_marginal",
+[](PartitionModeState& state,
GraphInterface& gi, boost::any obm)
......
......@@ -1213,6 +1213,11 @@ public:
return _B;
}
size_t get_M()
{
return _bs.size();
}
std::shared_ptr<PartitionModeState> get_coupled_state()
{
return _coupled_state;
......
......@@ -31,11 +31,11 @@ from scipy.special import gammaln
class PartitionCentroidState(object):
r"""Obtain the center of a set of partitions, according to the variation of
information metric or reduced mutual information..
information metric or reduced mutual information.
Parameters
----------
bs : iterable of iterable of `int`s
bs : iterable of iterable of ``int``
List of partitions.
b : ``list`` or :class:`numpy.ndarray` (optional, default: ``None``)
Initial partition. If not supplied, a partition into a single group will
......@@ -43,7 +43,6 @@ class PartitionCentroidState(object):
RMI : ``bool`` (optional, default: ``False``)
If ``True``, the reduced mutual information will be used, otherwise the
variation of information metric will be used instead.
"""
def __init__(self, bs, b=None, RMI=False):
......@@ -254,7 +253,6 @@ def variation_information(x, y, norm=False):
References
----------
.. [meila_comparing_2003] Marina Meilă, "Comparing Clusterings by the
Variation of Information," in Learning Theory and Kernel Machines,
Lecture Notes in Computer Science No. 2777, edited by Bernhard Schölkopf
......@@ -402,7 +400,6 @@ def reduced_mutual_information(x, y, norm=False):
References
----------
.. [newman_improved_2020] M. E. J. Newman, G. T. Cantwell and J.-G. Young,
"Improved mutual information measure for classification and community
detection", Phys. Rev. E, 101, 042304 (2020),
......
......@@ -48,6 +48,11 @@ class PartitionModeState(object):
:meth:`~graph_tool.inference.partition_modes.PartitionModeState.replace_partitions`
needs to be called repeatedly).
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
"""
def __init__(self, bs, relabel=True, nested=False, converge=False, **kwargs):
self.bs = {}
......@@ -144,7 +149,7 @@ class PartitionModeState(object):
def align_mode(self, mode):
r"""Relabel entire ensemble to align with another ensemble given by ``mode``,
which should be an instance of
:class:`~graph_tool.inference.PartitionModeState`."""
:class:`~graph_tool.inference.partition_modes.PartitionModeState`."""
self._base.align_mode(mode._base)
def get_partition(self, i):
......@@ -197,7 +202,7 @@ class PartitionModeState(object):
return self._base.posterior_lprob(b, MLE)
def get_coupled_state(self):
r"""Return the instance of :class:`~graph_tool.inference.PartitionModeState`
r"""Return the instance of :class:`~graph_tool.inference.partition_modes.PartitionModeState`
representing the model at the upper hierarchical level.
"""
base = self._base.get_coupled_state()
......@@ -231,6 +236,10 @@ class PartitionModeState(object):
r"""Return the total number of labels used."""
return self._base.get_B()
def get_M(self):
r"""Return the number of partitions."""
return self._base.get_M()
def sample_partition(self, MLE=True):
r"""Sample a partition from the inferred model, using maximum likelihood
estimates for the marginal node probabilities if ``MLE=True``,
......@@ -262,6 +271,12 @@ class ModeClusterState(object):
instantiation, otherwise they will be incorporated as they are.
nested : ``bool`` (optional, default: ``False``)
If ``True``, the partitions will be assumed to be hierarchical.
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
"""
def __init__(self, bs, b=None, B=1, nested=False, relabel=True):
......@@ -328,14 +343,19 @@ class ModeClusterState(object):
def get_mode(self, r):
r"""Return the mode in cluster ``r`` as an instance of
:class:`~graph_tool.inference.PartitionModeState`. """
:class:`~graph_tool.inference.partition_modes.PartitionModeState`. """
base = self._state.get_mode(r);
return PartitionModeState(None, base=base, nested=self.nested)
def get_modes(self):
def get_modes(self, sort=True):
r"""Return the list of nonempty modes, as instances of
:class:`~graph_tool.inference.PartitionModeState`. """
return [self.get_mode(r) for r in np.unique(self.b)]
:class:`~graph_tool.inference.partition_modes.PartitionModeState`. If ``sort == True``,
the modes are returned in decreasing order with respect to their size.
"""
modes = [self.get_mode(r) for r in np.unique(self.b)]
if sort:
modes = list(sorted(modes, key=lambda m: -m.get_M()))
return modes
def get_wr(self):
r"""Return cluster sizes. """
......@@ -581,7 +601,8 @@ def partition_overlap(x, y, norm=True):
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
.. [kuhn_hungarian_1955] H. W. Kuhn, "The Hungarian method for the
assignment problem," Naval Research Logistics Quarterly 2, 83–97 (1955)
:doi:`10.1002/nav.3800020109`
......@@ -660,7 +681,8 @@ def nested_partition_overlap(x, y, norm=True):
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
.. [kuhn_hungarian_1955] H. W. Kuhn, "The Hungarian method for the
assignment problem," Naval Research Logistics Quarterly 2, 83–97 (1955)
:doi:`10.1002/nav.3800020109`
......@@ -875,6 +897,12 @@ def align_partition_labels(x, y):
[3 0 0 1 1 1 0 2 0]
>>> gt.align_partition_labels(y, x)
array([0, 2, 2, 1, 1, 1, 2, 3, 2], dtype=int32)
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
"""
x = np.asarray(x, dtype="int32").copy()
......@@ -987,6 +1015,12 @@ def partition_overlap_center(bs, init=None, relabel_bs=False):
[1 1 2 0 3 0 3 0 0 0 0] 0.07454545...
>>> gt.align_partition_labels(c, x)
array([5, 5, 2, 0, 1, 0, 1, 0, 0, 0, 0], dtype=int32)
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
"""
if relabel_bs:
......@@ -1072,6 +1106,12 @@ def nested_partition_overlap_center(bs, init=None, return_bs=False):
[array([1, 1, 2, 0, 3, 0, 3, 0, 0, 0, 0], dtype=int32), array([0, 1, 0, 1, 1], dtype=int32)] 0.084492...
>>> gt.align_nested_partition_labels(c, x)
[array([5, 5, 2, 0, 1, 0, 1, 0, 0, 0, 0], dtype=int32), array([ 0, 1, 0, -1, -1, 1], dtype=int32)]
References
----------
.. [peixoto-revealing-2020] Tiago P. Peixoto, "Revealing consensus and
dissensus between network partitions", :arxiv:`2005.13977`
"""
bs = [[np.asarray(bs[m][l], dtype="int32") for l in range(len(bs[m]))] for m in range(len(bs))]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment