Commit 34735af2 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Implement BlockState.collect_partition_histograms() and microstate_entropy()

parent 0ba0c262
......@@ -26,7 +26,7 @@ using namespace boost;
using namespace graph_tool;
void collect_vertex_marginals(GraphInterface& gi, boost::any ob,
boost::any op, double update)
boost::any op, size_t update)
{
typedef vprop_map_t<int32_t>::type vmap_t;
auto b = any_cast<vmap_t>(ob).get_unchecked();
......@@ -51,7 +51,7 @@ void collect_vertex_marginals(GraphInterface& gi, boost::any ob,
}
class BlockPairHist
: public gt_hash_map<std::pair<int32_t,int32_t>,double>
: public gt_hash_map<std::pair<int32_t,int32_t>,size_t>
{
public:
......@@ -75,12 +75,12 @@ public:
auto k = keys[i];
int32_t r = python::extract<int32_t>(k[0]);
int32_t s = python::extract<int32_t>(k[1]);
double v = python::extract<double>(state[k]);
size_t v = python::extract<size_t>(state[k]);
(*this)[make_pair(r, s)] = v;
}
}
double get_item(boost::python::object k)
size_t get_item(boost::python::object k)
{
int32_t r = python::extract<int32_t>(k[0]);
int32_t s = python::extract<int32_t>(k[1]);
......@@ -100,7 +100,7 @@ public:
};
void collect_edge_marginals(GraphInterface& gi, boost::any ob,
boost::any op, double update)
boost::any op, size_t update)
{
typedef vprop_map_t<int32_t>::type vmap_t;
auto b = any_cast<vmap_t>(ob).get_unchecked();
......@@ -222,18 +222,119 @@ boost::python::tuple bethe_entropy(GraphInterface& gi, boost::any op,
return boost::python::make_tuple(H, Hmf);
}
class PartitionHist
: public gt_hash_map<std::vector<int32_t>, size_t>
{
public:
boost::python::dict get_state()
{
boost::python::dict state;
for (auto& kv : *this)
state[kv.first] = kv.second;
return state;
}
void set_state(boost::python::dict state)
{
auto keys = state.keys();
for (int i = 0; i < python::len(keys); ++i)
{
auto& k = python::extract<std::vector<int32_t>&>(keys[i])();
size_t v = python::extract<size_t>(state[k]);
(*this)[k] = v;
}
}
size_t get_item(std::vector<int32_t>& k)
{
auto iter = this->find(k);
if (iter == this->end())
return 0;
return iter->second;
}
void set_item(std::vector<int32_t>& k, double v)
{
(*this)[k] = v;
}
};
void collect_partitions(boost::any ob, PartitionHist& h, size_t update)
{
typedef vprop_map_t<int32_t>::type vmap_t;
auto b = any_cast<vmap_t>(ob);
auto& v = b.get_storage();
h[v] += update;
}
void collect_hierarchical_partitions(python::object ovb, PartitionHist& h,
size_t update)
{
typedef vprop_map_t<int32_t>::type vmap_t;
vector<int32_t> v;
for (int i = 0; i < len(ovb); ++i)
{
boost::any ob = python::extract<boost::any>(ovb[i])();
auto b = any_cast<vmap_t>(ob);
auto& vi = b.get_storage();
v.reserve(v.size() + vi.size());
v.insert(v.end(), vi.begin(), vi.end());
}
h[v] += update;
}
double partitions_entropy(PartitionHist& h)
{
double S = 0;
size_t N = 0;
for (auto& kv : h)
{
if (kv.second == 0)
continue;
N += kv.second;
S -= kv.second * log(kv.second);
}
if (N > 0)
{
S /= N;
S += log(N);
}
return S;
}
void export_marginals()
{
using namespace boost::python;
class_<BlockPairHist>("BlockPairHist")
class_<BlockPairHist>("BlockPairHist",
"Histogram of block pairs, implemented in C++.\n"
"Interface supports querying and setting using pairs "
"of ints as keys, and ints as values.")
.def("__setitem__", &BlockPairHist::set_item)
.def("__getitem__", &BlockPairHist::get_item)
.def("__setstate__", &BlockPairHist::set_state)
.def("__getstate__", &BlockPairHist::get_state).enable_pickling();
.def("__getstate__", &BlockPairHist::get_state)
.def("asdict", &BlockPairHist::get_state,
"Return the histogram's contents as a dict.").enable_pickling();
class_<PartitionHist>("PartitionHist",
"Histogram of partitions, implemented in C++.\n"
"Interface supports querying and setting using Vector_int32_t "
"as keys, and ints as values.")
.def("__setitem__", &PartitionHist::set_item)
.def("__getitem__", &PartitionHist::get_item)
.def("__setstate__", &PartitionHist::set_state)
.def("__getstate__", &PartitionHist::get_state)
.def("asdict", &PartitionHist::get_state,
"Return the histogram's contents as a dict.").enable_pickling();
def("vertex_marginals", &collect_vertex_marginals);
def("edge_marginals", &collect_edge_marginals);
def("mf_entropy", &mf_entropy);
def("bethe_entropy", &bethe_entropy);
def("collect_partitions", &collect_partitions);
def("collect_hierarchical_partitions", &collect_hierarchical_partitions);
def("partitions_entropy", &partitions_entropy);
}
......@@ -72,9 +72,19 @@ Auxiliary functions
model_entropy
mf_entropy
bethe_entropy
microstate_entropy
half_edge_graph
get_block_edge_gradient
Auxiliary classes
=================
.. autosummary::
:nosignatures:
PartitionHist
BlockPairHist
Semiparametric stochastic block model inference
+++++++++++++++++++++++++++++++++++++++++++++++
......@@ -125,6 +135,9 @@ __all__ = ["minimize_blockmodel_dl",
"model_entropy",
"mf_entropy",
"bethe_entropy",
"microstate_entropy",
"PartitionHist",
"BlockPairHist",
"half_edge_graph",
"get_block_edge_gradient",
"get_hierarchy_tree"]
......
......@@ -39,6 +39,8 @@ from . util import *
from .. dl_import import dl_import
dl_import("from . import libgraph_tool_inference as libinference")
from . libgraph_tool_inference import PartitionHist, BlockPairHist
__test__ = False
def set_test(test):
......@@ -1562,7 +1564,7 @@ class BlockState(object):
assert nB == B, "wrong number of blocks after shrink: %d (should be %d)" % (nB, B)
return state
def collect_edge_marginals(self, p=None, update=1.):
def collect_edge_marginals(self, p=None, update=1):
r"""Collect the edge marginal histogram, which counts the number of times
the endpoints of each node have been assigned to a given block pair.
......@@ -1574,7 +1576,7 @@ class BlockState(object):
p : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
Edge property map with edge marginals to be updated. If not
provided, an empty histogram will be created.
update : float (optional, default: ``1.``)
update : float (optional, default: ``1``)
Each call increases the current count by the amount given by this
parameter.
......@@ -1615,7 +1617,7 @@ class BlockState(object):
update)
return p
def collect_vertex_marginals(self, p=None, update=1.):
def collect_vertex_marginals(self, p=None, update=1):
r"""Collect the vertex marginal histogram, which counts the number of times a
node was assigned to a given block.
......@@ -1627,7 +1629,7 @@ class BlockState(object):
p : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
Vertex property map with vector-type values, storing the previous block
membership counts. If not provided, an empty histogram will be created.
update : float (optional, default: ``1.``)
update : int (optional, default: ``1``)
Each call increases the current count by the amount given by this
parameter.
......@@ -1655,7 +1657,7 @@ class BlockState(object):
... ds, nmoves = state.mcmc_sweep(niter=10)
... pv = state.collect_vertex_marginals(pv)
>>> gt.mf_entropy(g, pv)
7.165235016952...
29.84318996...
>>> gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie",
... vertex_pie_fractions=pv, output="polbooks_blocks_soft_B4.pdf")
<...>
......@@ -1681,6 +1683,52 @@ class BlockState(object):
update)
return p
def collect_partition_histogram(self, h=None, update=1):
r"""Collect a histogram of partitions.
This should be called multiple times, e.g. after repeated runs of the
:meth:`graph_tool.inference.BlockState.mcmc_sweep` function.
Parameters
----------
h : :class:`~graph_tool.inference.PartitionHist` (optional, default: ``None``)
Partition histogram. If not provided, an empty histogram will be created.
update : float (optional, default: ``1``)
Each call increases the current count by the amount given by this
parameter.
Returns
-------
h : :class:`~graph_tool.inference.PartitionHist` (optional, default: ``None``)
Updated Partition histogram.
Examples
--------
.. testsetup:: cvm
gt.seed_rng(42)
np.random.seed(42)
.. doctest:: cvm
>>> g = gt.collection.data["polbooks"]
>>> state = gt.BlockState(g, B=4, deg_corr=True)
>>> ph = None
>>> state.mcmc_sweep(niter=1000) # remove part of the transient
(...)
>>> for i in range(1000):
... ds, nmoves = state.mcmc_sweep(niter=10)
... ph = state.collect_partition_histogram(ph)
>>> gt.microstate_entropy(ph)
5.215767...
"""
if h is None:
h = PartitionHist()
libinference.collect_partitions(_prop("v", self.g, self.b),
h, update)
return h
def draw(self, **kwargs):
r"""Convenience wrapper to :func:`~graph_tool.draw.graph_draw` that
draws the state of the graph as colors on the vertices and edges."""
......@@ -1872,5 +1920,38 @@ def mf_entropy(g, p):
return libinference.mf_entropy(g._Graph__graph,
_prop("v", g, p))
def microstate_entropy(h):
r"""Compute microstate entropy given a histogram of partitions.
Parameters
----------
h : :class:`~graph_tool.inference.PartitionHist` (optional, default: ``None``)
Partition histogram.
Returns
-------
H : ``float``
The microstate entropy value (in `nats <http://en.wikipedia.org/wiki/Nat_%28information%29>`_).
Notes
-----
The microstate entropy is defined as,
.. math::
H = - \sum_{\boldsymbol b}p({\boldsymbol b})\ln p({\boldsymbol b}),
where :math:`p({\boldsymbol b})` is observed frequency of partition
:math:`{\boldsymbol b}`.
References
----------
.. [mezard-information-2009] Marc Mézard, Andrea Montanari, "Information,
Physics, and Computation", Oxford Univ Press, 2009.
:DOI:`10.1093/acprof:oso/9780198570837.001.0001`
"""
return libinference.partitions_entropy(h)
from . overlap_blockmodel import *
\ No newline at end of file
......@@ -532,6 +532,33 @@ class NestedBlockState(object):
edge_list = [(lstate.b[u], lstate.b[v]) for u, v in edge_list]
return S
def collect_partition_histogram(self, h=None, update=1):
r"""Collect a histogram of partitions.
This should be called multiple times, e.g. after repeated runs of the
:meth:`graph_tool.inference.NestedBlockState.mcmc_sweep` function.
Parameters
----------
h : :class:`~graph_tool.inference.PartitionHist` (optional, default: ``None``)
Partition histogram. If not provided, an empty histogram will be created.
update : float (optional, default: ``1``)
Each call increases the current count by the amount given by this
parameter.
Returns
-------
h : :class:`~graph_tool.inference.PartitionHist` (optional, default: ``None``)
Updated Partition histogram.
"""
if h is None:
h = PartitionHist()
bs = [_prop("v", state.g, state.b) for state in self.levels]
libinference.collect_hierarchical_partitions(bs, h, update)
return h
def draw(self, **kwargs):
r"""Convenience wrapper to :func:`~graph_tool.draw.draw_hierarchy` that
draws the hierarchical state."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment