Commit 48e68784 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

inference.blockmodel: Improve microstate entropy

This adds an option to store a single entry per partition instead of one
entry for each of its possible label permutations.
parent 7477469e
......@@ -223,7 +223,7 @@ boost::python::tuple bethe_entropy(GraphInterface& gi, boost::any op,
}
class PartitionHist
: public gt_hash_map<std::vector<int32_t>, size_t>
: public gt_hash_map<std::vector<int32_t>, double>
{
public:
......@@ -241,7 +241,7 @@ public:
for (int i = 0; i < python::len(keys); ++i)
{
auto& k = python::extract<std::vector<int32_t>&>(keys[i])();
size_t v = python::extract<size_t>(state[k]);
double v = python::extract<double>(state[k]);
(*this)[k] = v;
}
}
......@@ -261,40 +261,88 @@ public:
};
void collect_partitions(boost::any ob, PartitionHist& h, size_t update)
// Returns ln( N! / prod_r n_r! ), where n_r is the number of entries of `b`
// carrying label r and N is the number of labelled (non-negative) entries.
//
// Parameters:
//   b  label vector; each entry is a group label.
//
// Labels are counted defensively: the counter grows on demand so a label
// that is >= b.size() no longer indexes past the end of the buffer, and
// negative entries are skipped because they cannot be used as an index.
// For inputs whose labels all lie in [0, b.size()) the result is unchanged
// (empty groups contribute lgamma(1) == 0).
//
// NOTE(review): negative entries are assumed to be separators rather than
// group labels, so they are left out of N as well -- confirm this is the
// intended treatment for keys that concatenate several levels.
double log_n_permutations(const std::vector<int32_t>& b)
{
    std::vector<int32_t> count(b.size());
    std::size_t total = 0;
    for (auto bi : b)
    {
        if (bi < 0)
            continue; // not a usable index; skip instead of writing out of bounds
        const auto r = static_cast<std::size_t>(bi);
        if (r >= count.size())
            count.resize(r + 1); // new slots are value-initialised to 0
        ++count[r];
        ++total;
    }

    double n = boost::lgamma(total + 1);
    for (auto nr : count)
        n -= boost::lgamma(nr + 1);
    return n;
}
// Relabels a partition in order of first appearance: the first distinct
// label seen becomes 0, the next distinct label becomes 1, and so on.
// Two label vectors that differ only by a permutation of their labels are
// therefore mapped to the same vector.
//
// Parameters:
//   b  label vector (taken by value; the relabelled copy is returned).
//
// Returns the relabelled vector, same length as the input.
//
// The lookup table grows on demand, so labels >= b.size() are handled
// instead of indexing past the end of the table. Negative entries cannot
// be used as an index and are passed through unchanged.
std::vector<int32_t> unlabel_partition(std::vector<int32_t> b)
{
    // map[r] is the new label assigned to old label r, or -1 if r has not
    // been seen yet.
    std::vector<int32_t> map(b.size(), -1);
    int32_t pos = 0;
    for (auto& bi : b)
    {
        if (bi < 0)
            continue;
        const auto r = static_cast<std::size_t>(bi);
        if (r >= map.size())
            map.resize(r + 1, -1);
        auto& x = map[r];
        if (x == -1)
        {
            x = pos;
            ++pos;
        }
        bi = x;
    }
    return b;
}
// Adds `update` to the histogram entry of the partition held in `ob`.
//
// Parameters:
//   ob       boost::any holding a vprop_map_t<int32_t>::type property map;
//            it is accessed by reference, so the map is not copied.
//   h        histogram to update; keys are label vectors.
//   update   amount added to the entry of the (possibly relabelled) key.
//   unlabel  if true, the key is the result of unlabel_partition() applied
//            to the map's storage; otherwise the storage is used as-is.
//
// Exactly one histogram entry is updated per call.
void collect_partitions(boost::any& ob, PartitionHist& h, double update,
                        bool unlabel)
{
    typedef vprop_map_t<int32_t>::type vmap_t;
    auto& b = any_cast<vmap_t&>(ob);
    auto& v = b.get_storage();
    if (unlabel)
    {
        auto vc = unlabel_partition(v);
        h[vc] += update;
    }
    else
    {
        h[v] += update;
    }
}
void collect_hierarchical_partitions(python::object ovb, PartitionHist& h,
size_t update)
size_t update, bool unlabel)
{
typedef vprop_map_t<int32_t>::type vmap_t;
vector<int32_t> v;
for (int i = 0; i < len(ovb); ++i)
{
boost::any ob = python::extract<boost::any>(ovb[i])();
auto b = any_cast<vmap_t>(ob);
boost::any& ob = python::extract<boost::any&>(ovb[i])();
auto& b = any_cast<vmap_t&>(ob);
auto& vi = b.get_storage();
v.reserve(v.size() + vi.size());
v.insert(v.end(), vi.begin(), vi.end());
if (unlabel)
{
auto vc = unlabel_partition(v);
v.insert(v.end(), vc.begin(), vc.end());
}
else
{
v.insert(v.end(), vi.begin(), vi.end());
}
v.push_back(-1);
}
h[v] += update;
}
double partitions_entropy(PartitionHist& h)
double partitions_entropy(PartitionHist& h, bool unlabeled)
{
double S = 0;
size_t N = 0;
for (auto& kv : h)
for (auto kv : h)
{
if (kv.second == 0)
continue;
N += kv.second;
S -= kv.second * log(kv.second);
if (unlabeled)
S += kv.second * log_n_permutations(kv.first);
}
if (N > 0)
{
......
......@@ -1721,7 +1721,7 @@ class BlockState(object):
update)
return p
def collect_partition_histogram(self, h=None, update=1):
def collect_partition_histogram(self, h=None, update=1, unlabel=True):
r"""Collect a histogram of partitions.
This should be called multiple times, e.g. after repeated runs of the
......@@ -1734,6 +1734,9 @@ class BlockState(object):
update : float (optional, default: ``1``)
Each call increases the current count by the amount given by this
parameter.
unlabel : bool (optional, default: ``True``)
If ``True``, the partition will be relabeled so that only one entry
for all its label permutations will be considered in the histogram.
Returns
-------
......@@ -1758,13 +1761,14 @@ class BlockState(object):
... ds, nmoves = state.mcmc_sweep(niter=10)
... ph = state.collect_partition_histogram(ph)
>>> gt.microstate_entropy(ph)
5.215767...
124.092176...
"""
if h is None:
h = PartitionHist()
libinference.collect_partitions(_prop("v", self.g, self.b),
h, update)
h, update, unlabel)
return h
def draw(self, **kwargs):
......@@ -1877,7 +1881,7 @@ def bethe_entropy(g, p):
g : :class:`~graph_tool.Graph`
The graph.
p : :class:`~graph_tool.PropertyMap`
Edge property map with edge marginals.
Edge property map with edge marginals.
Returns
-------
......@@ -1958,13 +1962,19 @@ def mf_entropy(g, p):
return libinference.mf_entropy(g._Graph__graph,
_prop("v", g, p))
def microstate_entropy(h):
def microstate_entropy(h, unlabel=True):
r"""Compute microstate entropy given a histogram of partitions.
Parameters
----------
h : :class:`~graph_tool.inference.PartitionHist`
Partition histogram.
unlabel : bool (optional, default: ``True``)
If ``True``, it is assumed that the partitions were relabeled so that
only one entry for all their label permutations was considered in the
histogram. The entropy computed will correspond to the full distribution
over labelled partitions, where all permutations are assumed to be
equally likely.
Returns
-------
......@@ -1980,7 +1990,7 @@ def microstate_entropy(h):
H = - \sum_{\boldsymbol b}p({\boldsymbol b})\ln p({\boldsymbol b}),
where :math:`p({\boldsymbol b})` is observed frequency of partition
where :math:`p({\boldsymbol b})` is the observed frequency of labelled partition
:math:`{\boldsymbol b}`.
References
......@@ -1988,8 +1998,9 @@ def microstate_entropy(h):
.. [mezard-information-2009] Marc Mézard, Andrea Montanari, "Information,
Physics, and Computation", Oxford Univ Press, 2009.
:DOI:`10.1093/acprof:oso/9780198570837.001.0001`
"""
return libinference.partitions_entropy(h)
return libinference.partitions_entropy(h, unlabel)
from . overlap_blockmodel import *
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment