Commit d226ac61 authored by Tiago Peixoto's avatar Tiago Peixoto

inference: Implement beta_dl parameter

This also re-organizes entropy() w.r.t. entropy_args.
parent 1c48aec8
......@@ -99,7 +99,7 @@ for pvals in iter_ranges(pranges):
state.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
state.get_nonempty_B(), file=out)
if overlap:
......@@ -107,7 +107,7 @@ for pvals in iter_ranges(pranges):
state.mcmc_sweep(beta=0, bundled=True,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
state.get_nonempty_B(), file=out)
state = gen_state(directed, deg_corr, layered, overlap, rec_, rec, allow_empty)
......@@ -120,21 +120,21 @@ for pvals in iter_ranges(pranges):
bstate.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
bstate.get_nonempty_B(), file=out)
print("\t\t",
bstate.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
bstate.get_nonempty_B(), file=out)
print("\t\t",
bstate.gibbs_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
bstate.get_nonempty_B(), file=out)
print("\t merge", file=out)
......@@ -149,7 +149,7 @@ for pvals in iter_ranges(pranges):
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
multigraph=False,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
bstate = bstate.copy()
......@@ -158,13 +158,13 @@ for pvals in iter_ranges(pranges):
bstate.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
print("\t\t",
bstate.gibbs_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
else:
print("\t\t",
......@@ -172,7 +172,7 @@ for pvals in iter_ranges(pranges):
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
multigraph=False,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
print("\t shrink", file=out)
......@@ -181,7 +181,7 @@ for pvals in iter_ranges(pranges):
state = state.shrink(B=5, entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
multigraph=False,
exact=exact))
exact=exact, beta_dl=0.95))
print("\t\t", state.B, "\n", file=out)
......@@ -224,7 +224,7 @@ for pvals in iter_ranges(pranges):
else:
state_args = dict(recs=rec_, rec_types=rec)
entropy_args = dict(exact=exact)
entropy_args = dict(exact=exact, beta_dl=0.95)
state = minimize_blockmodel_dl(GraphView(g, directed=directed),
verbose=(1, "\t") if verbose else False,
......
......@@ -152,12 +152,14 @@ void export_blockmodel_state()
.def_readwrite("dense", &entropy_args_t::dense)
.def_readwrite("multigraph", &entropy_args_t::multigraph)
.def_readwrite("adjacency", &entropy_args_t::adjacency)
.def_readwrite("deg_entropy", &entropy_args_t::deg_entropy)
.def_readwrite("recs", &entropy_args_t::recs)
.def_readwrite("partition_dl", &entropy_args_t::partition_dl)
.def_readwrite("degree_dl", &entropy_args_t::degree_dl)
.def_readwrite("degree_dl_kind", &entropy_args_t::degree_dl_kind)
.def_readwrite("edges_dl", &entropy_args_t::edges_dl)
.def_readwrite("recs_dl", &entropy_args_t::recs_dl);
.def_readwrite("recs_dl", &entropy_args_t::recs_dl)
.def_readwrite("beta_dl", &entropy_args_t::beta_dl);
enum_<deg_dl_kind>("deg_dl_kind")
.value("ent", deg_dl_kind::ENT)
......
......@@ -41,11 +41,13 @@ struct entropy_args_t
bool exact;
bool adjacency;
bool recs;
bool deg_entropy;
bool partition_dl;
bool degree_dl;
deg_dl_kind degree_dl_kind;
bool edges_dl;
bool recs_dl;
double beta_dl;
};
// Sparse entropy terms
......@@ -161,6 +163,14 @@ inline double eterm_dense(size_t r, size_t s, int ers, double wr_r,
return S;
}
// Edges description length
template <class Graph>
double get_edges_dl(size_t B, size_t E, Graph& g)
{
size_t NB = (graph_tool::is_directed(g)) ? B * B : (B * (B + 1)) / 2;
return lbinom(NB + E - 1, E);
}
} // namespace graph_tool
#endif // GRAPH_BLOCKMODEL_ENTROPY_HH
......@@ -574,6 +574,11 @@ public:
return _N;
}
size_t get_E()
{
return _E;
}
size_t get_actual_B()
{
return _actual_B;
......
......@@ -490,7 +490,7 @@ struct Layers
dS -= virtual_move_covariate(v, r, s, *this, m_entries, false);
if (ea.edges_dl)
dS += get_delta_edges_dl(v, r, s);
dS += ea.beta_dl * get_delta_edges_dl(v, r, s);
}
// assert(check_layers());
......@@ -542,13 +542,13 @@ struct Layers
bool nr_occupy = (s != null_group) && (_wr[s] == 0);
int L = _layers.size();
dS -= _actual_B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
dS -= ea.beta_dl * _actual_B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
size_t B = _actual_B;
if (r_vacate)
B--;
if (nr_occupy)
B++;
dS += B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
dS += ea.beta_dl * B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
}
// assert(check_layers());
......@@ -635,43 +635,82 @@ struct Layers
// assert(check_edge_counts());
}
double entropy(bool dense, bool multigraph, bool deg_entropy,
bool exact, bool recs, bool recs_dl, bool adjacency)
double entropy(entropy_args_t ea)
{
double S = 0;
double S = 0, S_dl = 0;
if (_master)
{
S += BaseState::entropy(dense, multigraph, deg_entropy, exact,
false, false, adjacency);
if (adjacency)
entropy_args_t mea(ea);
mea.edges_dl = false;
mea.recs = false;
mea.recs_dl = false;
S += BaseState::entropy(mea);
if (ea.adjacency)
{
S -= covariate_entropy(_bg, _mrs);
if (multigraph)
if (ea.multigraph)
S -= BaseState::get_parallel_entropy();
for (auto& state : _layers)
{
S += covariate_entropy(state._bg, state._mrs);
if (multigraph)
if (ea.multigraph)
S += state.get_parallel_entropy();
}
}
if (recs)
if (ea.edges_dl)
{
size_t actual_B = _actual_B;
if (BaseState::_allow_empty)
actual_B = num_vertices(BaseState::_bg);
for (auto& state : _layers)
S_dl += get_edges_dl(actual_B, state._E, _g);
}
if (ea.recs)
{
entropy_args_t mea = {false, false, false, false, true,
false, false, false,
ea.degree_dl_kind, false, ea.recs_dl,
ea.beta_dl};
for (auto& state : _layers)
S += state.entropy(false, false, false, false,
true, recs_dl, false);
S += state.entropy(mea);
}
}
else
{
entropy_args_t mea(ea);
mea.partition_dl = false;
mea.edges_dl = false;
for (auto& state : _layers)
S += state.entropy(dense, multigraph, deg_entropy, exact,
recs, recs_dl, adjacency);
S += state.entropy(mea);
if (ea.partition_dl)
S_dl += BaseState::get_partition_dl();
if (ea.edges_dl)
{
for (auto& state : _layers)
{
size_t actual_B = 0;
if (BaseState::_allow_empty)
{
actual_B = num_vertices(_bg);
}
else
{
for (auto r : vertices_range(state._bg))
if (state._wr[r] > 0)
actual_B++;
}
S_dl += get_edges_dl(actual_B, state._E, _g);
}
}
int L = _layers.size();
S += _N * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
S_dl += _N * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
}
return S;
return S + S_dl * ea.beta_dl;
}
double get_delta_edges_dl(size_t v, size_t r, size_t s)
......
......@@ -991,6 +991,11 @@ struct overlap_partition_stats_t
return _actual_B;
}
size_t get_E()
{
return _E;
}
void add_block()
{
_total_B++;
......
......@@ -118,7 +118,7 @@ def get_block_graph(g, B, b, vcount=None, ecount=None, rec=None, drec=None):
return cg
def get_entropy_args(kargs):
def get_entropy_args(kargs, ignore=None):
kargs = kargs.copy()
args = DictState(kargs)
deg_dl_kind = args.degree_dl_kind
......@@ -134,11 +134,13 @@ def get_entropy_args(kargs):
ea.dense = args.dense
ea.multigraph = args.multigraph
ea.adjacency = args.adjacency
ea.deg_entropy = args.deg_entropy
ea.recs = args.recs
del kargs["exact"]
del kargs["dense"]
del kargs["multigraph"]
del kargs["adjacency"]
del kargs["deg_entropy"]
del kargs["recs"]
if args.dl:
ea.partition_dl = args.partition_dl
......@@ -151,12 +153,17 @@ def get_entropy_args(kargs):
ea.edges_dl = False
ea.recs_dl = False
ea.degree_dl_kind = kind
ea.beta_dl = args.beta_dl
del kargs["dl"]
del kargs["partition_dl"]
del kargs["degree_dl"]
del kargs["edges_dl"]
del kargs["recs_dl"]
del kargs["beta_dl"]
kargs.pop("callback", None)
if ignore is not None:
for a in ignore:
del kargs[a]
if len(kargs) > 0:
raise ValueError("unrecognized entropy arguments: " +
str(list(kargs.keys())))
......@@ -459,10 +466,11 @@ class BlockState(object):
if deg_corr:
init_q_cache(max(2 * max(self.get_E(), self.get_N()), 100))
self._entropy_args = dict(adjacency=True, dl=True, partition_dl=True,
degree_dl=True, degree_dl_kind="distributed",
edges_dl=True, dense=False, multigraph=True,
exact=True, recs=True, recs_dl=True)
self._entropy_args = dict(adjacency=True, deg_entropy=True, dl=True,
partition_dl=True, degree_dl=True,
degree_dl_kind="distributed", edges_dl=True,
dense=False, multigraph=True, exact=True,
recs=True, recs_dl=True, beta_dl=1.)
if len(kwargs) > 0:
warnings.warn("unrecognized keyword arguments: " +
......@@ -911,7 +919,7 @@ class BlockState(object):
def entropy(self, adjacency=True, dl=True, partition_dl=True,
degree_dl=True, degree_dl_kind="distributed", edges_dl=True,
dense=False, multigraph=True, deg_entropy=True,
recs=True, recs_dl=True, exact=True, **kwargs):
recs=True, recs_dl=True, beta_dl=1., exact=True, **kwargs):
r"""Calculate the entropy (a.k.a. negative log-likelihood) associated
with the current block partition.
......@@ -948,6 +956,8 @@ class BlockState(object):
recs_dl : ``bool`` (optional, default: ``True``)
If ``True``, and ``dl == True`` the edge covariate description
length will be included.
beta_dl : ``double`` (optional, default: ``1.``)
Prior inverse temperature.
exact : ``bool`` (optional, default: ``True``)
If ``True``, the exact expressions will be used. Otherwise,
Stirling's factorial approximation will be used for some terms.
......@@ -1086,56 +1096,25 @@ class BlockState(object):
"""
eargs = get_entropy_args(locals(), ignore=["self", "kwargs"])
if _bm_test() and kwargs.get("test", True):
args = dict(**locals())
args.update(**kwargs)
del args["eargs"]
del args["self"]
del args["kwargs"]
kwargs = kwargs.copy()
E = self.get_E()
N = self.get_N()
if not dl:
edges_dl = partition_dl = degree_dl = recs_dl = dl
S = self._state.entropy(dense, multigraph, deg_entropy, exact,
recs, recs_dl, adjacency)
if adjacency and not dense and not exact:
if multigraph:
S -= E
else:
S += E
S = self._state.entropy(eargs)
dl_S = 0
if partition_dl:
dl_S += self._state.get_partition_dl()
if edges_dl:
if self.allow_empty:
actual_B = self.B
else:
actual_B = self.get_nonempty_B()
dl_S += model_entropy(actual_B, N, E,
directed=self.g.is_directed(), nr=False)
if self.deg_corr and degree_dl:
if degree_dl_kind == "entropy":
kind = libinference.deg_dl_kind.ent
elif degree_dl_kind == "uniform":
kind = libinference.deg_dl_kind.uniform
elif degree_dl_kind == "distributed":
kind = libinference.deg_dl_kind.dist
dl_S += self._state.get_deg_dl(kind)
callback = kwargs.pop("callback", None)
if callback is not None:
dl_S += callback(self)
S += dl_S
S += beta_dl * dl_S
if kwargs.pop("test", True) and _bm_test():
assert not isnan(S) and not isinf(S), \
......
......@@ -802,31 +802,11 @@ class LayeredBlockState(OverlapBlockState, BlockState):
S = BlockState.entropy(self, adjacency=adjacency, dl=dl,
partition_dl=partition_dl, degree_dl=degree_dl,
degree_dl_kind=degree_dl_kind, edges_dl=False,
degree_dl_kind=degree_dl_kind, edges_dl=edges_dl,
dense=dense, multigraph=multigraph,
deg_entropy=deg_entropy, exact=exact,
**dict(kwargs, test=False))
if dl and edges_dl:
if self.layers:
for state in self.layer_states:
if not self.allow_empty:
actual_B = (state.wr.a > 0).sum()
else:
actual_B = self.B
S += model_entropy(actual_B, 0, state.get_E(),
directed=self.g.is_directed(),
nr=False)
else:
if not self.allow_empty:
actual_B = (self.wr.a > 0).sum()
else:
actual_B = self.B
for state in self.layer_states:
S += model_entropy(actual_B, 0, state.get_E(),
directed=self.g.is_directed(),
nr=False)
if _bm_test() and kwargs.get("test", True):
assert not isnan(S) and not isinf(S), \
"invalid entropy %g (%s) " % (S, str(args))
......
......@@ -95,7 +95,8 @@ class NestedBlockState(object):
edges_dl=True,
exact=True,
recs=True,
recs_dl=True)
recs_dl=True,
beta_dl=1.)
self.levels = [base_type(g, b=bs[0], **self.state_args)]
for i, b in enumerate(bs[1:]):
state = self.levels[-1]
......@@ -316,9 +317,13 @@ class NestedBlockState(object):
else:
eargs = kwargs
S = bstate.entropy(**dict(eargs, dl=True,
edges_dl=(l == (len(self.levels) - 1)),
recs_dl=(l == (len(self.levels) - 1))))
top = (l == (len(self.levels) - 1))
S = bstate.entropy(**dict(eargs, dl=True, edges_dl=top, recs_dl=top))
if l > 0:
S *= kwargs.get("beta_dl", 1.)
return S
def _Lrecdx_entropy(self, Lrecdx=None):
......@@ -378,7 +383,7 @@ class NestedBlockState(object):
for l in range(len(self.levels)):
S += self.level_entropy(l, **dict(kwargs, test=False))
S += self._Lrecdx_entropy()
S += kwargs.get("entropy_args", {}).get("beta_dl", 1.) * self._Lrecdx_entropy()
if _bm_test() and kwargs.pop("test", True):
state = self.copy()
......@@ -698,16 +703,15 @@ class NestedBlockState(object):
else:
eargs = entropy_args
eargs = dict(eargs, dl=True,
edges_dl=(l == len(self.levels) - 1),
recs_dl=(l == len(self.levels) - 1))
top = (l == len(self.levels) - 1)
eargs = dict(eargs, dl=True, edges_dl=top, recs_dl=top)
if l < len(self.levels) - 1:
def callback(s):
s = self.levels[l + 1]
S = s.entropy(**dict(self.hentropy_args,
edges_dl=(l + 1 == len(self.levels) - 1),
recs_dl=(l + 1 == len(self.levels) - 1)))
s_top = (l + 1 == len(self.levels) - 1)
S = s.entropy(**dict(self.hentropy_args, edges_dl=s_top,
recs_dl=s_top))
return S
eargs = dict(eargs, callback=callback)
......@@ -729,6 +733,8 @@ class NestedBlockState(object):
args = dict(kwargs, entropy_args=eargs, c=c[l])
if l > 0:
if "beta_dl" in entropy_args:
args = dict(args, beta=args.get("beta", 1.) * entropy_args["beta_dl"])
N_ = self.levels[l].get_N()
idx_ = self.levels[l].wr.a == 0
rs = arange(len(idx_), dtype="int")
......@@ -741,7 +747,10 @@ class NestedBlockState(object):
ret = algo(self.levels[l], **args)
dS += ret[0]
if l > 0 and "beta_dl" in entropy_args:
dS += ret[0] * entropy_args["beta_dl"]
else:
dS += ret[0]
nattempts += ret[1]
nmoves += ret[2]
......@@ -768,14 +777,12 @@ class NestedBlockState(object):
kwargs = dict(kwargs, test=False)
entropy_args = kwargs.get("entropy_args", {})
Si = self.entropy(**entropy_args)
ddS = [-self.level_entropy(l, **dict(entropy_args, test=False)) for l in range(len(self.levels))]
dS, nattempts, nmoves = self._h_sweep(lambda s, **a: s.mcmc_sweep(**a), c=c,
**kwargs)
dS, nattempts, nmoves = self._h_sweep(lambda s, **a: s.mcmc_sweep(**a),
c=c, **kwargs)
if _bm_test():
Sf = self.entropy(**entropy_args)
ddS = [ddS[l] + self.level_entropy(l, **dict(entropy_args, test=False)) for l in range(len(self.levels))]
assert math.isclose(dS, (Sf - Si), abs_tol=1e-8), \
"inconsistent entropy delta %g (%g): %s" % (dS, Sf - Si,
str(entropy_args))
......
......@@ -263,10 +263,11 @@ class OverlapBlockState(BlockState):
if deg_corr:
init_q_cache(max(self.get_E(), self.get_N()) + 1)
self._entropy_args = dict(adjacency=True, dl=True, partition_dl=True,
degree_dl=True, degree_dl_kind="distributed",
edges_dl=True, dense=False, multigraph=True,
exact=True, recs=True, recs_dl=True)
self._entropy_args = dict(adjacency=True, deg_entropy=True, dl=True,
partition_dl=True, degree_dl=True,
degree_dl_kind="distributed", edges_dl=True,
dense=False, multigraph=True, exact=True,
recs=True, recs_dl=True, beta_dl=1.)
self._coupled_state = None
......@@ -445,7 +446,7 @@ class OverlapBlockState(BlockState):
def entropy(self, adjacency=True, dl=True, partition_dl=True,
degree_dl=True, degree_dl_kind="distributed", edges_dl=True,
dense=False, multigraph=True, deg_entropy=True, recs=True,
recs_dl=True, exact=True, **kwargs):
recs_dl=True, beta_dl=1., exact=True, **kwargs):
r"""Calculate the entropy associated with the current block partition.
Parameters
......@@ -481,6 +482,8 @@ class OverlapBlockState(BlockState):
recs_dl : ``bool`` (optional, default: ``True``)
If ``True``, and ``dl == True`` the edge covariate description
length will be included.
beta_dl : ``double`` (optional, default: ``1.``)
Prior inverse temperature.
exact : ``bool`` (optional, default: ``True``)
If ``True``, the exact expressions will be used. Otherwise,
Stirling's factorial approximation will be used for some terms.
......@@ -588,7 +591,8 @@ class OverlapBlockState(BlockState):
edges_dl=edges_dl, dense=dense,
multigraph=multigraph,
deg_entropy=deg_entropy, recs=recs,
recs_dl=recs_dl, exact=exact, **kwargs)
recs_dl=recs_dl, beta_dl=beta_dl, exact=exact,
**kwargs)
def _mcmc_sweep_dispatch(self, mcmc_state):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment