Commit d226ac61 authored by Tiago Peixoto
Browse files

inference: Implement beta_dl parameter

This also re-organizes entropy() w.r.t. entropy_args.
parent 1c48aec8
......@@ -99,7 +99,7 @@ for pvals in iter_ranges(pranges):
state.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
state.get_nonempty_B(), file=out)
if overlap:
......@@ -107,7 +107,7 @@ for pvals in iter_ranges(pranges):
state.mcmc_sweep(beta=0, bundled=True,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
state.get_nonempty_B(), file=out)
state = gen_state(directed, deg_corr, layered, overlap, rec_, rec, allow_empty)
......@@ -120,21 +120,21 @@ for pvals in iter_ranges(pranges):
bstate.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
bstate.get_nonempty_B(), file=out)
print("\t\t",
bstate.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
bstate.get_nonempty_B(), file=out)
print("\t\t",
bstate.gibbs_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
bstate.get_nonempty_B(), file=out)
print("\t merge", file=out)
......@@ -149,7 +149,7 @@ for pvals in iter_ranges(pranges):
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
multigraph=False,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
bstate = bstate.copy()
......@@ -158,13 +158,13 @@ for pvals in iter_ranges(pranges):
bstate.mcmc_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
print("\t\t",
bstate.gibbs_sweep(beta=0,
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
else:
print("\t\t",
......@@ -172,7 +172,7 @@ for pvals in iter_ranges(pranges):
entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
multigraph=False,
exact=exact)),
exact=exact, beta_dl=0.95)),
file=out)
print("\t shrink", file=out)
......@@ -181,7 +181,7 @@ for pvals in iter_ranges(pranges):
state = state.shrink(B=5, entropy_args=dict(dl=dl,
degree_dl_kind=degree_dl_kind,
multigraph=False,
exact=exact))
exact=exact, beta_dl=0.95))
print("\t\t", state.B, "\n", file=out)
......@@ -224,7 +224,7 @@ for pvals in iter_ranges(pranges):
else:
state_args = dict(recs=rec_, rec_types=rec)
entropy_args = dict(exact=exact)
entropy_args = dict(exact=exact, beta_dl=0.95)
state = minimize_blockmodel_dl(GraphView(g, directed=directed),
verbose=(1, "\t") if verbose else False,
......
......@@ -152,12 +152,14 @@ void export_blockmodel_state()
.def_readwrite("dense", &entropy_args_t::dense)
.def_readwrite("multigraph", &entropy_args_t::multigraph)
.def_readwrite("adjacency", &entropy_args_t::adjacency)
.def_readwrite("deg_entropy", &entropy_args_t::deg_entropy)
.def_readwrite("recs", &entropy_args_t::recs)
.def_readwrite("partition_dl", &entropy_args_t::partition_dl)
.def_readwrite("degree_dl", &entropy_args_t::degree_dl)
.def_readwrite("degree_dl_kind", &entropy_args_t::degree_dl_kind)
.def_readwrite("edges_dl", &entropy_args_t::edges_dl)
.def_readwrite("recs_dl", &entropy_args_t::recs_dl);
.def_readwrite("recs_dl", &entropy_args_t::recs_dl)
.def_readwrite("beta_dl", &entropy_args_t::beta_dl);
enum_<deg_dl_kind>("deg_dl_kind")
.value("ent", deg_dl_kind::ENT)
......
......@@ -1367,22 +1367,23 @@ public:
}
}
double dS_dl = 0;
if (ea.partition_dl || ea.degree_dl || ea.edges_dl)
{
enable_partition_stats();
auto& ps = get_partition_stats(v);
if (ea.partition_dl)
dS += ps.get_delta_partition_dl(v, r, nr, _vweight);
dS_dl += ps.get_delta_partition_dl(v, r, nr, _vweight);
if (_deg_corr && ea.degree_dl)
dS += ps.get_delta_deg_dl(v, r, nr, _vweight, _eweight,
_degs, _g, ea.degree_dl_kind);
dS_dl += ps.get_delta_deg_dl(v, r, nr, _vweight, _eweight,
_degs, _g, ea.degree_dl_kind);
if (ea.edges_dl)
{
size_t actual_B = 0;
for (auto& ps : _partition_stats)
actual_B += ps.get_actual_B();
dS += ps.get_delta_edges_dl(v, r, nr, _vweight, actual_B,
_g);
dS_dl += ps.get_delta_edges_dl(v, r, nr, _vweight, actual_B,
_g);
}
}
......@@ -1422,8 +1423,8 @@ public:
if (dB_E != 0 && ea.recs_dl && std::isnan(_wparams[i][0])
&& std::isnan(_wparams[i][1]))
{
dS -= -w_log_prior(_B_E);
dS += -w_log_prior(_B_E + dB_E);
dS_dl -= -w_log_prior(_B_E);
dS_dl += -w_log_prior(_B_E + dB_E);
}
};
......@@ -1551,13 +1552,13 @@ public:
{
if (ea.recs_dl && (dB_E != 0 || dBx2 != 0))
{
dS -= -signed_w_log_P(_B_E, _recsum[i],
_recx2[i], wp[0], wp[1],
wp[2], wp[3], _epsilon[i]);
dS += -signed_w_log_P(_B_E + dB_E, _recsum[i],
_recx2[i] + dBx2, wp[0],
wp[1], wp[2], wp[3],
_epsilon[i]);
dS_dl -= -signed_w_log_P(_B_E, _recsum[i],
_recx2[i], wp[0], wp[1],
wp[2], wp[3], _epsilon[i]);
dS_dl += -signed_w_log_P(_B_E + dB_E, _recsum[i],
_recx2[i] + dBx2, wp[0],
wp[1], wp[2], wp[3],
_epsilon[i]);
}
if (dB_E_D != 0 || _dBdx[i] != 0)
......@@ -1583,21 +1584,21 @@ public:
{
size_t N_B_E_D = _B_E_D + dB_E_D;
dS -= -safelog(_B_E_D);
dS += -safelog(N_B_E_D);
dS_dl -= -safelog(_B_E_D);
dS_dl += -safelog(N_B_E_D);
_dBdx[i] = _recdx[i] * dB_E_D + _dBdx[i] * N_B_E_D;
if (_coupled_state == nullptr)
{
size_t L = _Lrecdx[0];
dS -= -positive_w_log_P(L, _Lrecdx[i+1],
wp[2], wp[3],
_epsilon[i]);
dS += -positive_w_log_P(L + dL,
_Lrecdx[i+1] + _dBdx[i],
dS_dl -= -positive_w_log_P(L, _Lrecdx[i+1],
wp[2], wp[3],
_epsilon[i]);
dS_dl += -positive_w_log_P(L + dL,
_Lrecdx[i+1] + _dBdx[i],
wp[2], wp[3],
_epsilon[i]);
}
}
}
......@@ -1645,18 +1646,18 @@ public:
scoped_lock lck(_lock);
if (r_vacate)
{
dS += _coupled_state->virtual_move(r,
_bclabel[r],
null_group,
_coupled_entropy_args);
dS_dl += _coupled_state->virtual_move(r,
_bclabel[r],
null_group,
_coupled_entropy_args);
}
if (nr_occupy)
{
dS += _coupled_state->virtual_move(nr,
null_group,
_bclabel[r],
_coupled_entropy_args);
dS_dl += _coupled_state->virtual_move(nr,
null_group,
_bclabel[r],
_coupled_entropy_args);
}
}
......@@ -1673,11 +1674,11 @@ public:
});
scoped_lock lck(_lock);
dS += _coupled_state->recs_dS(r, nr, recs_entries, _dBdx, dL);
dS_dl += _coupled_state->recs_dS(r, nr, recs_entries, _dBdx, dL);
}
}
return dS;
return dS + ea.beta_dl * dS_dl;
}
double virtual_move(size_t v, size_t r, size_t nr, entropy_args_t ea)
......@@ -2159,20 +2160,47 @@ public:
return S;
}
double entropy(bool dense, bool multigraph, bool deg_entropy, bool exact,
bool recs, bool recs_dl, bool adjacency)
double entropy(entropy_args_t ea)
{
double S = 0;
double S = 0, S_dl = 0;
if (adjacency)
if (ea.adjacency)
{
if (!dense)
S = sparse_entropy(multigraph, deg_entropy, exact);
if (!ea.dense)
S = sparse_entropy(ea.multigraph, ea.deg_entropy, ea.exact);
else
S = dense_entropy(multigraph);
S = dense_entropy(ea.multigraph);
if (!ea.dense && !ea.exact)
{
size_t E = 0;
for (auto e : edges_range(_g))
E += _eweight[e];
if (ea.multigraph)
S -= E;
else
S += E;
}
}
if (recs)
if (ea.partition_dl)
S_dl += get_partition_dl();
if (_deg_corr && ea.degree_dl)
S_dl += get_deg_dl(ea.degree_dl_kind);
if (ea.edges_dl)
{
enable_partition_stats();
size_t actual_B = 0;
for (auto& ps : _partition_stats)
actual_B += ps.get_actual_B();
if (_allow_empty)
actual_B = num_vertices(_bg);
S_dl += get_edges_dl(actual_B, _partition_stats.front().get_E(), _g);
}
if (ea.recs)
{
for (size_t i = 0; i < _rec_types.size(); ++i)
{
......@@ -2189,9 +2217,9 @@ public:
S += -positive_w_log_P(ers, xrs, wp[0], wp[1],
_epsilon[i]);
}
if (recs_dl && std::isnan(wp[0]) && std::isnan(wp[1]))
S += -positive_w_log_P(_B_E, _recsum[i], wp[0], wp[1],
_epsilon[i]);
if (ea.recs_dl && std::isnan(wp[0]) && std::isnan(wp[1]))
S_dl += -positive_w_log_P(_B_E, _recsum[i], wp[0], wp[1],
_epsilon[i]);
break;
case weight_type::DISCRETE_GEOMETRIC:
for (auto me : edges_range(_bg))
......@@ -2200,7 +2228,7 @@ public:
auto xrs = _brec[i][me];
S += -geometric_w_log_P(ers, xrs, wp[0], wp[1]);
}
if (recs_dl && std::isnan(wp[0]) && std::isnan(wp[1]))
if (ea.recs_dl && std::isnan(wp[0]) && std::isnan(wp[1]))
S += -geometric_w_log_P(_B_E, _recsum[i], wp[0], wp[1]);
break;
case weight_type::DISCRETE_POISSON:
......@@ -2212,7 +2240,7 @@ public:
}
for (auto e : edges_range(_g))
S += lgamma(_rec[i][e] + 1);
if (recs_dl && std::isnan(wp[0]) && std::isnan(wp[1]))
if (ea.recs_dl && std::isnan(wp[0]) && std::isnan(wp[1]))
S += -geometric_w_log_P(_B_E, _recsum[i], wp[0], wp[1]);
break;
case weight_type::DISCRETE_BINOMIAL:
......@@ -2224,7 +2252,7 @@ public:
}
for (auto e : edges_range(_g))
S -= lbinom(wp[0], _rec[i][e]);
if (recs_dl && std::isnan(wp[1]) && std::isnan(wp[2]))
if (ea.recs_dl && std::isnan(wp[1]) && std::isnan(wp[2]))
S += -geometric_w_log_P(_B_E, _recsum[i], wp[1], wp[2]);
break;
case weight_type::REAL_NORMAL:
......@@ -2238,10 +2266,10 @@ public:
}
if (std::isnan(wp[0]) && std::isnan(wp[1]))
{
if (recs_dl)
S += -signed_w_log_P(_B_E, _recsum[i], _recx2[i],
wp[0], wp[1], wp[2], wp[3],
_epsilon[i]);
if (ea.recs_dl)
S_dl += -signed_w_log_P(_B_E, _recsum[i], _recx2[i],
wp[0], wp[1], wp[2], wp[3],
_epsilon[i]);
S += -positive_w_log_P(_B_E_D, _recdx[i], wp[2],
wp[3], _epsilon[i]);
}
......@@ -2257,7 +2285,7 @@ public:
}
}
}
return S;
return S + S_dl * ea.beta_dl;
}
double get_partition_dl()
......
......@@ -41,11 +41,13 @@ struct entropy_args_t
bool exact;
bool adjacency;
bool recs;
bool deg_entropy;
bool partition_dl;
bool degree_dl;
deg_dl_kind degree_dl_kind;
bool edges_dl;
bool recs_dl;
double beta_dl;
};
// Sparse entropy terms
......@@ -161,6 +163,14 @@ inline double eterm_dense(size_t r, size_t s, int ers, double wr_r,
return S;
}
// Edges description length.
//
// B is a block count and E an edge count, as supplied by the caller. The
// number of block pairs is B * B when the graph is directed and
// B * (B + 1) / 2 otherwise; the returned value is
// lbinom(pairs + E - 1, E).
// NOTE(review): when both the pair count and E are zero, the first lbinom
// argument wraps around (unsigned arithmetic) -- confirm lbinom tolerates it.
template <class Graph>
double get_edges_dl(size_t B, size_t E, Graph& g)
{
    size_t n_pairs = 0;
    if (graph_tool::is_directed(g))
        n_pairs = B * B;
    else
        n_pairs = (B * (B + 1)) / 2;
    return lbinom(n_pairs + E - 1, E);
}
} // namespace graph_tool
#endif // GRAPH_BLOCKMODEL_ENTROPY_HH
......@@ -574,6 +574,11 @@ public:
return _N;
}
// Accessor for the stored `_E` member (presumably the edge count tracked by
// these partition statistics -- confirm where `_E` is maintained).
size_t get_E()
{
return _E;
}
size_t get_actual_B()
{
return _actual_B;
......
......@@ -490,7 +490,7 @@ struct Layers
dS -= virtual_move_covariate(v, r, s, *this, m_entries, false);
if (ea.edges_dl)
dS += get_delta_edges_dl(v, r, s);
dS += ea.beta_dl * get_delta_edges_dl(v, r, s);
}
// assert(check_layers());
......@@ -542,13 +542,13 @@ struct Layers
bool nr_occupy = (s != null_group) && (_wr[s] == 0);
int L = _layers.size();
dS -= _actual_B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
dS -= ea.beta_dl * _actual_B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
size_t B = _actual_B;
if (r_vacate)
B--;
if (nr_occupy)
B++;
dS += B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
dS += ea.beta_dl * B * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
}
// assert(check_layers());
......@@ -635,43 +635,82 @@ struct Layers
// assert(check_edge_counts());
}
// Entropy of the layered state. In the entropy_args_t form, the
// description-length contributions computed directly in this function are
// accumulated in S_dl and enter the result scaled by ea.beta_dl
// (return value: S + S_dl * ea.beta_dl).
double entropy(bool dense, bool multigraph, bool deg_entropy,
bool exact, bool recs, bool recs_dl, bool adjacency)
double entropy(entropy_args_t ea)
{
double S = 0;
double S = 0, S_dl = 0;
// Master state: the base-state entropy is evaluated with edges_dl, recs and
// recs_dl switched off; those terms are handled per layer below.
if (_master)
{
S += BaseState::entropy(dense, multigraph, deg_entropy, exact,
false, false, adjacency);
if (adjacency)
entropy_args_t mea(ea);
mea.edges_dl = false;
mea.recs = false;
mea.recs_dl = false;
S += BaseState::entropy(mea);
if (ea.adjacency)
{
S -= covariate_entropy(_bg, _mrs);
if (multigraph)
if (ea.multigraph)
S -= BaseState::get_parallel_entropy();
for (auto& state : _layers)
{
S += covariate_entropy(state._bg, state._mrs);
if (multigraph)
if (ea.multigraph)
S += state.get_parallel_entropy();
}
}
if (recs)
if (ea.edges_dl)
{
// Block count used for every layer: _actual_B, or the number of vertices
// of the base block graph when empty blocks are allowed.
size_t actual_B = _actual_B;
if (BaseState::_allow_empty)
actual_B = num_vertices(BaseState::_bg);
for (auto& state : _layers)
S_dl += get_edges_dl(actual_B, state._E, _g);
}
if (ea.recs)
{
// NOTE(review): positional initializer -- presumably enables only `recs`
// and forwards degree_dl_kind, recs_dl and beta_dl; this depends on the
// field order of entropy_args_t, confirm against its declaration.
entropy_args_t mea = {false, false, false, false, true,
false, false, false,
ea.degree_dl_kind, false, ea.recs_dl,
ea.beta_dl};
for (auto& state : _layers)
S += state.entropy(false, false, false, false,
true, recs_dl, false);
S += state.entropy(mea);
}
}
else
{
// Non-master state: each layer's entropy is evaluated with partition_dl and
// edges_dl switched off; those two terms are added here instead.
entropy_args_t mea(ea);
mea.partition_dl = false;
mea.edges_dl = false;
for (auto& state : _layers)
S += state.entropy(dense, multigraph, deg_entropy, exact,
recs, recs_dl, adjacency);
S += state.entropy(mea);
if (ea.partition_dl)
S_dl += BaseState::get_partition_dl();
if (ea.edges_dl)
{
for (auto& state : _layers)
{
// Per-layer block count: all vertices of _bg when empty blocks are
// allowed, otherwise the blocks of this layer with _wr[r] > 0.
size_t actual_B = 0;
if (BaseState::_allow_empty)
{
actual_B = num_vertices(_bg);
}
else
{
for (auto r : vertices_range(state._bg))
if (state._wr[r] > 0)
actual_B++;
}
S_dl += get_edges_dl(actual_B, state._E, _g);
}
}
// L * log(2) + log1p(-2^-L) equals log(2^L - 1); the term is multiplied
// by _N.
int L = _layers.size();
S += _N * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
S_dl += _N * (L * std::log(2) + std::log1p(-std::pow(2., -L)));
}
return S;
return S + S_dl * ea.beta_dl;
}
double get_delta_edges_dl(size_t v, size_t r, size_t s)
......
......@@ -613,20 +613,21 @@ public:
dS = virtual_move_sparse<false>(v, nr, ea.multigraph, m_entries);
}
double dS_dl = 0;
if (ea.partition_dl || ea.degree_dl || ea.edges_dl)
{
enable_partition_stats();
auto& ps = get_partition_stats(v);
if (ea.partition_dl)
dS += ps.get_delta_partition_dl(v, r, nr, _g);
dS_dl += ps.get_delta_partition_dl(v, r, nr, _g);
if (_deg_corr && ea.degree_dl)
dS += ps.get_delta_deg_dl(v, r, nr, _eweight, _g);
dS_dl += ps.get_delta_deg_dl(v, r, nr, _eweight, _g);
if (ea.edges_dl)
{
size_t actual_B = 0;
for (auto& ps : _partition_stats)
actual_B += ps.get_actual_B();
dS += ps.get_delta_edges_dl(v, r, nr, actual_B, _g);
dS_dl += ps.get_delta_edges_dl(v, r, nr, actual_B, _g);
}
}
......@@ -666,8 +667,8 @@ public:
if (dB_E != 0 && ea.recs_dl && std::isnan(_wparams[i][0])
&& std::isnan(_wparams[i][1]))
{
dS -= -w_log_prior(_B_E);
dS += -w_log_prior(_B_E + dB_E);
dS_dl -= -w_log_prior(_B_E);
dS_dl += -w_log_prior(_B_E + dB_E);
}
};
......@@ -796,13 +797,13 @@ public:
{
if (ea.recs_dl && (dB_E != 0 || dBx2 != 0))
{
dS -= -signed_w_log_P(_B_E, _recsum[i],
_recx2[i], wp[0], wp[1],
wp[2], wp[3], _epsilon[i]);
dS += -signed_w_log_P(_B_E + dB_E, _recsum[i],
_recx2[i] + dBx2, wp[0],
wp[1], wp[2], wp[3],
_epsilon[i]);
dS_dl -= -signed_w_log_P(_B_E, _recsum[i],
_recx2[i], wp[0], wp[1],
wp[2], wp[3], _epsilon[i]);
dS_dl += -signed_w_log_P(_B_E + dB_E, _recsum[i],
_recx2[i] + dBx2, wp[0],
wp[1], wp[2], wp[3],
_epsilon[i]);
}
if (dB_E_D != 0 || _dBdx[i] != 0)
......@@ -828,21 +829,21 @@ public:
{
size_t N_B_E_D = _B_E_D + dB_E_D;