Commit 3d9964ab authored by Tiago Peixoto's avatar Tiago Peixoto

Implement posterior sampling for layered networks

This includes support for sampling from posteriors of layered models
that are also overlapping and/or with edge covariates.

This fixes issue #325
parent ce270193
Pipeline #355 failed with stage
in 334 minutes and 27 seconds
This diff is collapsed.
......@@ -155,7 +155,8 @@ void export_blockmodel_state()
.def_readwrite("partition_dl", &entropy_args_t::partition_dl)
.def_readwrite("degree_dl", &entropy_args_t::degree_dl)
.def_readwrite("degree_dl_kind", &entropy_args_t::degree_dl_kind)
.def_readwrite("edges_dl", &entropy_args_t::edges_dl);
.def_readwrite("edges_dl", &entropy_args_t::edges_dl)
.def_readwrite("recs_dl", &entropy_args_t::recs_dl);
enum_<deg_dl_kind>("deg_dl_kind")
.value("ent", deg_dl_kind::ENT)
......
This diff is collapsed.
......@@ -86,11 +86,11 @@ vector<std::reference_wrapper<Type>> from_any_list(boost::python::object list)
};
void split_layers(GraphInterface& gi, boost::any& aec, boost::any& ab,
boost::any& arec, boost::any& adrec, boost::any& aeweight,
boost::any& avweight, boost::any& avc, boost::any& avmap,
boost::any& alweight, boost::python::object& ous,
boost::python::object& oub, boost::python::object& ourec,
boost::python::object& oudrec,
boost::python::object& arec, boost::python::object& adrec,
boost::any& aeweight, boost::any& avweight, boost::any& avc,
boost::any& avmap, boost::any& alweight,
boost::python::object& ous, boost::python::object& oub,
boost::python::object& ourec, boost::python::object& oudrec,
boost::python::object& oueweight,
boost::python::object& ouvweight, vbmap_t& block_map,
boost::python::object& obrmap, boost::python::object& ouvmap)
......@@ -98,12 +98,12 @@ void split_layers(GraphInterface& gi, boost::any& aec, boost::any& ab,
typedef vprop_map_t<int32_t>::type vmap_t;
typedef vprop_map_t<vector<int32_t>>::type vvmap_t;
typedef eprop_map_t<int32_t>::type emap_t;
typedef eprop_map_t<std::vector<double>>::type remap_t;
typedef eprop_map_t<double>::type remap_t;
emap_t& ec = any_cast<emap_t&>(aec);
vmap_t& b = any_cast<vmap_t&>(ab);
remap_t& rec = any_cast<remap_t&>(arec);
remap_t& drec = any_cast<remap_t&>(adrec);
auto rec = from_any_list<remap_t>(arec);
auto drec = from_any_list<remap_t>(adrec);
vmap_t& vweight = any_cast<vmap_t&>(avweight);
emap_t& eweight = any_cast<emap_t&>(aeweight);
vvmap_t& vc = any_cast<vvmap_t&>(avc);
......@@ -112,8 +112,11 @@ void split_layers(GraphInterface& gi, boost::any& aec, boost::any& ab,
auto us = from_rlist<GraphInterface>(ous);
auto ub = from_any_list<vmap_t>(oub);
auto urec = from_any_list<remap_t>(ourec);
auto udrec = from_any_list<remap_t>(oudrec);
vector<vector<std::reference_wrapper<remap_t>>> urec, udrec;
for (int i = 0; i < boost::python::len(ourec); ++i)
urec.push_back(from_any_list<remap_t>(ourec[i]));
for (int i = 0; i < boost::python::len(oudrec); ++i)
udrec.push_back(from_any_list<remap_t>(oudrec[i]));
auto uvweight = from_any_list<vmap_t>(ouvweight);
auto ueweight = from_any_list<emap_t>(oueweight);
......@@ -150,9 +153,14 @@ void split_layers(GraphInterface& gi, boost::any& aec, boost::any& ab,
vmap[v].insert(vmap[v].begin() + pos, u);
uvmap[l].get()[u] = v;
if (lw[v].empty())
{
uvweight[l].get()[u] = vweight[v];
}
else
{
assert(lw[v].find(l) != lw[v].end());
uvweight[l].get()[u] = lw[v][l];
}
size_t r = b[v];
size_t u_r;
......@@ -190,8 +198,104 @@ void split_layers(GraphInterface& gi, boost::any& aec, boost::any& ab,
auto u_t = get_v(t, l);
auto ne = add_edge(u_s, u_t, us[l].get().get_graph()).first;
ueweight[l].get()[ne] = eweight[e];
urec[l].get()[ne] = rec[e];
udrec[l].get()[ne] = drec[e];
for (size_t i = 0; i < rec.size(); ++i)
{
urec[l][i].get()[ne] = rec[i].get()[e];
udrec[l][i].get()[ne] = drec[i].get()[e];
}
assert(uvweight[l].get()[u_s] > 0 || eweight[e] == 0);
assert(uvweight[l].get()[u_t] > 0 || eweight[e] == 0);
}
})();
}
void split_groups(boost::any& ab, boost::any& avc, boost::any& avmap,
boost::python::object& ous, boost::python::object& oub,
boost::python::object& ouvweight,
vbmap_t& block_map, boost::python::object& obrmap,
boost::python::object& ouvmap)
{
typedef vprop_map_t<int32_t>::type vmap_t;
typedef vprop_map_t<vector<int32_t>>::type vvmap_t;
vmap_t& b = any_cast<vmap_t&>(ab);
vvmap_t& vc = any_cast<vvmap_t&>(avc);
vvmap_t& vmap = any_cast<vvmap_t&>(avmap);
auto us = from_rlist<GraphInterface>(ous);
auto ub = from_any_list<vmap_t>(oub);
auto block_rmap = from_any_list<vmap_t>(obrmap);
auto uvmap = from_any_list<vmap_t>(ouvmap);
auto uvweight = from_any_list<vmap_t>(ouvweight);
block_map.resize(us.size());
for (size_t l = 0; l < us.size(); ++l)
{
auto& u = us[l].get().get_graph();
auto& uvmap_l = uvmap[l].get();
auto& ub_l = ub[l].get();
auto& bmap = block_map[l];
auto& brmap = block_rmap[l].get();
auto& vweight_l = uvweight[l].get();
for (auto w : vertices_range(u))
{
if (vweight_l[w] == 0)
continue;
auto v = uvmap_l[w];
vc[v].push_back(l);
vmap[v].push_back(w);
size_t r = b[v];
size_t u_r;
auto riter = bmap.find(r);
if (riter == bmap.end())
{
u_r = bmap.size();
bmap[r] = u_r;
brmap[u_r] = r;
}
else
{
u_r = riter->second;
}
ub_l[w] = u_r;
}
}
}
void get_rvmap(GraphInterface& gi, boost::any& avc, boost::any& avmap,
boost::python::object& ouvmap)
{
typedef vprop_map_t<int32_t>::type vmap_t;
typedef vprop_map_t<vector<int32_t>>::type vvmap_t;
vvmap_t& vc = any_cast<vvmap_t&>(avc);
vvmap_t& vmap = any_cast<vvmap_t&>(avmap);
auto uvmap = from_any_list<vmap_t>(ouvmap);
run_action<>()(gi,
[&](auto& g)
{
for (auto v : vertices_range(g))
{
auto& ls = vc[v];
auto& vs = vmap[v];
for (size_t i = 0; i < ls.size(); ++i)
{
auto l = ls[i];
auto w = vs[i];
uvmap[l].get()[w] = v;
}
}
})();
}
......@@ -445,6 +549,8 @@ void export_layered_blockmodel_state()
def("make_layered_block_state", &make_layered_block_state);
def("split_layers", &split_layers);
def("split_groups", &split_groups);
def("get_rvmap", &get_rvmap);
def("get_layered_block_degs", &get_layered_block_degs);
def("get_mapped_block_degs", &get_mapped_block_degs);
def("get_ldegs", &get_ldegs);
......
......@@ -41,6 +41,9 @@ void export_lsbm()
{
using namespace boost::python;
class_<LayeredBlockStateVirtualBase, boost::noncopyable>
("LayeredBlockStateVirtualBase", no_init);
block_state::dispatch
([&](auto* bs)
{
......@@ -68,9 +71,13 @@ void export_lsbm()
&state_t::remove_vertices;
void (state_t::*add_vertices)(python::object, python::object) =
&state_t::add_vertices;
void (state_t::*couple_state)(LayeredBlockStateVirtualBase&,
entropy_args_t) =
&state_t::couple_state;
class_<state_t> c(name_demangle(typeid(state_t).name()).c_str(),
no_init);
class_<state_t, bases<LayeredBlockStateVirtualBase>>
c(name_demangle(typeid(state_t).name()).c_str(),
no_init);
c.def("remove_vertex", &state_t::remove_vertex)
.def("add_vertex", &state_t::add_vertex)
.def("move_vertex", &state_t::move_vertex)
......@@ -85,16 +92,32 @@ void export_lsbm()
.def("get_partition_dl", &state_t::get_partition_dl)
.def("get_deg_dl", &state_t::get_deg_dl)
.def("get_move_prob", get_move_prob)
.def("couple_state", couple_state)
.def("decouple_state",
&state_t::decouple_state)
.def("get_B_E",
&state_t::get_B_E)
.def("get_B_E_D",
&state_t::get_B_E_D)
.def("get_layer",
+[](state_t& state, size_t l) -> python::object
{
return python::object(block_state_t(state.get_layer(l)));
})
.def("enable_partition_stats",
&state_t::enable_partition_stats)
.def("disable_partition_stats",
&state_t::disable_partition_stats)
.def("is_partition_stats_enabled",
&state_t::is_partition_stats_enabled);
&state_t::is_partition_stats_enabled)
.def("clear_egroups",
&state_t::clear_egroups)
.def("rebuild_neighbour_sampler",
&state_t::rebuild_neighbour_sampler)
.def("sync_emat",
&state_t::sync_emat)
.def("sync_bclabel",
&state_t::sync_bclabel);
});
});
}
......@@ -85,6 +85,9 @@ void export_layered_overlap_blockmodel_state()
void (state_t::*move_vertices)(python::object,
python::object) =
&state_t::move_vertices;
void (state_t::*couple_state)(LayeredBlockStateVirtualBase&,
entropy_args_t) =
&state_t::couple_state;
class_<state_t> c(name_demangle(typeid(state_t).name()).c_str(),
no_init);
......@@ -98,16 +101,32 @@ void export_layered_overlap_blockmodel_state()
.def("get_partition_dl", &state_t::get_partition_dl)
.def("get_deg_dl", &state_t::get_deg_dl)
.def("get_move_prob", get_move_prob)
.def("couple_state", couple_state)
.def("decouple_state",
&state_t::decouple_state)
.def("get_B_E",
&state_t::get_B_E)
.def("get_B_E_D",
&state_t::get_B_E_D)
.def("get_layer",
+[](state_t& state, size_t l) -> python::object
{
return python::object(block_state_t(state.get_layer(l)));
})
.def("enable_partition_stats",
&state_t::enable_partition_stats)
.def("disable_partition_stats",
&state_t::disable_partition_stats)
.def("is_partition_stats_enabled",
&state_t::is_partition_stats_enabled);
&state_t::is_partition_stats_enabled)
.def("clear_egroups",
&state_t::clear_egroups)
.def("rebuild_neighbour_sampler",
&state_t::rebuild_neighbour_sampler)
.def("sync_emat",
&state_t::sync_emat)
.def("sync_bclabel",
&state_t::sync_bclabel);
});
});
......
......@@ -27,16 +27,12 @@ namespace graph_tool
using namespace boost;
using namespace std;
template <class State, class MEntries>
double virtual_move_covariate(size_t v, size_t r, size_t s, State& state,
MEntries& m_entries, bool reset)
{
if (reset)
{
m_entries.clear();
state.get_move_entries(v, r, s, m_entries);
}
auto& entries = m_entries.get_entries();
auto& delta = m_entries.get_delta();
......
......@@ -335,6 +335,11 @@ public:
size_t get_N() const { return _N; }
void add_block()
{
_block_nodes.emplace_back();
}
static constexpr size_t _null = numeric_limits<size_t>::max();
private:
......@@ -1248,6 +1253,11 @@ struct overlap_partition_stats_t
return _actual_B;
}
void add_block()
{
_total_B++;
}
private:
overlap_stats_t& _overlap_stats;
vector<size_t>& _bmap;
......
......@@ -35,7 +35,7 @@
#include <boost/multi_array.hpp>
#include <boost/math/special_functions/gamma.hpp>
#ifdef USING_OPENMP
#ifdef _OPENMP
#include <omp.h>
#endif
......@@ -159,6 +159,7 @@ struct entropy_args_t
bool degree_dl;
deg_dl_kind degree_dl_kind;
bool edges_dl;
bool recs_dl;
};
// Sparse entropy terms
......@@ -973,19 +974,26 @@ class perfect_hash_t
{
public:
template <class RNG>
perfect_hash_t(size_t N, RNG& rng)
: _index(std::make_shared<std::vector<size_t>>())
perfect_hash_t(size_t N, std::vector<size_t>& index, RNG& rng)
: _index(&index)
{
auto& index = *_index;
index.reserve(N);
for (size_t i = 0; i < N; ++i)
index.push_back(i);
std::shuffle(index.begin(), index.end(), rng);
}
perfect_hash_t(std::vector<size_t>& index)
: _index(&index) {}
perfect_hash_t() {}
size_t operator()(const Key& k) const {return (*_index)[k];}
size_t operator()(const Key& k) const { return (*_index)[k]; }
void resize(size_t N)
{
auto& index = *_index;
for (size_t i = index.size(); i < N; ++i)
index.push_back(i);
}
private:
std::shared_ptr<std::vector<size_t>> _index;
std::vector<size_t>* _index;
};
// this structure speeds up the access to the edges between given blocks, since
......@@ -999,14 +1007,28 @@ public:
template <class RNG>
EHash(BGraph& bg, RNG& rng)
: _hash_function(num_vertices(bg), rng),
: _hash_function(num_vertices(bg), _index, rng),
_hash(num_vertices(bg), ehash_t(0, _hash_function))
{
sync(bg);
}
EHash(const EHash& other)
: _index(other._index),
_hash_function(_index),
_hash(other._hash.size(), ehash_t(0, _hash_function))
{
for (size_t r = 0; r < other._hash.size(); ++r)
{
auto& h = _hash[r];
for (const auto& x : other._hash[r])
h[x.first] = x.second;
}
}
void sync(BGraph& bg)
{
_hash_function.resize(num_vertices(bg));
_hash.clear();
_hash.resize(num_vertices(bg), ehash_t(0, _hash_function));
for (auto& h : _hash)
......@@ -1022,7 +1044,7 @@ public:
typedef typename graph_traits<BGraph>::vertex_descriptor vertex_t;
typedef typename graph_traits<BGraph>::edge_descriptor edge_t;
__attribute__((flatten))
__attribute__((flatten)) __attribute__ ((hot))
const auto& get_me(vertex_t r, vertex_t s) const
{
if (!is_directed::apply<BGraph>::type::value && r > s)
......@@ -1056,6 +1078,7 @@ public:
const auto& get_null_edge() const { return _null_edge; }
private:
std::vector<size_t> _index;
perfect_hash_t<vertex_t> _hash_function;
typedef gt_hash_map<vertex_t, edge_t, perfect_hash_t<vertex_t>> ehash_t;
std::vector<ehash_t> _hash;
......@@ -1616,6 +1639,7 @@ public:
virtual void coupled_resize_vertex(size_t v) = 0;
virtual double virtual_move(size_t v, size_t r, size_t nr,
entropy_args_t eargs) = 0;
virtual size_t add_block() = 0;
virtual void add_edge(const GraphInterface::edge_t& e) = 0;
virtual void remove_edge(const GraphInterface::edge_t& e) = 0;
virtual void update_edge(const GraphInterface::edge_t& e,
......@@ -1625,6 +1649,7 @@ public:
GraphInterface::edge_t, int,
std::vector<double>>>&,
std::vector<double>&, int) = 0;
virtual bool check_edge_counts(bool emat=true) = 0;
};
......
......@@ -63,15 +63,14 @@ def cleanup_cache(b_cache, B_min, B_max):
for Bi in del_Bs:
del b_cache[Bi]
def get_ent(state, mcmc_multilevel_args, extra_entropy_args):
def get_ent(state, mcmc_multilevel_args):
mcmc_equilibrate_args = mcmc_multilevel_args.get("mcmc_equilibrate_args", {})
mcmc_args = mcmc_equilibrate_args.get("mcmc_args", {})
entropy_args = mcmc_args.get("entropy_args", {})
S = state.entropy(**dict(entropy_args, **extra_entropy_args))
S = state.entropy(**dict(entropy_args))
return S
def get_state_dl(B, b_cache, mcmc_multilevel_args={}, extra_entropy_args={},
verbose=False):
def get_state_dl(B, b_cache, mcmc_multilevel_args={}, verbose=False):
if B in b_cache:
return b_cache[B][0]
Bs = sorted(b_cache.keys())
......@@ -83,13 +82,12 @@ def get_state_dl(B, b_cache, mcmc_multilevel_args={}, extra_entropy_args={},
verbose=verbose_push(verbose,
("B: %d <- %d " %
(B, B_prev)))))
dl = get_ent(state, mcmc_multilevel_args, extra_entropy_args)
dl = get_ent(state, mcmc_multilevel_args)
b_cache[B] = (dl, state)
return dl
def bisection_minimize(init_states, random_bisection=False,
mcmc_multilevel_args={}, extra_entropy_args={},
verbose=False):
mcmc_multilevel_args={}, verbose=False):
r"""Find the best order (number of groups) given an initial set of states by
performing a one-dimension minimization, using a Fibonacci (or golden
section) search.
......@@ -103,8 +101,6 @@ def bisection_minimize(init_states, random_bisection=False,
instead of using the golden rule.
mcmc_multilevel_args : ``dict`` (optional, default: ``{}``)
Arguments to be passed to :func:`~graph_tool.inference.mcmc_multilevel`.
extra_entropy_args : ``dict`` (optional, default: ``{}``)
Extra arguments to be passed to ``state.entropy()``.
verbose : ``bool`` or ``tuple`` (optional, default: ``False``)
If ``True``, progress information will be shown. Optionally, this
accepts arguments of the type ``tuple`` of the form ``(level, prefix)``
......@@ -137,8 +133,7 @@ def bisection_minimize(init_states, random_bisection=False,
b_cache = {}
for state in init_states:
b_cache[state.get_nonempty_B()] = (get_ent(state, mcmc_multilevel_args,
extra_entropy_args),
b_cache[state.get_nonempty_B()] = (get_ent(state, mcmc_multilevel_args),
state.copy())
max_B = max(b_cache.keys())
......@@ -148,7 +143,6 @@ def bisection_minimize(init_states, random_bisection=False,
kwargs = dict(b_cache=b_cache,
mcmc_multilevel_args=dict(mcmc_multilevel_args,
b_cache=b_cache),
extra_entropy_args=extra_entropy_args,
verbose=verbose_push(verbose, (" " * 4)))
# Initial bracketing
......
This diff is collapsed.
......@@ -185,9 +185,9 @@ class OverlapBlockState(BlockState):
rec_types = list(rec_types)
rec_params = list(rec_params)
if len(rec_params) < len(rec_types):
rec_params += [{} for i in range((len(rec_types) -
len(rec_params)))]
# if len(rec_params) < len(rec_types):
# rec_params += [{} for i in range((len(rec_types) -
# len(rec_params)))]
if len(self.rec) > 0 and rec_types[0] != libinference.rec_type.count:
rec_types.insert(0, libinference.rec_type.count)
......@@ -243,17 +243,19 @@ class OverlapBlockState(BlockState):
self.Lrecdx = libcore.Vector_double(len(self.rec)+1)
self.Lrecdx[0] = -1
self.Lrecdx.resize(len(self.rec)+1)
self.epsilon = libcore.Vector_double(len(self.rec))
for i in range(len(self.rec)):
idx = self.rec[i].a != 0
if numpy.any(idx):
self.epsilon[i] = abs(self.rec[i].a[idx]).min() / 10
self.epsilon = kwargs.pop("epsilon", None)
if self.epsilon is None:
self.epsilon = libcore.Vector_double(len(self.rec))
for i in range(len(self.rec)):
idx = self.rec[i].a != 0
if numpy.any(idx):
self.epsilon[i] = abs(self.rec[i].a[idx]).min() / 10
self.max_BE = max_BE
self.use_hash = self.B > self.max_BE
self.allow_empty = allow_empty
self.allow_empty = True
self._abg = self.bg._get_any()
self._state = libinference.make_overlap_block_state(self, _get_rng())
......@@ -264,7 +266,7 @@ class OverlapBlockState(BlockState):
self._entropy_args = dict(adjacency=True, dl=True, partition_dl=True,
degree_dl=True, degree_dl_kind="distributed",
edges_dl=True, dense=False, multigraph=True,
exact=True, recs=True)
exact=True, recs=True, recs_dl=True)
self._coupled_state = None
......@@ -325,12 +327,18 @@ class OverlapBlockState(BlockState):
eindex=kwargs.get("eindex", self.eindex),
max_BE=kwargs.get("max_BE", self.max_BE),
base_g=kwargs.get("base_g", self.base_g),
Lrecdx=kwargs.pop("Lrecdx", self.Lrecdx.copy()),
epsilon=kwargs.pop("epsilon",
self.epsilon.copy()),
**dmask(kwargs, ["half_edges", "node_index",
"eindex", "base_g", "drec",
"max_BE"]))
if self._coupled_state is not None:
state._couple_state(state.get_block_state(b=state.get_bclabel(),
copy_bg=False),
vweight="nonempty",
copy_bg=False,
Lrecdx=state.Lrecdx,
allow_empty=False),
self._coupled_state[1])
return state
......@@ -437,7 +445,7 @@ class OverlapBlockState(BlockState):
def entropy(self, adjacency=True, dl=True, partition_dl=True,
degree_dl=True, degree_dl_kind="distributed", edges_dl=True,
dense=False, multigraph=True, deg_entropy=True, recs=True,
exact=True, **kwargs):
recs_dl=True, exact=True, **kwargs):
r"""Calculate the entropy associated with the current block partition.
Parameters
......@@ -470,6 +478,9 @@ class OverlapBlockState(BlockState):
recs : ``bool`` (optional, default: ``True``)
If ``True``, the likelihood for real or discrete-valued edge
covariates is computed.
recs_dl : ``bool`` (optional, default: ``True``)
If ``True``, and ``dl == True`` the edge covariate description
length will be included.
exact : ``bool`` (optional, default: ``True``)
If ``True``, the exact expressions will be used. Otherwise,
Stirling's factorial approximation will be used for some terms.
......@@ -577,7 +588,7 @@ class OverlapBlockState(BlockState):
edges_dl=edges_dl, dense=dense,
multigraph=multigraph,
deg_entropy=deg_entropy, recs=recs,
exact=exact, **kwargs)
recs_dl=recs_dl, exact=exact, **kwargs)