Commit a5574175 authored by Tiago Peixoto
Browse files

Implement SBM inference with layers and edge covariates

parent 5075e605
......@@ -5,6 +5,7 @@
.. autofunction:: minimize_blockmodel_dl
.. autoclass:: BlockState
.. autoclass:: OverlapBlockState
.. autoclass:: CovariateBlockState
.. autofunction:: mcmc_sweep
.. autoclass:: MinimizeState
.. autofunction:: multilevel_minimize
......
......@@ -16,6 +16,7 @@
.. autofunction:: graph_draw
.. autofunction:: draw_hierarchy
.. autofunction:: graphviz_draw
.. autofunction:: prop_to_size
.. autofunction:: get_hierarchy_control_points
......
......@@ -17,6 +17,7 @@ libgraph_tool_community_la_LDFLAGS = $(MOD_LDFLAGS)
libgraph_tool_community_la_SOURCES = \
graph_blockmodel.cc \
graph_blockmodel_overlap.cc \
graph_blockmodel_covariates.cc \
graph_community.cc \
graph_community_network.cc \
graph_community_network_edges.cc \
......
......@@ -199,7 +199,7 @@ struct move_sweep_dispatch
vector<int>& vlist, bool deg_corr, bool dense,
bool multigraph, double beta, bool sequential,
bool parallel, bool random_move, double c, bool verbose,
size_t max_edge_index, size_t nmerges, size_t ntries,
size_t max_edge_index, size_t nmerges, size_t niter,
Vprop merge_map, partition_stats_t& partition_stats,
rng_t& rng, double& S, size_t& nmoves,
GraphInterface& bgi)
......@@ -209,7 +209,7 @@ struct move_sweep_dispatch
deg_corr(deg_corr), dense(dense), multigraph(multigraph), beta(beta),
sequential(sequential), parallel(parallel), random_move(random_move),
c(c), verbose(verbose), max_edge_index(max_edge_index),
nmerges(nmerges), ntries(ntries), merge_map(merge_map),
nmerges(nmerges), niter(niter), merge_map(merge_map),
partition_stats(partition_stats), rng(rng), S(S),
nmoves(nmoves), bgi(bgi)
{}
......@@ -233,7 +233,7 @@ struct move_sweep_dispatch
bool verbose;
size_t max_edge_index;
size_t nmerges;
size_t ntries;
size_t niter;
Vprop merge_map;
partition_stats_t& partition_stats;
rng_t& rng;
......@@ -269,27 +269,59 @@ struct move_sweep_dispatch
typedef typename property_map_type::apply<DynamicSampler<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >,
GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups);
dispatch(mrs, mrp, mrm, wr, b, g, aemat, asampler, acavity_sampler, bg, egroups);
try
{
typedef typename get_emat_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
size_t B = num_vertices(bg);
size_t max_BE = is_directed::apply<Graph>::type::value ?
B * B : (B * (B + 1)) / 2;
dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g,
asampler, acavity_sampler, bg, egroups, emat);
}
catch (bad_any_cast&)
{
typedef typename get_ehash_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg,
egroups, emat);
}
}
else
{
typedef typename property_map_type::apply<vector<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >,
GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups);
dispatch(mrs, mrp, mrm, wr, b, g, aemat, asampler, acavity_sampler, bg, egroups);
}
}
template <class Graph, class BGraph, class Egroups>
void dispatch(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g,
boost::any& aemat, boost::any asampler,
boost::any acavity_sampler, BGraph& bg, Egroups egroups) const
try
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename get_emat_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
size_t B = num_vertices(bg);
size_t max_BE = is_directed::apply<Graph>::type::value ?
B * B : (B * (B + 1)) / 2;
dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g,
asampler, acavity_sampler, bg, egroups, emat);
}
catch (bad_any_cast&)
{
typedef typename get_ehash_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg,
egroups, emat);
}
}
}
template <class Graph, class BGraph, class Egroups, class Emat, class MEprop>
void dispatch(MEprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g,
boost::any asampler, boost::any acavity_sampler, BGraph& bg,
Egroups egroups, Emat& emat) const
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
size_t eidx = random_move ? 1 : max_edge_index;
......@@ -300,54 +332,46 @@ struct move_sweep_dispatch
sampler_map_t sampler = any_cast<sampler_map_t>(asampler);
sampler_map_t cavity_sampler = any_cast<sampler_map_t>(acavity_sampler);
try
{
typedef typename get_emat_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
ConstantPropertyMap<int, typename graph_traits<Graph>::edge_descriptor> ce(0);
ConstantPropertyMap<std::array<int, 1>, typename graph_traits<Graph>::vertex_descriptor> cv({-1});
IdentityArrayPropertyMap<typename graph_traits<Graph>::vertex_descriptor> vmap;
boost::typed_identity_property_map<int> identity;
// make sure the properties are _unchecked_, since otherwise it
// affects performance
move_sweep(mrs.get_unchecked(max_BE),
overlap_stats_t ostats;
vector<size_t> free_blocks;
auto state = make_block_state(g, eweight.get_unchecked(max_edge_index),
vweight.get_unchecked(num_vertices(g)),
b.get_unchecked(num_vertices(g)), bg,
emat, mrs,
mrp.get_unchecked(num_vertices(bg)),
mrm.get_unchecked(num_vertices(bg)),
wr.get_unchecked(num_vertices(bg)),
b.get_unchecked(num_vertices(g)),
label.get_unchecked(num_vertices(bg)), vlist, deg_corr,
dense, multigraph, beta,
eweight.get_unchecked(max_edge_index),
vweight.get_unchecked(num_vertices(g)),
egroups.get_unchecked(num_vertices(bg)),
esrcpos.get_unchecked(eidx),
etgtpos.get_unchecked(eidx), g, bg, emat, sampler,
cavity_sampler, sequential, parallel, random_move, c,
nmerges, ntries,
merge_map.get_unchecked(num_vertices(g)),
partition_stats, verbose, rng, S, nmoves,
overlap_stats_t());
}
catch (bad_any_cast&)
{
typedef typename get_ehash_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
move_sweep(mrs.get_unchecked(num_edges(g)),
mrp.get_unchecked(num_vertices(bg)),
mrm.get_unchecked(num_vertices(bg)),
etgtpos.get_unchecked(eidx), sampler,
cavity_sampler, partition_stats, ostats,
identity, identity, free_blocks,
false, false, true);
vector<decltype(state)> states = {state};
vector<EntrySet<Graph>> m_entries = {EntrySet<Graph>(num_vertices(bg))};
move_sweep(states, m_entries,
wr.get_unchecked(num_vertices(bg)),
b.get_unchecked(num_vertices(g)),
ce, cv, vmap,
label.get_unchecked(num_vertices(bg)), vlist, deg_corr,
dense, multigraph, beta,
eweight.get_unchecked(max_edge_index),
vweight.get_unchecked(num_vertices(g)),
egroups.get_unchecked(num_vertices(bg)),
esrcpos.get_unchecked(eidx),
etgtpos.get_unchecked(eidx), g, bg, emat, sampler,
cavity_sampler, sequential, parallel, random_move, c,
nmerges, ntries,
g, sequential, parallel, random_move, c,
nmerges,
merge_map.get_unchecked(num_vertices(g)),
partition_stats, verbose, rng, S, nmoves,
overlap_stats_t());
}
niter, num_vertices(bg),
verbose, rng, S, nmoves, ostats);
}
};
......@@ -364,8 +388,8 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
boost::any oetgtpos, double beta,
bool sequential, bool parallel,
bool random_move, double c, bool weighted,
size_t nmerges, size_t ntries,
boost::any omerge_map,
size_t nmerges, boost::any omerge_map,
size_t niter,
partition_stats_t& partition_stats,
bool verbose, rng_t& rng)
{
......@@ -400,7 +424,7 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
(eweight, vweight, oegroups, esrcpos, etgtpos,
label, vlist, deg_corr, dense, multigraph, beta,
sequential, parallel, random_move, c, verbose,
gi.GetMaxEdgeIndex(), nmerges, ntries, merge_map,
gi.GetMaxEdgeIndex(), nmerges, niter, merge_map,
partition_stats, rng, S, nmoves, bgi),
mrs, mrp, mrm, wr, b, placeholders::_1,
std::ref(emat), sampler, cavity_sampler, weighted))();
......@@ -562,7 +586,7 @@ struct get_deg_entropy_term_overlap
double& S) const
{
#ifdef HAVE_SPARSEHASH
typedef dense_hash_map<int, int> map_t;
typedef dense_hash_map<int, int, std::hash<int>> map_t;
#else
typedef unordered_map<int, int> map_t;
#endif
......@@ -627,31 +651,33 @@ vector<int32_t> get_vector(size_t n)
return vector<int32_t>(n);
}
template <class Value>
void vector_map(boost::python::object ovals, boost::python::object omap)
{
multi_array_ref<int32_t,1> vals = get_array<int32_t,1>(ovals);
multi_array_ref<int32_t,1> map = get_array<int32_t,1>(omap);
multi_array_ref<Value,1> vals = get_array<Value,1>(ovals);
multi_array_ref<Value,1> map = get_array<Value,1>(omap);
size_t pos = 0;
for (size_t i = 0; i < vals.size(); ++i)
{
int32_t v = vals[i];
Value v = vals[i];
if (map[v] == -1)
map[v] = pos++;
vals[i] = map[v];
}
}
template <class Value>
void vector_continuous_map(boost::python::object ovals)
{
multi_array_ref<int32_t,1> vals = get_array<int32_t,1>(ovals);
unordered_map<int32_t, size_t> map;
multi_array_ref<Value,1> vals = get_array<Value,1>(ovals);
unordered_map<Value, size_t> map;
for (size_t i = 0; i < vals.size(); ++i)
{
int32_t v = vals[i];
Value v = vals[i];
auto iter = map.find(v);
if (iter == map.end())
iter = map.insert(make_pair(v, map.size())).first;
......@@ -659,11 +685,12 @@ void vector_continuous_map(boost::python::object ovals)
}
}
template <class Value>
void vector_rmap(boost::python::object ovals, boost::python::object omap)
{
multi_array_ref<int32_t,1> vals = get_array<int32_t,1>(ovals);
multi_array_ref<int32_t,1> map = get_array<int32_t,1>(omap);
multi_array_ref<Value,1> vals = get_array<Value,1>(ovals);
multi_array_ref<Value,1> map = get_array<Value,1>(omap);
for (size_t i = 0; i < vals.size(); ++i)
{
......@@ -682,15 +709,15 @@ struct get_partition_stats
{
template <class Graph, class Vprop, class Eprop>
void operator()(Graph& g, Vprop b, Eprop eweight, size_t N, size_t B,
partition_stats_t& partition_stats) const
bool edges_dl, partition_stats_t& partition_stats) const
{
partition_stats = partition_stats_t(g, b, eweight, N, B);
partition_stats = partition_stats_t(g, b, eweight, N, B, edges_dl);
}
};
partition_stats_t
do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight,
size_t N, size_t B)
size_t N, size_t B, bool edges_dl)
{
typedef property_map_type::apply<int32_t,
GraphInterface::vertex_index_map_t>::type
......@@ -705,7 +732,7 @@ do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight,
emap_t eweight = any_cast<emap_t>(aeweight);
run_action<>()(gi, std::bind(get_partition_stats(),
placeholders::_1, b, eweight, N, B,
placeholders::_1, b, eweight, N, B, edges_dl,
std::ref(partition_stats)))();
return partition_stats;
}
......@@ -732,14 +759,13 @@ void export_blockmodel()
def("get_mu_l", python_get_mu_l);
def("polylog", polylog<double>);
def("poisson_entropy", poisson_entropy<double>);
def("lpoisson", lpoisson<double>);
def("poisson", poisson<double>);
def("get_vector", get_vector);
def("vector_map", vector_map);
def("vector_rmap", vector_rmap);
def("vector_continuous_map", vector_continuous_map);
def("vector_map", vector_map<int32_t>);
def("vector_map64", vector_map<int64_t>);
def("vector_rmap", vector_rmap<int32_t>);
def("vector_rmap64", vector_rmap<int64_t>);
def("vector_continuous_map", vector_continuous_map<int32_t>);
def("vector_continuous_map64", vector_continuous_map<int64_t>);
def("create_emat", do_create_emat);
def("create_ehash", do_create_ehash);
......
This diff is collapsed.
This diff is collapsed.
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef GRAPH_BLOCKMODEL_COVARIATE_HH
#define GRAPH_BLOCKMODEL_COVARIATE_HH
#include <cmath>
#include <iostream>
#include <queue>
#include <boost/math/special_functions/zeta.hpp>
#include <boost/functional/hash.hpp>
#include "config.h"
#include <unordered_set>
#include <unordered_map>
#include <tuple>
#ifdef HAVE_SPARSEHASH
#include SPARSEHASH_INCLUDE(dense_hash_set)
#include SPARSEHASH_INCLUDE(dense_hash_map)
#endif
namespace graph_tool
{
#ifdef HAVE_SPARSEHASH
using google::dense_hash_set;
using google::dense_hash_map;
#endif
using namespace boost;
// this will label each edge covariate in a consecutive range [0, C-1]
struct ec_hist
{
template <class Graph, class EVMap, class EMap>
void operator()(Graph&g, const EVMap& ev, EMap& ec) const
{
typedef typename property_traits<EVMap>::value_type val_t;
unordered_map<val_t, size_t> ehist;
for (auto e : edges_range(g))
{
auto iter = ehist.find(ev[e]);
size_t x;
if (iter == ehist.end())
{
x = ehist.size();
ehist[ev[e]] = x;
}
else
{
x = iter->second;
}
ec[e] = x;
}
}
};
// this will split an edge-valued graph into layers
struct split_graph
{
template <class Graph, class ECMap, class EMap, class VMap, class VVMap, class BMap>
void operator()(Graph& g, ECMap& ec, VMap& b, EMap& eweight, VMap& vweight,
VVMap& vc, VVMap& vmap,
std::vector<std::reference_wrapper<GraphInterface>>& us,
std::vector<std::reference_wrapper<VMap>>& ub,
std::vector<std::reference_wrapper<VMap>>& uvweight,
std::vector<std::reference_wrapper<EMap>>& ueweight,
std::vector<BMap>& block_map,
std::vector<std::reference_wrapper<VMap>>& block_rmap,
std::vector<std::reference_wrapper<VMap>>& uvmap) const
{
std::vector<unordered_map<size_t, size_t>> vhmap(num_vertices(g));
auto get_v = [&] (size_t v, size_t l) -> size_t
{
auto iter = vhmap[v].find(l);
if (iter == vhmap[v].end())
{
size_t u = add_vertex(us[l].get().GetGraph());
vhmap[v][l] = u;
size_t pos = lower_bound(vc[v].begin(), vc[v].end(), l) - vc[v].begin();
vc[v].insert(vc[v].begin() + pos, l);
vmap[v].insert(vmap[v].begin() + pos, u);
uvmap[l].get()[u] = v;
uvweight[l].get()[u] = vweight[v];
size_t r = b[v];
size_t u_r;
if (block_map.size() <= l + 1)
{
size_t n = block_map.size();
block_map.resize(l + 2);
#ifdef HAVE_SPARSEHASH
for (size_t i = n; i < block_map.size(); ++i)
{
block_map[i].set_empty_key(numeric_limits<size_t>::max());
block_map[i].set_deleted_key(numeric_limits<size_t>::max() - 1);
}
#endif
}
auto& bmap = block_map[l + 1];
auto riter = bmap.find(r);
if (riter == bmap.end())
{
u_r = bmap.size();
bmap[r] = u_r;
block_rmap[l].get()[u_r] = r;
}
else
{
u_r = riter->second;
}
ub[l].get()[u] = u_r;
return u;
}
else
{
return iter->second;
}
};
for (auto e : edges_range(g))
{
auto s = source(e, g);
auto t = target(e, g);
size_t l = ec[e];
auto u_s = get_v(s, l);
auto u_t = get_v(t, l);
auto ne = add_edge(u_s, u_t, us[l].get().GetGraph()).first;
ueweight[l].get()[ne] = eweight[e];
}
}
};
// Vector of hash maps from size_t to size_t; get_block_map() and
// remove_block_map() operate on single elements of it. The dense hash map is
// used when sparsehash is available.
#ifdef HAVE_SPARSEHASH
using bmap_t = vector<dense_hash_map<size_t, size_t, std::hash<size_t>>>;
#else
using bmap_t = vector<unordered_map<size_t, size_t>>;
#endif
// Returns the index associated with block `r` in `bmap`, creating the
// association if it does not exist yet. A new index is taken from the back of
// state.free_blocks when that list is not empty, and is bmap.size() otherwise;
// the reverse association is stored in state.block_rmap.
//
// The lookup and the update run inside the named OpenMP critical section
// "covariate_block_map", the same one used by remove_block_map().
template <class BlockState>
size_t get_block_map(BlockState& state, typename bmap_t::value_type& bmap,
                     size_t r)
{
    size_t r_u;
    #pragma omp critical (covariate_block_map)
    {
        // no early return in here: the critical region must be left through
        // its closing brace
        auto pos = bmap.find(r);
        if (pos != bmap.end())
        {
            r_u = pos->second;
        }
        else
        {
            auto& free_blocks = state.free_blocks;
            if (!free_blocks.empty())
            {
                r_u = free_blocks.back();
                free_blocks.pop_back();
            }
            else
            {
                r_u = bmap.size();
            }
            bmap[r] = r_u;
            state.block_rmap[r_u] = r;
        }
        assert(r_u < num_vertices(state.bg));
    }
    return r_u;
}
// Drops the entry of block `r` from `bmap`, if there is one, and appends the
// index it was mapped to to state.free_blocks. Runs inside the named OpenMP
// critical section "covariate_block_map".
template <class BlockState>
void remove_block_map(BlockState& state, typename bmap_t::value_type& bmap,
                      size_t r)
{
    #pragma omp critical (covariate_block_map)
    {
        auto pos = bmap.find(r);
        // a missing entry is not an error: another thread may have removed it
        // already
        if (pos != bmap.end())
        {
            state.free_blocks.push_back(pos->second);
            bmap.erase(pos);
        }
    }
}
}; // graph_tool namespace
#endif // GRAPH_BLOCKMODEL_COVARIATE_HH
......@@ -100,9 +100,9 @@ using namespace boost::python;
extern void community_network(GraphInterface& gi, GraphInterface& cgi,
boost::any community_property,
boost::any condensed_community_property,
boost::any vertex_count,
boost::any edge_count, boost::any vweight,
boost::any eweight, bool self_loops);
boost::any vertex_count, boost::any edge_count,
boost::any vweight, boost::any eweight,
bool self_loops, bool parallel_edges);
void community_network_vavg(GraphInterface& gi, GraphInterface& cgi,
boost::any community_property,
......@@ -122,6 +122,7 @@ void community_network_eavg(GraphInterface& gi, GraphInterface& cgi,
extern void export_blockmodel();
extern void export_blockmodel_overlap();
extern void export_blockmodel_covariate();
BOOST_PYTHON_MODULE(libgraph_tool_community)
{
......@@ -133,4 +134,5 @@ BOOST_PYTHON_MODULE(libgraph_tool_community)
export_blockmodel();
export_blockmodel_overlap();
export_blockmodel_covariate();
}
......@@ -84,15 +84,14 @@ void community_network_edges(GraphInterface& gi, GraphInterface& cgi,
boost::any community_property,
boost::any condensed_community_property,