Commit a5574175 authored by Tiago Peixoto
Browse files

Implement SBM inference with layers and edge covariates

parent 5075e605
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
.. autofunction:: minimize_blockmodel_dl .. autofunction:: minimize_blockmodel_dl
.. autoclass:: BlockState .. autoclass:: BlockState
.. autoclass:: OverlapBlockState .. autoclass:: OverlapBlockState
.. autoclass:: CovariateBlockState
.. autofunction:: mcmc_sweep .. autofunction:: mcmc_sweep
.. autoclass:: MinimizeState .. autoclass:: MinimizeState
.. autofunction:: multilevel_minimize .. autofunction:: multilevel_minimize
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
.. autofunction:: graph_draw .. autofunction:: graph_draw
.. autofunction:: draw_hierarchy
.. autofunction:: graphviz_draw .. autofunction:: graphviz_draw
.. autofunction:: prop_to_size .. autofunction:: prop_to_size
.. autofunction:: get_hierarchy_control_points .. autofunction:: get_hierarchy_control_points
......
...@@ -17,6 +17,7 @@ libgraph_tool_community_la_LDFLAGS = $(MOD_LDFLAGS) ...@@ -17,6 +17,7 @@ libgraph_tool_community_la_LDFLAGS = $(MOD_LDFLAGS)
libgraph_tool_community_la_SOURCES = \ libgraph_tool_community_la_SOURCES = \
graph_blockmodel.cc \ graph_blockmodel.cc \
graph_blockmodel_overlap.cc \ graph_blockmodel_overlap.cc \
graph_blockmodel_covariates.cc \
graph_community.cc \ graph_community.cc \
graph_community_network.cc \ graph_community_network.cc \
graph_community_network_edges.cc \ graph_community_network_edges.cc \
......
...@@ -199,7 +199,7 @@ struct move_sweep_dispatch ...@@ -199,7 +199,7 @@ struct move_sweep_dispatch
vector<int>& vlist, bool deg_corr, bool dense, vector<int>& vlist, bool deg_corr, bool dense,
bool multigraph, double beta, bool sequential, bool multigraph, double beta, bool sequential,
bool parallel, bool random_move, double c, bool verbose, bool parallel, bool random_move, double c, bool verbose,
size_t max_edge_index, size_t nmerges, size_t ntries, size_t max_edge_index, size_t nmerges, size_t niter,
Vprop merge_map, partition_stats_t& partition_stats, Vprop merge_map, partition_stats_t& partition_stats,
rng_t& rng, double& S, size_t& nmoves, rng_t& rng, double& S, size_t& nmoves,
GraphInterface& bgi) GraphInterface& bgi)
...@@ -209,7 +209,7 @@ struct move_sweep_dispatch ...@@ -209,7 +209,7 @@ struct move_sweep_dispatch
deg_corr(deg_corr), dense(dense), multigraph(multigraph), beta(beta), deg_corr(deg_corr), dense(dense), multigraph(multigraph), beta(beta),
sequential(sequential), parallel(parallel), random_move(random_move), sequential(sequential), parallel(parallel), random_move(random_move),
c(c), verbose(verbose), max_edge_index(max_edge_index), c(c), verbose(verbose), max_edge_index(max_edge_index),
nmerges(nmerges), ntries(ntries), merge_map(merge_map), nmerges(nmerges), niter(niter), merge_map(merge_map),
partition_stats(partition_stats), rng(rng), S(S), partition_stats(partition_stats), rng(rng), S(S),
nmoves(nmoves), bgi(bgi) nmoves(nmoves), bgi(bgi)
{} {}
...@@ -233,7 +233,7 @@ struct move_sweep_dispatch ...@@ -233,7 +233,7 @@ struct move_sweep_dispatch
bool verbose; bool verbose;
size_t max_edge_index; size_t max_edge_index;
size_t nmerges; size_t nmerges;
size_t ntries; size_t niter;
Vprop merge_map; Vprop merge_map;
partition_stats_t& partition_stats; partition_stats_t& partition_stats;
rng_t& rng; rng_t& rng;
...@@ -269,28 +269,60 @@ struct move_sweep_dispatch ...@@ -269,28 +269,60 @@ struct move_sweep_dispatch
typedef typename property_map_type::apply<DynamicSampler<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >, typedef typename property_map_type::apply<DynamicSampler<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >,
GraphInterface::vertex_index_map_t>::type vemap_t; GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups); vemap_t egroups = any_cast<vemap_t>(oegroups);
dispatch(mrs, mrp, mrm, wr, b, g, aemat, asampler, acavity_sampler, bg, egroups);
try
{
typedef typename get_emat_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
size_t B = num_vertices(bg);
size_t max_BE = is_directed::apply<Graph>::type::value ?
B * B : (B * (B + 1)) / 2;
dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g,
asampler, acavity_sampler, bg, egroups, emat);
}
catch (bad_any_cast&)
{
typedef typename get_ehash_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg,
egroups, emat);
}
} }
else else
{ {
typedef typename property_map_type::apply<vector<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >, typedef typename property_map_type::apply<vector<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >,
GraphInterface::vertex_index_map_t>::type vemap_t; GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups); vemap_t egroups = any_cast<vemap_t>(oegroups);
dispatch(mrs, mrp, mrm, wr, b, g, aemat, asampler, acavity_sampler, bg, egroups);
try
{
typedef typename get_emat_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
size_t B = num_vertices(bg);
size_t max_BE = is_directed::apply<Graph>::type::value ?
B * B : (B * (B + 1)) / 2;
dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g,
asampler, acavity_sampler, bg, egroups, emat);
}
catch (bad_any_cast&)
{
typedef typename get_ehash_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg,
egroups, emat);
}
} }
} }
template <class Graph, class BGraph, class Egroups> template <class Graph, class BGraph, class Egroups, class Emat, class MEprop>
void dispatch(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, void dispatch(MEprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g,
boost::any& aemat, boost::any asampler, boost::any asampler, boost::any acavity_sampler, BGraph& bg,
boost::any acavity_sampler, BGraph& bg, Egroups egroups) const Egroups egroups, Emat& emat) const
{ {
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t; typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
size_t B = num_vertices(bg);
size_t max_BE = is_directed::apply<Graph>::type::value ?
B * B : (B * (B + 1)) / 2;
size_t eidx = random_move ? 1 : max_edge_index; size_t eidx = random_move ? 1 : max_edge_index;
typedef typename property_map<Graph, vertex_index_t>::type vindex_map_t; typedef typename property_map<Graph, vertex_index_t>::type vindex_map_t;
...@@ -300,54 +332,46 @@ struct move_sweep_dispatch ...@@ -300,54 +332,46 @@ struct move_sweep_dispatch
sampler_map_t sampler = any_cast<sampler_map_t>(asampler); sampler_map_t sampler = any_cast<sampler_map_t>(asampler);
sampler_map_t cavity_sampler = any_cast<sampler_map_t>(acavity_sampler); sampler_map_t cavity_sampler = any_cast<sampler_map_t>(acavity_sampler);
try ConstantPropertyMap<int, typename graph_traits<Graph>::edge_descriptor> ce(0);
{ ConstantPropertyMap<std::array<int, 1>, typename graph_traits<Graph>::vertex_descriptor> cv({-1});
typedef typename get_emat_t::apply<BGraph>::type emat_t; IdentityArrayPropertyMap<typename graph_traits<Graph>::vertex_descriptor> vmap;
emat_t& emat = any_cast<emat_t&>(aemat); boost::typed_identity_property_map<int> identity;
// make sure the properties are _unchecked_, since otherwise it // make sure the properties are _unchecked_, since otherwise it
// affects performance // affects performance
move_sweep(mrs.get_unchecked(max_BE), overlap_stats_t ostats;
mrp.get_unchecked(num_vertices(bg)), vector<size_t> free_blocks;
mrm.get_unchecked(num_vertices(bg)), auto state = make_block_state(g, eweight.get_unchecked(max_edge_index),
wr.get_unchecked(num_vertices(bg)), vweight.get_unchecked(num_vertices(g)),
b.get_unchecked(num_vertices(g)), b.get_unchecked(num_vertices(g)), bg,
label.get_unchecked(num_vertices(bg)), vlist, deg_corr, emat, mrs,
dense, multigraph, beta, mrp.get_unchecked(num_vertices(bg)),
eweight.get_unchecked(max_edge_index), mrm.get_unchecked(num_vertices(bg)),
vweight.get_unchecked(num_vertices(g)), wr.get_unchecked(num_vertices(bg)),
egroups.get_unchecked(num_vertices(bg)), egroups.get_unchecked(num_vertices(bg)),
esrcpos.get_unchecked(eidx), esrcpos.get_unchecked(eidx),
etgtpos.get_unchecked(eidx), g, bg, emat, sampler, etgtpos.get_unchecked(eidx), sampler,
cavity_sampler, sequential, parallel, random_move, c, cavity_sampler, partition_stats, ostats,
nmerges, ntries, identity, identity, free_blocks,
merge_map.get_unchecked(num_vertices(g)), false, false, true);
partition_stats, verbose, rng, S, nmoves,
overlap_stats_t()); vector<decltype(state)> states = {state};
} vector<EntrySet<Graph>> m_entries = {EntrySet<Graph>(num_vertices(bg))};
catch (bad_any_cast&)
{ move_sweep(states, m_entries,
typedef typename get_ehash_t::apply<BGraph>::type emat_t; wr.get_unchecked(num_vertices(bg)),
emat_t& emat = any_cast<emat_t&>(aemat); b.get_unchecked(num_vertices(g)),
move_sweep(mrs.get_unchecked(num_edges(g)), ce, cv, vmap,
mrp.get_unchecked(num_vertices(bg)), label.get_unchecked(num_vertices(bg)), vlist, deg_corr,
mrm.get_unchecked(num_vertices(bg)), dense, multigraph, beta,
wr.get_unchecked(num_vertices(bg)), eweight.get_unchecked(max_edge_index),
b.get_unchecked(num_vertices(g)), vweight.get_unchecked(num_vertices(g)),
label.get_unchecked(num_vertices(bg)), vlist, deg_corr, g, sequential, parallel, random_move, c,
dense, multigraph, beta, nmerges,
eweight.get_unchecked(max_edge_index), merge_map.get_unchecked(num_vertices(g)),
vweight.get_unchecked(num_vertices(g)), niter, num_vertices(bg),
egroups.get_unchecked(num_vertices(bg)), verbose, rng, S, nmoves, ostats);
esrcpos.get_unchecked(eidx),
etgtpos.get_unchecked(eidx), g, bg, emat, sampler,
cavity_sampler, sequential, parallel, random_move, c,
nmerges, ntries,
merge_map.get_unchecked(num_vertices(g)),
partition_stats, verbose, rng, S, nmoves,
overlap_stats_t());
}
} }
}; };
...@@ -364,8 +388,8 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi, ...@@ -364,8 +388,8 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
boost::any oetgtpos, double beta, boost::any oetgtpos, double beta,
bool sequential, bool parallel, bool sequential, bool parallel,
bool random_move, double c, bool weighted, bool random_move, double c, bool weighted,
size_t nmerges, size_t ntries, size_t nmerges, boost::any omerge_map,
boost::any omerge_map, size_t niter,
partition_stats_t& partition_stats, partition_stats_t& partition_stats,
bool verbose, rng_t& rng) bool verbose, rng_t& rng)
{ {
...@@ -400,7 +424,7 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi, ...@@ -400,7 +424,7 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
(eweight, vweight, oegroups, esrcpos, etgtpos, (eweight, vweight, oegroups, esrcpos, etgtpos,
label, vlist, deg_corr, dense, multigraph, beta, label, vlist, deg_corr, dense, multigraph, beta,
sequential, parallel, random_move, c, verbose, sequential, parallel, random_move, c, verbose,
gi.GetMaxEdgeIndex(), nmerges, ntries, merge_map, gi.GetMaxEdgeIndex(), nmerges, niter, merge_map,
partition_stats, rng, S, nmoves, bgi), partition_stats, rng, S, nmoves, bgi),
mrs, mrp, mrm, wr, b, placeholders::_1, mrs, mrp, mrm, wr, b, placeholders::_1,
std::ref(emat), sampler, cavity_sampler, weighted))(); std::ref(emat), sampler, cavity_sampler, weighted))();
...@@ -562,7 +586,7 @@ struct get_deg_entropy_term_overlap ...@@ -562,7 +586,7 @@ struct get_deg_entropy_term_overlap
double& S) const double& S) const
{ {
#ifdef HAVE_SPARSEHASH #ifdef HAVE_SPARSEHASH
typedef dense_hash_map<int, int> map_t; typedef dense_hash_map<int, int, std::hash<int>> map_t;
#else #else
typedef unordered_map<int, int> map_t; typedef unordered_map<int, int> map_t;
#endif #endif
...@@ -627,31 +651,33 @@ vector<int32_t> get_vector(size_t n) ...@@ -627,31 +651,33 @@ vector<int32_t> get_vector(size_t n)
return vector<int32_t>(n); return vector<int32_t>(n);
} }
template <class Value>
void vector_map(boost::python::object ovals, boost::python::object omap) void vector_map(boost::python::object ovals, boost::python::object omap)
{ {
multi_array_ref<int32_t,1> vals = get_array<int32_t,1>(ovals); multi_array_ref<Value,1> vals = get_array<Value,1>(ovals);
multi_array_ref<int32_t,1> map = get_array<int32_t,1>(omap); multi_array_ref<Value,1> map = get_array<Value,1>(omap);
size_t pos = 0; size_t pos = 0;
for (size_t i = 0; i < vals.size(); ++i) for (size_t i = 0; i < vals.size(); ++i)
{ {
int32_t v = vals[i]; Value v = vals[i];
if (map[v] == -1) if (map[v] == -1)
map[v] = pos++; map[v] = pos++;
vals[i] = map[v]; vals[i] = map[v];
} }
} }
template <class Value>
void vector_continuous_map(boost::python::object ovals) void vector_continuous_map(boost::python::object ovals)
{ {
multi_array_ref<int32_t,1> vals = get_array<int32_t,1>(ovals); multi_array_ref<Value,1> vals = get_array<Value,1>(ovals);
unordered_map<int32_t, size_t> map; unordered_map<Value, size_t> map;
for (size_t i = 0; i < vals.size(); ++i) for (size_t i = 0; i < vals.size(); ++i)
{ {
int32_t v = vals[i]; Value v = vals[i];
auto iter = map.find(v); auto iter = map.find(v);
if (iter == map.end()) if (iter == map.end())
iter = map.insert(make_pair(v, map.size())).first; iter = map.insert(make_pair(v, map.size())).first;
...@@ -659,11 +685,12 @@ void vector_continuous_map(boost::python::object ovals) ...@@ -659,11 +685,12 @@ void vector_continuous_map(boost::python::object ovals)
} }
} }
template <class Value>
void vector_rmap(boost::python::object ovals, boost::python::object omap) void vector_rmap(boost::python::object ovals, boost::python::object omap)
{ {
multi_array_ref<int32_t,1> vals = get_array<int32_t,1>(ovals); multi_array_ref<Value,1> vals = get_array<Value,1>(ovals);
multi_array_ref<int32_t,1> map = get_array<int32_t,1>(omap); multi_array_ref<Value,1> map = get_array<Value,1>(omap);
for (size_t i = 0; i < vals.size(); ++i) for (size_t i = 0; i < vals.size(); ++i)
{ {
...@@ -682,15 +709,15 @@ struct get_partition_stats ...@@ -682,15 +709,15 @@ struct get_partition_stats
{ {
template <class Graph, class Vprop, class Eprop> template <class Graph, class Vprop, class Eprop>
void operator()(Graph& g, Vprop b, Eprop eweight, size_t N, size_t B, void operator()(Graph& g, Vprop b, Eprop eweight, size_t N, size_t B,
partition_stats_t& partition_stats) const bool edges_dl, partition_stats_t& partition_stats) const
{ {
partition_stats = partition_stats_t(g, b, eweight, N, B); partition_stats = partition_stats_t(g, b, eweight, N, B, edges_dl);
} }
}; };
partition_stats_t partition_stats_t
do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight, do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight,
size_t N, size_t B) size_t N, size_t B, bool edges_dl)
{ {
typedef property_map_type::apply<int32_t, typedef property_map_type::apply<int32_t,
GraphInterface::vertex_index_map_t>::type GraphInterface::vertex_index_map_t>::type
...@@ -705,7 +732,7 @@ do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight, ...@@ -705,7 +732,7 @@ do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight,
emap_t eweight = any_cast<emap_t>(aeweight); emap_t eweight = any_cast<emap_t>(aeweight);
run_action<>()(gi, std::bind(get_partition_stats(), run_action<>()(gi, std::bind(get_partition_stats(),
placeholders::_1, b, eweight, N, B, placeholders::_1, b, eweight, N, B, edges_dl,
std::ref(partition_stats)))(); std::ref(partition_stats)))();
return partition_stats; return partition_stats;
} }
...@@ -732,14 +759,13 @@ void export_blockmodel() ...@@ -732,14 +759,13 @@ void export_blockmodel()
def("get_mu_l", python_get_mu_l); def("get_mu_l", python_get_mu_l);
def("polylog", polylog<double>); def("polylog", polylog<double>);
def("poisson_entropy", poisson_entropy<double>);
def("lpoisson", lpoisson<double>);
def("poisson", poisson<double>);
def("get_vector", get_vector); def("get_vector", get_vector);
def("vector_map", vector_map); def("vector_map", vector_map<int32_t>);
def("vector_rmap", vector_rmap); def("vector_map64", vector_map<int64_t>);
def("vector_continuous_map", vector_continuous_map); def("vector_rmap", vector_rmap<int32_t>);
def("vector_rmap64", vector_rmap<int64_t>);
def("vector_continuous_map", vector_continuous_map<int32_t>);
def("vector_continuous_map64", vector_continuous_map<int64_t>);
def("create_emat", do_create_emat); def("create_emat", do_create_emat);
def("create_ehash", do_create_ehash); def("create_ehash", do_create_ehash);
......
This diff is collapsed.
This diff is collapsed.
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef GRAPH_BLOCKMODEL_COVARIATE_HH
#define GRAPH_BLOCKMODEL_COVARIATE_HH
#include <cmath>
#include <iostream>
#include <queue>
#include <boost/math/special_functions/zeta.hpp>
#include <boost/functional/hash.hpp>
#include "config.h"
#include <unordered_set>
#include <unordered_map>
#include <tuple>
#ifdef HAVE_SPARSEHASH
#include SPARSEHASH_INCLUDE(dense_hash_set)
#include SPARSEHASH_INCLUDE(dense_hash_map)
#endif
namespace graph_tool
{
#ifdef HAVE_SPARSEHASH
using google::dense_hash_set;
using google::dense_hash_map;
#endif
using namespace boost;
// this will label each edge covariate in a consecutive range [0, C-1]
struct ec_hist
{
template <class Graph, class EVMap, class EMap>
void operator()(Graph&g, const EVMap& ev, EMap& ec) const
{
typedef typename property_traits<EVMap>::value_type val_t;
unordered_map<val_t, size_t> ehist;
for (auto e : edges_range(g))
{
auto iter = ehist.find(ev[e]);
size_t x;
if (iter == ehist.end())
{
x = ehist.size();
ehist[ev[e]] = x;
}
else
{
x = iter->second;
}
ec[e] = x;
}
}
};
// this will split an edge-valued graph into layers
struct split_graph
{
template <class Graph, class ECMap, class EMap, class VMap, class VVMap, class BMap>
void operator()(Graph& g, ECMap& ec, VMap& b, EMap& eweight, VMap& vweight,
VVMap& vc, VVMap& vmap,
std::vector<std::reference_wrapper<GraphInterface>>& us,
std::vector<std::reference_wrapper<VMap>>& ub,
std::vector<std::reference_wrapper<VMap>>& uvweight,
std::vector<std::reference_wrapper<EMap>>& ueweight,
std::vector<BMap>& block_map,
std::vector<std::reference_wrapper<VMap>>& block_rmap,
std::vector<std::reference_wrapper<VMap>>& uvmap) const
{
std::vector<unordered_map<size_t, size_t>> vhmap(num_vertices(g));
auto get_v = [&] (size_t v, size_t l) -> size_t
{
auto iter = vhmap[v].find(l);
if (iter == vhmap[v].end())
{
size_t u = add_vertex(us[l].get().GetGraph());
vhmap[v][l] = u;
size_t pos = lower_bound(vc[v].begin(), vc[v].end(), l) - vc[v].begin();
vc[v].insert(vc[v].begin() + pos, l);
vmap[v].insert(vmap[v].begin() + pos, u);
uvmap[l].get()[u] = v;
uvweight[l].get()[u] = vweight[v];
size_t r = b[v];
size_t u_r;
if (block_map.size() <= l + 1)
{
size_t n = block_map.size();
block_map.resize(l + 2);
#ifdef HAVE_SPARSEHASH
for (size_t i = n; i < block_map.size(); ++i)
{
block_map[i].set_empty_key(numeric_limits<size_t>::max());
block_map[i].set_deleted_key(numeric_limits<size_t>::max() - 1);
}
#endif
}
auto& bmap = block_map[l + 1];
auto riter = bmap.find(r);
if (riter == bmap.end())
{
u_r = bmap.size();
bmap[r] = u_r;
block_rmap[l].get()[u_r] = r;
}
else
{
u_r = riter->second;