// graph-tool -- a general graph modification and manipulation thingy // // Copyright (C) 2006-2015 Tiago de Paula Peixoto // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . #define BOOST_PYTHON_MAX_ARITY 40 #include #include #include #include "numpy_bind.hh" #include #include #include "graph_filtering.hh" #include "graph.hh" #include "graph_selectors.hh" #include "graph_properties.hh" #include "graph_util.hh" #include "random.hh" #include "config.h" #include "graph_blockmodel_overlap.hh" #include "graph_blockmodel.hh" using namespace boost; using namespace graph_tool; // ==================== // Entropy calculation // ==================== // Repeated computation of x*log(x) and log(x) actually adds up to a lot of // time. A significant speedup can be made by caching pre-computed values. This // is doable since the values of mrse are bounded in [0, 2E], where E is the // total number of edges in the network. Here we simply grow the cache as // needed. namespace graph_tool { vector __safelog_cache; vector __xlogx_cache; vector __lgamma_cache; void init_safelog(size_t x) { size_t old_size = __safelog_cache.size(); if (x >= old_size) { __safelog_cache.resize(x + 1); for (size_t i = old_size; i < __safelog_cache.size(); ++i) __safelog_cache[i] = safelog(double(i)); } } void clear_safelog() { vector().swap(__safelog_cache); } void init_xlogx(size_t x) { size_t old_size = __xlogx_cache.size(); if (x >= old_size) { __xlogx_cache.resize(x + 1); for (size_t i = old_size; i < __xlogx_cache.size(); ++i) __xlogx_cache[i] = i * safelog(i); } } void clear_xlogx() { vector().swap(__xlogx_cache); } void init_lgamma(size_t x) { size_t old_size = __lgamma_cache.size(); if (x >= old_size) { __lgamma_cache.resize(x + 1); for (size_t i = old_size; i < __lgamma_cache.size(); ++i) __lgamma_cache[i] = lgamma(i); } } void clear_lgamma() { vector().swap(__lgamma_cache); } } double do_get_ent(GraphInterface& gi, boost::any omrs, boost::any omrp, boost::any omrm, boost::any owr, bool deg_corr) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; emap_t mrs = any_cast(omrs); vmap_t mrp = any_cast(omrp); vmap_t mrm = any_cast(omrm); vmap_t wr = any_cast(owr); double S = 0; run_action<>() (gi, std::bind(entropy(), mrs, mrp, mrm, wr, deg_corr, placeholders::_1, std::ref(S)))(); return S; } double do_get_ent_dense(GraphInterface& gi, boost::any omrs, boost::any owr, bool multigraph) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; emap_t mrs = any_cast(omrs); vmap_t wr = any_cast(owr); double S = 0; run_action<>() (gi, std::bind(entropy_dense(), mrs, wr, multigraph, placeholders::_1, std::ref(S)))(); return S; } double do_get_ent_parallel(GraphInterface& gi, boost::any oweight) { typedef property_map_type::apply::type emap_t; emap_t weight = any_cast(oweight); double S = 0; run_action<>() (gi, std::bind(entropy_parallel_edges(), placeholders::_1, weight, std::ref(S)))(); return S; } boost::any do_create_emat(GraphInterface& gi) { boost::any emat; run_action<>()(gi, std::bind(create_emat(), placeholders::_1, std::ref(emat)))(); return emat; } boost::any do_create_ehash(GraphInterface& gi) { boost::any emat; run_action<>()(gi, std::bind(create_ehash(), placeholders::_1, std::ref(emat)))(); return emat; } //============ // Main loops //============ template struct move_sweep_dispatch { move_sweep_dispatch(Eprop eweight, Vprop vweight, boost::any egroups, VEprop esrcpos, VEprop etgtpos, Vprop label, vector& vlist, bool deg_corr, bool dense, bool multigraph, double beta, bool sequential, bool parallel, bool random_move, double c, bool verbose, size_t max_edge_index, size_t nmerges, size_t niter, Vprop merge_map, partition_stats_t& partition_stats, rng_t& rng, double& S, size_t& nmoves, GraphInterface& bgi) : eweight(eweight), vweight(vweight), oegroups(egroups), esrcpos(esrcpos), etgtpos(etgtpos), label(label), vlist(vlist), deg_corr(deg_corr), dense(dense), multigraph(multigraph), beta(beta), sequential(sequential), parallel(parallel), random_move(random_move), c(c), verbose(verbose), max_edge_index(max_edge_index), nmerges(nmerges), niter(niter), merge_map(merge_map), partition_stats(partition_stats), rng(rng), S(S), nmoves(nmoves), bgi(bgi) {} Eprop eweight; Vprop vweight; boost::any oegroups; VEprop esrcpos; VEprop etgtpos; Vprop label; size_t n; vector& vlist; bool deg_corr; bool dense; bool multigraph; double beta; bool sequential; bool parallel; bool random_move; double c; bool verbose; size_t max_edge_index; size_t nmerges; size_t niter; Vprop merge_map; partition_stats_t& partition_stats; rng_t& rng; double& S; size_t& nmoves; GraphInterface& bgi; template void operator()(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, boost::any& emat, boost::any sampler, boost::any cavity_sampler, bool weighted) const { if (is_directed::apply::type::value) { dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, cavity_sampler, bgi.GetGraph(), weighted); } else { UndirectedAdaptor ug(bgi.GetGraph()); dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, cavity_sampler, ug, weighted); } } template void dispatch(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, boost::any& aemat, boost::any asampler, boost::any acavity_sampler, BGraph& bg, bool weighted) const { if (weighted) { typedef typename property_map_type::apply::edge_descriptor, bool> >, GraphInterface::vertex_index_map_t>::type vemap_t; vemap_t egroups = any_cast(oegroups); try { typedef typename get_emat_t::apply::type emat_t; emat_t& emat = any_cast(aemat); size_t B = num_vertices(bg); size_t max_BE = is_directed::apply::type::value ? B * B : (B * (B + 1)) / 2; dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } catch (bad_any_cast&) { typedef typename get_ehash_t::apply::type emat_t; emat_t& emat = any_cast(aemat); dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } } else { typedef typename property_map_type::apply::edge_descriptor, bool> >, GraphInterface::vertex_index_map_t>::type vemap_t; vemap_t egroups = any_cast(oegroups); try { typedef typename get_emat_t::apply::type emat_t; emat_t& emat = any_cast(aemat); size_t B = num_vertices(bg); size_t max_BE = is_directed::apply::type::value ? B * B : (B * (B + 1)) / 2; dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } catch (bad_any_cast&) { typedef typename get_ehash_t::apply::type emat_t; emat_t& emat = any_cast(aemat); dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } } } template void dispatch(MEprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, boost::any asampler, boost::any acavity_sampler, BGraph& bg, Egroups egroups, Emat& emat) const { typedef typename graph_traits::vertex_descriptor vertex_t; size_t eidx = random_move ? 1 : max_edge_index; typedef typename property_map::type vindex_map_t; typedef typename property_map_type::apply, vindex_map_t>::type::unchecked_t sampler_map_t; sampler_map_t sampler = any_cast(asampler); sampler_map_t cavity_sampler = any_cast(acavity_sampler); ConstantPropertyMap, typename graph_traits::vertex_descriptor> cv({-1}); IdentityArrayPropertyMap::vertex_descriptor> vmap; boost::typed_identity_property_map identity; // make sure the properties are _unchecked_, since otherwise it // affects performance overlap_stats_t ostats; vector free_blocks; auto state = make_block_state(g, eweight.get_unchecked(max_edge_index), vweight.get_unchecked(num_vertices(g)), b.get_unchecked(num_vertices(g)), bg, emat, mrs, mrp.get_unchecked(num_vertices(bg)), mrm.get_unchecked(num_vertices(bg)), wr.get_unchecked(num_vertices(bg)), egroups.get_unchecked(num_vertices(bg)), esrcpos.get_unchecked(eidx), etgtpos.get_unchecked(eidx), sampler, cavity_sampler, partition_stats, ostats, identity, identity, free_blocks, false, false, true); vector states = {state}; vector> m_entries = {EntrySet(num_vertices(bg))}; move_sweep(states, m_entries, wr.get_unchecked(num_vertices(bg)), b.get_unchecked(num_vertices(g)), cv, vmap, label.get_unchecked(num_vertices(bg)), vlist, deg_corr, dense, multigraph, beta, eweight.get_unchecked(max_edge_index), vweight.get_unchecked(num_vertices(g)), g, sequential, parallel, random_move, c, nmerges, merge_map.get_unchecked(num_vertices(g)), niter, num_vertices(bg), verbose, rng, S, nmoves); } }; boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi, boost::any& emat, boost::any sampler, boost::any cavity_sampler, boost::any omrs, boost::any omrp, boost::any omrm, boost::any owr, boost::any ob, boost::any olabel, vector& vlist, bool deg_corr, bool dense, bool multigraph, boost::any oeweight, boost::any ovweight, boost::any oegroups, boost::any oesrcpos, boost::any oetgtpos, double beta, bool sequential, bool parallel, bool random_move, double c, bool weighted, size_t nmerges, boost::any omerge_map, size_t niter, partition_stats_t& partition_stats, bool verbose, rng_t& rng) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; typedef property_map_type::apply::type vemap_t; emap_t mrs = any_cast(omrs); vmap_t mrp = any_cast(omrp); vmap_t mrm = any_cast(omrm); vmap_t wr = any_cast(owr); vmap_t b = any_cast(ob); vmap_t label = any_cast(olabel); emap_t eweight = any_cast(oeweight); vmap_t vweight = any_cast(ovweight); vemap_t esrcpos = any_cast(oesrcpos); vemap_t etgtpos = any_cast(oetgtpos); double S = 0; size_t nmoves = 0; vmap_t merge_map = any_cast(omerge_map); run_action() (gi, std::bind(move_sweep_dispatch (eweight, vweight, oegroups, esrcpos, etgtpos, label, vlist, deg_corr, dense, multigraph, beta, sequential, parallel, random_move, c, verbose, gi.GetMaxEdgeIndex(), nmerges, niter, merge_map, partition_stats, rng, S, nmoves, bgi), mrs, mrp, mrm, wr, b, placeholders::_1, std::ref(emat), sampler, cavity_sampler, weighted))(); return boost::python::make_tuple(S, nmoves); } struct build_egroups { template void operator()(Vprop b, boost::any& oegroups, VEprop esrcpos, VEprop etgtpos, Eprop eweight, Graph& g, VertexIndex vertex_index, size_t B, bool weighted, bool empty) const { egroups_manage::build(b, oegroups, esrcpos, etgtpos, eweight, g, vertex_index, B, weighted, empty); } }; boost::any do_build_egroups(GraphInterface& gi, GraphInterface& bgi, boost::any ob, boost::any oeweights, boost::any oesrcpos, boost::any oetgtpos, bool weighted, bool empty) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; typedef property_map_type::apply::type vemap_t; vmap_t b = any_cast(ob); vemap_t esrcpos = any_cast(oesrcpos); vemap_t etgtpos = any_cast(oetgtpos); emap_t eweights = any_cast(oeweights); boost::any oegroups; run_action() (gi, std::bind(build_egroups(), b, std::ref(oegroups), esrcpos.get_unchecked(gi.GetMaxEdgeIndex()), etgtpos.get_unchecked(gi.GetMaxEdgeIndex()), eweights.get_unchecked(gi.GetMaxEdgeIndex()), placeholders::_1, bgi.GetVertexIndex(), bgi.GetNumberOfVertices(), weighted, empty))(); return oegroups; } boost::any do_init_neighbour_sampler(GraphInterface& gi, boost::any oeweights, bool self_loops, bool empty) { typedef property_map_type::apply::type emap_t; emap_t eweights = any_cast(oeweights); boost::any osampler; run_action() (gi, std::bind(init_neighbour_sampler(), placeholders::_1, eweights, self_loops, empty, std::ref(osampler)))(); return osampler; } struct collect_edge_marginals_dispatch { template void operator()(Graph& g, size_t B, Vprop cb, MEprop p, std::tuple abg) const { Graph& bg = *any_cast(get<0>(abg)); collect_edge_marginals(B, cb.get_unchecked(num_vertices(bg)), p, g, bg); } }; void do_collect_edge_marginals(GraphInterface& gi, GraphInterface& gbi, size_t B, boost::any ob, boost::any op) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply, GraphInterface::edge_index_map_t>::type emap_t; vmap_t b = any_cast(ob); emap_t p = any_cast(op); run_action() (gi, std::bind(collect_edge_marginals_dispatch(), placeholders::_1, B, b, p, std::tuple(gbi.GetGraphView(), gbi)))(); } boost::python::tuple do_bethe_entropy(GraphInterface& gi, size_t B, boost::any op, boost::any opv) { typedef property_map_type::apply, GraphInterface::vertex_index_map_t>::type vmap_t; typedef property_map_type::apply, GraphInterface::edge_index_map_t>::type emap_t; emap_t p = any_cast(op); vmap_t pv = any_cast(opv); double H=0, sH=0, Hmf=0, sHmf=0; run_action() (gi, std::bind(bethe_entropy(), placeholders::_1, B, p, pv, std::ref(H), std::ref(sH), std::ref(Hmf), std::ref(sHmf)))(); return boost::python::make_tuple(H, sH, Hmf, sHmf); } struct collect_vertex_marginals_dispatch { template void operator()(Graph& g, Vprop cb, MEprop p) const { collect_vertex_marginals(cb.get_unchecked(num_vertices(g)), p.get_unchecked(num_vertices(g)), g); } }; void do_collect_vertex_marginals(GraphInterface& gi, boost::any ob, boost::any op) { typedef property_map_type::apply::type vmap_t; vmap_t b = any_cast(ob); run_action() (gi, std::bind(collect_vertex_marginals_dispatch(), placeholders::_1, b, placeholders::_2), vertex_scalar_vector_properties())(op); } struct get_deg_entropy_term { template void operator()(Graph& g, double& S) const { for(auto v : vertices_range(g)) { S -= lgamma_fast(out_degree(v, g) + 1); S -= lgamma_fast(in_degreeS()(v, g) + 1); } } }; struct get_deg_entropy_term_overlap { template void operator()(Graph& g, Vprop b, overlap_stats_t& overlap_stats, size_t N, double& S) const { #ifdef HAVE_SPARSEHASH typedef dense_hash_map> map_t; #else typedef unordered_map map_t; #endif map_t in_hist, out_hist; #ifdef HAVE_SPARSEHASH in_hist.set_empty_key(numeric_limits::max()); out_hist.set_empty_key(numeric_limits::max()); #endif for (size_t v = 0; v < N; ++v) { in_hist.clear(); out_hist.clear(); auto& half_edges = overlap_stats.get_half_edges(v); for (size_t u : half_edges) { in_hist[b[u]] += in_degreeS()(u, g); out_hist[b[u]] += out_degree(u, g); } for (auto& k_c : in_hist) S -= lgamma_fast(k_c.second + 1); for (auto& k_c : out_hist) S -= lgamma_fast(k_c.second + 1); } } }; double do_get_deg_entropy_term(GraphInterface& gi, boost::any ob, overlap_stats_t& overlap_stats, size_t N) { typedef property_map_type::apply::type vmap_t; double S = 0; if (!ob.empty()) { vmap_t b = any_cast(ob); run_action<>() (gi, std::bind(get_deg_entropy_term_overlap(), placeholders::_1, b, std::ref(overlap_stats), N, std::ref(S)))(); } else { run_action<>() (gi, std::bind(get_deg_entropy_term(), placeholders::_1, std::ref(S)))(); } return S; } vector get_vector(size_t n) { return vector(n); } template void vector_map(boost::python::object ovals, boost::python::object omap) { multi_array_ref vals = get_array(ovals); multi_array_ref map = get_array(omap); size_t pos = 0; for (size_t i = 0; i < vals.size(); ++i) { Value v = vals[i]; if (map[v] == -1) map[v] = pos++; vals[i] = map[v]; } } template void vector_continuous_map(boost::python::object ovals) { multi_array_ref vals = get_array(ovals); unordered_map map; for (size_t i = 0; i < vals.size(); ++i) { Value v = vals[i]; auto iter = map.find(v); if (iter == map.end()) iter = map.insert(make_pair(v, map.size())).first; vals[i] = iter->second; } } template void vector_rmap(boost::python::object ovals, boost::python::object omap) { multi_array_ref vals = get_array(ovals); multi_array_ref map = get_array(omap); for (size_t i = 0; i < vals.size(); ++i) { map[vals[i]] = i; } } python::tuple python_get_mu_l(double N, double E, double epsilon=1e-8) { double mu, l; get_mu_l(N, E, mu, l, epsilon); return python::make_tuple(mu, l); } struct get_partition_stats { template void operator()(Graph& g, Vprop b, Eprop eweight, size_t N, size_t B, bool edges_dl, partition_stats_t& partition_stats) const { partition_stats = partition_stats_t(g, b, eweight, N, B, edges_dl); } }; partition_stats_t do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight, size_t N, size_t B, bool edges_dl) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; partition_stats_t partition_stats; vmap_t b = any_cast(ob); emap_t eweight = any_cast(aeweight); run_action<>()(gi, std::bind(get_partition_stats(), placeholders::_1, b, eweight, N, B, edges_dl, std::ref(partition_stats)))(); return partition_stats; } void export_blockmodel() { using namespace boost::python; class_("partition_stats") .def("is_enabled", &partition_stats_t::is_enabled) .def("get_partition_dl", &partition_stats_t::get_partition_dl) .def("get_deg_dl", &partition_stats_t::get_deg_dl); def("init_partition_stats", do_get_partition_stats); def("init_safelog", init_safelog); def("clear_safelog", clear_safelog); def("init_xlogx", init_xlogx); def("clear_xlogx", clear_xlogx); def("init_lgamma", init_lgamma); def("clear_lgamma", clear_lgamma); def("get_xi", get_xi); def("get_xi_fast", get_xi_fast); def("get_mu_l", python_get_mu_l); def("polylog", polylog); def("get_vector", get_vector); def("vector_map", vector_map); def("vector_map64", vector_map); def("vector_rmap", vector_rmap); def("vector_rmap64", vector_rmap); def("vector_continuous_map", vector_continuous_map); def("vector_continuous_map64", vector_continuous_map); def("create_emat", do_create_emat); def("create_ehash", do_create_ehash); def("build_egroups", do_build_egroups); def("init_neighbour_sampler", do_init_neighbour_sampler); def("move_sweep", do_move_sweep); def("entropy", do_get_ent); def("entropy_dense", do_get_ent_dense); def("entropy_parallel", do_get_ent_parallel); def("deg_entropy_term", do_get_deg_entropy_term); def("edge_marginals", do_collect_edge_marginals); def("bethe_entropy", do_bethe_entropy); def("vertex_marginals", do_collect_vertex_marginals); }