// graph-tool -- a general graph modification and manipulation thingy // // Copyright (C) 2006-2015 Tiago de Paula Peixoto // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . #define BOOST_PYTHON_MAX_ARITY 40 #include #include #include #include "numpy_bind.hh" #include #include #include "graph_filtering.hh" #include "graph.hh" #include "graph_selectors.hh" #include "graph_properties.hh" #include "graph_util.hh" #include "random.hh" #include "config.h" #include "graph_blockmodel_overlap.hh" #include "graph_blockmodel.hh" using namespace boost; using namespace graph_tool; // ==================== // Entropy calculation // ==================== // Repeated computation of x*log(x) and log(x) actually adds up to a lot of // time. A significant speedup can be made by caching pre-computed values. This // is doable since the values of mrse are bounded in [0, 2E], where E is the // total number of edges in the network. Here we simply grow the cache as // needed. namespace graph_tool { vector __safelog_cache; vector __xlogx_cache; vector __lgamma_cache; void init_safelog(size_t x) { size_t old_size = __safelog_cache.size(); if (x >= old_size) { __safelog_cache.resize(x + 1); for (size_t i = old_size; i < __safelog_cache.size(); ++i) __safelog_cache[i] = safelog(double(i)); } } void clear_safelog() { vector().swap(__safelog_cache); } void init_xlogx(size_t x) { size_t old_size = __xlogx_cache.size(); if (x >= old_size) { __xlogx_cache.resize(x + 1); for (size_t i = old_size; i < __xlogx_cache.size(); ++i) __xlogx_cache[i] = i * safelog(i); } } void clear_xlogx() { vector().swap(__xlogx_cache); } void init_lgamma(size_t x) { size_t old_size = __lgamma_cache.size(); if (x >= old_size) { __lgamma_cache.resize(x + 1); for (size_t i = old_size; i < __lgamma_cache.size(); ++i) __lgamma_cache[i] = lgamma(i); } } void clear_lgamma() { vector().swap(__lgamma_cache); } } double do_get_ent(GraphInterface& gi, boost::any omrs, boost::any omrp, boost::any omrm, boost::any owr, bool deg_corr) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; emap_t mrs = any_cast(omrs); vmap_t mrp = any_cast(omrp); vmap_t mrm = any_cast(omrm); vmap_t wr = any_cast(owr); double S = 0; run_action<>() (gi, std::bind(entropy(), mrs, mrp, mrm, wr, deg_corr, placeholders::_1, std::ref(S)))(); return S; } double do_get_ent_dense(GraphInterface& gi, boost::any omrs, boost::any owr, bool multigraph) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; emap_t mrs = any_cast(omrs); vmap_t wr = any_cast(owr); double S = 0; run_action<>() (gi, std::bind(entropy_dense(), mrs, wr, multigraph, placeholders::_1, std::ref(S)))(); return S; } double do_get_ent_parallel(GraphInterface& gi, boost::any oweight) { typedef property_map_type::apply::type emap_t; emap_t weight = any_cast(oweight); double S = 0; run_action<>() (gi, std::bind(entropy_parallel_edges(), placeholders::_1, weight, std::ref(S)))(); return S; } boost::any do_create_emat(GraphInterface& gi, boost::any ob, GraphInterface& bgi) { boost::any emat; typedef property_map_type::apply::type vmap_t; vmap_t b = any_cast(ob); if (gi.get_directed()) { run_action<>()(gi, std::bind(create_emat(), placeholders::_1, std::ref(b), std::ref(bgi.get_graph()), std::ref(emat)))(); } else { UndirectedAdaptor ug(bgi.get_graph()); run_action<>()(gi, std::bind(create_emat(), placeholders::_1, std::ref(b), std::ref(ug), std::ref(emat)))(); } return emat; } boost::any do_create_ehash(GraphInterface& gi, boost::any ob, GraphInterface& bgi, rng_t& rng) { boost::any emat; typedef property_map_type::apply::type vmap_t; vmap_t b = any_cast(ob); if (gi.get_directed()) { run_action<>()(gi, std::bind(create_ehash(), placeholders::_1, std::ref(b), std::ref(bgi.get_graph()), std::ref(emat), std::ref(rng)))(); } else { UndirectedAdaptor ug(bgi.get_graph()); run_action<>()(gi, std::bind(create_ehash(), placeholders::_1, std::ref(b), std::ref(ug), std::ref(emat), std::ref(rng)))(); } return emat; } //============ // Main loops //============ template struct move_sweep_dispatch { move_sweep_dispatch(Eprop eweight, Vprop vweight, boost::any egroups, VEprop esrcpos, VEprop etgtpos, Vprop label, vector& vlist, vector& block_list, vector& target_list, bool deg_corr, bool dense, bool multigraph, double beta, bool sequential, bool parallel, bool random_move, double c, bool verbose, size_t edge_index_range, size_t nmerges, size_t niter, Vprop merge_map, partition_stats_t& partition_stats, rng_t& rng, double& S, size_t& nmoves, GraphInterface& bgi) : eweight(eweight), vweight(vweight), oegroups(egroups), esrcpos(esrcpos), etgtpos(etgtpos), label(label), vlist(vlist), block_list(block_list), target_list(target_list), deg_corr(deg_corr), dense(dense), multigraph(multigraph), beta(beta), sequential(sequential), parallel(parallel), random_move(random_move), c(c), verbose(verbose), edge_index_range(edge_index_range), nmerges(nmerges), niter(niter), merge_map(merge_map), partition_stats(partition_stats), rng(rng), S(S), nmoves(nmoves), bgi(bgi) {} Eprop eweight; Vprop vweight; boost::any oegroups; VEprop esrcpos; VEprop etgtpos; Vprop label; size_t n; vector& vlist; vector& block_list; vector& target_list; bool deg_corr; bool dense; bool multigraph; double beta; bool sequential; bool parallel; bool random_move; double c; bool verbose; size_t edge_index_range; size_t nmerges; size_t niter; Vprop merge_map; partition_stats_t& partition_stats; rng_t& rng; double& S; size_t& nmoves; GraphInterface& bgi; template void operator()(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, boost::any& emat, boost::any sampler, boost::any cavity_sampler, bool weighted) const { if (is_directed::apply::type::value) { dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, cavity_sampler, bgi.get_graph(), weighted); } else { UndirectedAdaptor ug(bgi.get_graph()); dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, cavity_sampler, ug, weighted); } } template void dispatch(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, boost::any& aemat, boost::any asampler, boost::any acavity_sampler, BGraph& bg, bool weighted) const { if (weighted) { typedef typename property_map_type::apply::edge_descriptor, bool> >, GraphInterface::vertex_index_map_t>::type vemap_t; vemap_t egroups = any_cast(oegroups); try { typedef typename get_emat_t::apply::type emat_t; emat_t& emat = any_cast(aemat); size_t B = num_vertices(bg); size_t max_BE = is_directed::apply::type::value ? B * B : (B * (B + 1)) / 2; dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } catch (bad_any_cast&) { typedef typename get_ehash_t::apply::type emat_t; emat_t& emat = any_cast(aemat); dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } } else { typedef typename property_map_type::apply::edge_descriptor, bool> >, GraphInterface::vertex_index_map_t>::type vemap_t; vemap_t egroups = any_cast(oegroups); try { typedef typename get_emat_t::apply::type emat_t; emat_t& emat = any_cast(aemat); size_t B = num_vertices(bg); size_t max_BE = is_directed::apply::type::value ? B * B : (B * (B + 1)) / 2; dispatch(mrs.get_unchecked(max_BE), mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } catch (bad_any_cast&) { typedef typename get_ehash_t::apply::type emat_t; emat_t& emat = any_cast(aemat); dispatch(mrs, mrp, mrm, wr, b, g, asampler, acavity_sampler, bg, egroups, emat); } } } template void dispatch(MEprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g, boost::any asampler, boost::any acavity_sampler, BGraph& bg, Egroups egroups, Emat& emat) const { typedef typename graph_traits::vertex_descriptor vertex_t; size_t eidx = random_move ? 1 : edge_index_range; typedef typename property_map::type vindex_map_t; typedef typename property_map_type::apply, vindex_map_t>::type::unchecked_t sampler_map_t; sampler_map_t sampler = any_cast(asampler); sampler_map_t cavity_sampler = any_cast(acavity_sampler); ConstantPropertyMap, typename graph_traits::vertex_descriptor> cv(std::array({{-1}})); IdentityArrayPropertyMap::vertex_descriptor> vmap; boost::typed_identity_property_map identity; // make sure the properties are _unchecked_, since otherwise it // affects performance overlap_stats_t ostats; vector free_blocks; auto state = make_block_state(g, eweight.get_unchecked(edge_index_range), vweight.get_unchecked(num_vertices(g)), b.get_unchecked(num_vertices(g)), bg, emat, mrs, mrp.get_unchecked(num_vertices(bg)), mrm.get_unchecked(num_vertices(bg)), wr.get_unchecked(num_vertices(bg)), egroups.get_unchecked(num_vertices(bg)), esrcpos.get_unchecked(eidx), etgtpos.get_unchecked(eidx), sampler, cavity_sampler, partition_stats, ostats, identity, identity, free_blocks, false, false, true); vector states = {state}; vector> m_entries = {EntrySet(num_vertices(bg))}; if (target_list.empty()) { move_sweep(states, m_entries, wr.get_unchecked(num_vertices(bg)), b.get_unchecked(num_vertices(g)), cv, vmap, label.get_unchecked(num_vertices(bg)), vlist, block_list, deg_corr, dense, multigraph, beta, eweight.get_unchecked(edge_index_range), vweight.get_unchecked(num_vertices(g)), g, sequential, parallel, random_move, c, nmerges, merge_map.get_unchecked(num_vertices(g)), niter, num_vertices(bg), verbose, rng, S, nmoves); } else { auto ub = b.get_unchecked(num_vertices(g)); for (size_t i = 0; i < vlist.size(); ++i) { size_t v = vlist[i]; size_t s = target_list[i]; S += virtual_move(v, s, ub, cv, vmap, states, m_entries, dense, deg_corr, multigraph); move_vertex(v, s, ub, cv, vmap, deg_corr, states, not random_move); nmoves++; } } } }; boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi, boost::any& emat, boost::any sampler, boost::any cavity_sampler, boost::any omrs, boost::any omrp, boost::any omrm, boost::any owr, boost::any ob, boost::any olabel, vector& vlist, vector& block_list, vector& target_list, bool deg_corr, bool dense, bool multigraph, boost::any oeweight, boost::any ovweight, boost::any oegroups, boost::any oesrcpos, boost::any oetgtpos, double beta, bool sequential, bool parallel, bool random_move, double c, bool weighted, size_t nmerges, boost::any omerge_map, size_t niter, partition_stats_t& partition_stats, bool verbose, rng_t& rng) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; typedef property_map_type::apply::type vemap_t; emap_t mrs = any_cast(omrs); vmap_t mrp = any_cast(omrp); vmap_t mrm = any_cast(omrm); vmap_t wr = any_cast(owr); vmap_t b = any_cast(ob); vmap_t label = any_cast(olabel); emap_t eweight = any_cast(oeweight); vmap_t vweight = any_cast(ovweight); vemap_t esrcpos = any_cast(oesrcpos); vemap_t etgtpos = any_cast(oetgtpos); double S = 0; size_t nmoves = 0; vmap_t merge_map = any_cast(omerge_map); run_action() (gi, std::bind(move_sweep_dispatch (eweight, vweight, oegroups, esrcpos, etgtpos, label, vlist, block_list, target_list, deg_corr, dense, multigraph, beta, sequential, parallel, random_move, c, verbose, gi.get_edge_index_range(), nmerges, niter, merge_map, partition_stats, rng, S, nmoves, bgi), mrs, mrp, mrm, wr, b, placeholders::_1, std::ref(emat), sampler, cavity_sampler, weighted))(); return boost::python::make_tuple(S, nmoves); } struct build_egroups { template void operator()(Vprop b, boost::any& oegroups, VEprop esrcpos, VEprop etgtpos, Eprop eweight, Graph& g, VertexIndex vertex_index, size_t B, bool weighted, bool empty) const { egroups_manage::build(b, oegroups, esrcpos, etgtpos, eweight, g, vertex_index, B, weighted, empty); } }; boost::any do_build_egroups(GraphInterface& gi, GraphInterface& bgi, boost::any ob, boost::any oeweights, boost::any oesrcpos, boost::any oetgtpos, bool weighted, bool empty) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; typedef property_map_type::apply::type vemap_t; vmap_t b = any_cast(ob); vemap_t esrcpos = any_cast(oesrcpos); vemap_t etgtpos = any_cast(oetgtpos); emap_t eweights = any_cast(oeweights); boost::any oegroups; run_action() (gi, std::bind(build_egroups(), b, std::ref(oegroups), esrcpos.get_unchecked(gi.get_edge_index_range()), etgtpos.get_unchecked(gi.get_edge_index_range()), eweights.get_unchecked(gi.get_edge_index_range()), placeholders::_1, bgi.get_vertex_index(), bgi.get_num_vertices(), weighted, empty))(); return oegroups; } boost::any do_init_neighbour_sampler(GraphInterface& gi, boost::any oeweights, bool self_loops, bool empty) { typedef property_map_type::apply::type emap_t; emap_t eweights = any_cast(oeweights); boost::any osampler; run_action() (gi, std::bind(init_neighbour_sampler(), placeholders::_1, eweights, self_loops, empty, std::ref(osampler)))(); return osampler; } struct collect_edge_marginals_dispatch { template void operator()(Graph& g, size_t B, Vprop cb, MEprop p, std::tuple abg) const { Graph& bg = *any_cast(get<0>(abg)); collect_edge_marginals(B, cb.get_unchecked(num_vertices(bg)), p, g, bg); } }; void do_collect_edge_marginals(GraphInterface& gi, GraphInterface& gbi, size_t B, boost::any ob, boost::any op) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply, GraphInterface::edge_index_map_t>::type emap_t; vmap_t b = any_cast(ob); emap_t p = any_cast(op); run_action() (gi, std::bind(collect_edge_marginals_dispatch(), placeholders::_1, B, b, p, std::tuple(gbi.get_graph_view(), gbi)))(); } boost::python::tuple do_bethe_entropy(GraphInterface& gi, size_t B, boost::any op, boost::any opv) { typedef property_map_type::apply, GraphInterface::vertex_index_map_t>::type vmap_t; typedef property_map_type::apply, GraphInterface::edge_index_map_t>::type emap_t; emap_t p = any_cast(op); vmap_t pv = any_cast(opv); double H=0, sH=0, Hmf=0, sHmf=0; run_action() (gi, std::bind(bethe_entropy(), placeholders::_1, B, p, pv, std::ref(H), std::ref(sH), std::ref(Hmf), std::ref(sHmf)))(); return boost::python::make_tuple(H, sH, Hmf, sHmf); } struct collect_vertex_marginals_dispatch { template void operator()(Graph& g, Vprop cb, MEprop p) const { collect_vertex_marginals(cb.get_unchecked(num_vertices(g)), p.get_unchecked(num_vertices(g)), g); } }; void do_collect_vertex_marginals(GraphInterface& gi, boost::any ob, boost::any op) { typedef property_map_type::apply::type vmap_t; vmap_t b = any_cast(ob); run_action() (gi, std::bind(collect_vertex_marginals_dispatch(), placeholders::_1, b, placeholders::_2), vertex_scalar_vector_properties())(op); } struct get_deg_entropy_term { template void operator()(Graph& g, Emap eweight, VImap ignore_degrees, double& S) const { for(auto v : vertices_range(g)) { if (ignore_degrees[v] == 1) continue; S -= lgamma_fast(in_degreeS()(v, g, eweight) + 1); if (ignore_degrees[v] == 2) continue; S -= lgamma_fast(out_degreeS()(v, g, eweight) + 1); } } }; struct get_deg_entropy_term_overlap { template void operator()(Graph& g, Vprop b, overlap_stats_t& overlap_stats, size_t N, double& S) const { typedef gt_hash_map map_t; map_t in_hist, out_hist; for (size_t v = 0; v < N; ++v) { in_hist.clear(); out_hist.clear(); auto& half_edges = overlap_stats.get_half_edges(v); for (size_t u : half_edges) { in_hist[b[u]] += in_degreeS()(u, g); out_hist[b[u]] += out_degree(u, g); } for (auto& k_c : in_hist) S -= lgamma_fast(k_c.second + 1); for (auto& k_c : out_hist) S -= lgamma_fast(k_c.second + 1); } } }; double do_get_deg_entropy_term(GraphInterface& gi, boost::any ob, overlap_stats_t& overlap_stats, size_t N, boost::any aeweight, boost::any aignore_degrees) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; typedef property_map_type::apply::type vimap_t; double S = 0; if (!ob.empty()) { vmap_t b = any_cast(ob); run_action<>() (gi, std::bind(get_deg_entropy_term_overlap(), placeholders::_1, b, std::ref(overlap_stats), N, std::ref(S)))(); } else { emap_t eweight = any_cast(aeweight); vimap_t ignore_degrees = any_cast(aignore_degrees); run_action<>() (gi, std::bind(get_deg_entropy_term(), placeholders::_1, eweight, ignore_degrees, std::ref(S)))(); } return S; } vector get_vector(size_t n) { return vector(n); } template void vector_map(boost::python::object ovals, boost::python::object omap) { multi_array_ref vals = get_array(ovals); multi_array_ref map = get_array(omap); size_t pos = 0; for (size_t i = 0; i < vals.size(); ++i) { Value v = vals[i]; if (map[v] == -1) map[v] = pos++; vals[i] = map[v]; } } template void vector_continuous_map(boost::python::object ovals) { multi_array_ref vals = get_array(ovals); gt_hash_map map; for (size_t i = 0; i < vals.size(); ++i) { Value v = vals[i]; auto iter = map.find(v); if (iter == map.end()) iter = map.insert(make_pair(v, map.size())).first; vals[i] = iter->second; } } template void vector_rmap(boost::python::object ovals, boost::python::object omap) { multi_array_ref vals = get_array(ovals); multi_array_ref map = get_array(omap); for (size_t i = 0; i < vals.size(); ++i) { map[vals[i]] = i; } } python::tuple python_get_mu_l(double N, double E, double epsilon=1e-8) { double mu, l; get_mu_l(N, E, mu, l, epsilon); return python::make_tuple(mu, l); } struct get_partition_stats { template void operator()(Graph& g, Vprop b, Eprop eweight, size_t N, size_t B, bool edges_dl, partition_stats_t& partition_stats, Mprop ignore_degrees) const { partition_stats = partition_stats_t(g, b, eweight, N, B, edges_dl, ignore_degrees); } }; partition_stats_t do_get_partition_stats(GraphInterface& gi, boost::any ob, boost::any aeweight, size_t N, size_t B, bool edges_dl, boost::any aignore_degrees) { typedef property_map_type::apply::type vmap_t; typedef property_map_type::apply::type emap_t; typedef property_map_type::apply::type mvmap_t; partition_stats_t partition_stats; vmap_t b = any_cast(ob); emap_t eweight = any_cast(aeweight); mvmap_t ignore_degrees = any_cast(aignore_degrees); run_action<>()(gi, std::bind(get_partition_stats(), placeholders::_1, b, eweight, N, B, edges_dl, std::ref(partition_stats), ignore_degrees))(); return partition_stats; } void export_blockmodel() { using namespace boost::python; class_("partition_stats") .def("is_enabled", &partition_stats_t::is_enabled) .def("get_partition_dl", &partition_stats_t::get_partition_dl) .def("get_deg_dl", &partition_stats_t::get_deg_dl); def("init_partition_stats", do_get_partition_stats); def("init_safelog", init_safelog); def("clear_safelog", clear_safelog); def("init_xlogx", init_xlogx); def("clear_xlogx", clear_xlogx); def("init_lgamma", init_lgamma); def("clear_lgamma", clear_lgamma); def("get_xi", get_xi); def("get_xi_fast", get_xi_fast); def("get_mu_l", python_get_mu_l); def("polylog", polylog); def("lbinom_careful", lbinom_careful); def("lbinom_fast", lbinom_fast); def("lbinom", lbinom); def("get_vector", get_vector); def("vector_map", vector_map); def("vector_map64", vector_map); def("vector_rmap", vector_rmap); def("vector_rmap64", vector_rmap); def("vector_continuous_map", vector_continuous_map); def("vector_continuous_map64", vector_continuous_map); def("create_emat", do_create_emat); def("create_ehash", do_create_ehash); def("build_egroups", do_build_egroups); def("init_neighbour_sampler", do_init_neighbour_sampler); def("move_sweep", do_move_sweep); def("entropy", do_get_ent); def("entropy_dense", do_get_ent_dense); def("entropy_parallel", do_get_ent_parallel); def("deg_entropy_term", do_get_deg_entropy_term); def("edge_marginals", do_collect_edge_marginals); def("bethe_entropy", do_bethe_entropy); def("vertex_marginals", do_collect_vertex_marginals); }