Commit 959b5fd9 authored by Tiago Peixoto's avatar Tiago Peixoto

Improve performance and memory usage of blockmodel inference code

parent a4834bf7
......@@ -439,23 +439,45 @@ struct move_sweep_dispatch
template <class Graph>
void operator()(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b,
Graph& g, boost::any& emat, boost::any sampler) const
Graph& g, boost::any& emat, boost::any sampler,
bool weighted) const
{
if (is_directed::apply<Graph>::type::value)
{
dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, bgi.GetGraph());
dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, bgi.GetGraph(),
weighted);
}
else
{
UndirectedAdaptor<GraphInterface::multigraph_t> ug(bgi.GetGraph());
dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, ug);
dispatch(mrs, mrp, mrm, wr, b, g, emat, sampler, ug, weighted);
}
}
template <class Graph, class BGraph>
void dispatch(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g,
boost::any& aemat, boost::any asampler, BGraph& bg) const
boost::any& aemat, boost::any asampler, BGraph& bg, bool weighted) const
{
if (weighted)
{
typedef typename property_map_type::apply<DynamicSampler<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >,
GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups);
dispatch(mrs, mrp, mrm, wr, b, g, aemat, asampler, bg, egroups);
}
else
{
typedef typename property_map_type::apply<vector<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool> >,
GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups);
dispatch(mrs, mrp, mrm, wr, b, g, aemat, asampler, bg, egroups);
}
}
template <class Graph, class BGraph, class Egroups>
void dispatch(Eprop mrs, Vprop mrp, Vprop mrm, Vprop wr, Vprop b, Graph& g,
boost::any& aemat, boost::any asampler, BGraph& bg,
Egroups egroups) const
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
......@@ -463,9 +485,6 @@ struct move_sweep_dispatch
size_t max_BE = is_directed::apply<Graph>::type::value ?
B * B : (B * (B + 1)) / 2;
typedef typename property_map_type::apply<vector<std::tuple<typename graph_traits<Graph>::edge_descriptor, bool, size_t> >,
GraphInterface::vertex_index_map_t>::type vemap_t;
vemap_t egroups = any_cast<vemap_t>(oegroups);
size_t eidx = random_move ? 1 : max_edge_index;
......@@ -480,8 +499,8 @@ struct move_sweep_dispatch
typedef typename get_emat_t::apply<BGraph>::type emat_t;
emat_t& emat = any_cast<emat_t&>(aemat);
//make sure the properties are _unchecked_, since otherwise it affects
//performance
// make sure the properties are _unchecked_, since otherwise it
// affects performance
move_sweep(mrs.get_unchecked(max_BE),
mrp.get_unchecked(num_vertices(bg)),
......@@ -531,7 +550,8 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
boost::any ovweight, boost::any oegroups,
boost::any oesrcpos, boost::any oetgtpos,
double beta, bool sequential, bool random_move,
double c, bool verbose, rng_t& rng)
double c, bool weighted, bool verbose,
rng_t& rng)
{
typedef property_map_type::apply<int32_t,
GraphInterface::vertex_index_map_t>::type
......@@ -539,7 +559,7 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
typedef property_map_type::apply<int32_t,
GraphInterface::edge_index_map_t>::type
emap_t;
typedef property_map_type::apply<vector<int32_t>,
typedef property_map_type::apply<int32_t,
GraphInterface::edge_index_map_t>::type
vemap_t;
emap_t mrs = any_cast<emap_t>(omrs);
......@@ -564,7 +584,7 @@ boost::python::object do_move_sweep(GraphInterface& gi, GraphInterface& bgi,
sequential, random_move, c, verbose,
gi.GetMaxEdgeIndex(), rng, S, nmoves, bgi),
mrs, mrp, mrm, wr, b, placeholders::_1,
std::ref(emat), sampler))();
std::ref(emat), sampler, weighted))();
return boost::python::make_tuple(S, nmoves);
}
......@@ -681,10 +701,25 @@ boost::python::object do_merge_sweep(GraphInterface& bgi, boost::any& emat,
return boost::python::make_tuple(S, nmoves);
}
struct build_egroups
{
template <class Eprop, class Vprop, class VEprop, class Graph, class VertexIndex>
void operator()(Vprop b, boost::any& oegroups, VEprop esrcpos,
VEprop etgtpos, Eprop eweight, Graph& g,
VertexIndex vertex_index, bool weighted, bool empty) const
{
if (empty)
return;
egroups_manage::build(b, oegroups, esrcpos, etgtpos, eweight, g,
vertex_index, weighted);
}
};
boost::any do_build_egroups(GraphInterface& gi, GraphInterface& bgi,
boost::any ob, boost::any oeweights,
boost::any oesrcpos, boost::any oetgtpos,
bool empty)
bool weighted, bool empty)
{
typedef property_map_type::apply<int32_t,
GraphInterface::vertex_index_map_t>::type
......@@ -692,7 +727,7 @@ boost::any do_build_egroups(GraphInterface& gi, GraphInterface& bgi,
typedef property_map_type::apply<int32_t,
GraphInterface::edge_index_map_t>::type
emap_t;
typedef property_map_type::apply<vector<int32_t>,
typedef property_map_type::apply<int32_t,
GraphInterface::edge_index_map_t>::type
vemap_t;
vmap_t b = any_cast<vmap_t>(ob);
......@@ -707,7 +742,8 @@ boost::any do_build_egroups(GraphInterface& gi, GraphInterface& bgi,
esrcpos.get_unchecked(gi.GetMaxEdgeIndex()),
etgtpos.get_unchecked(gi.GetMaxEdgeIndex()),
eweights.get_unchecked(gi.GetMaxEdgeIndex()),
placeholders::_1, bgi.GetVertexIndex(), empty))();
placeholders::_1, bgi.GetVertexIndex(), weighted,
empty))();
return oegroups;
}
......
This diff is collapsed.
......@@ -39,4 +39,5 @@ libgraph_tool_generation_la_include_HEADERS = \
graph_geometric.hh \
graph_complete.hh \
graph_price.hh \
dynamic_sampler.hh \
sampler.hh
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2013 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef DYNAMIC_SAMPLER_HH
#define DYNAMIC_SAMPLER_HH
#include "random.hh"
#include <functional>
#include <boost/mpl/if.hpp>
namespace graph_tool
{
using namespace std;
using namespace boost;
template <class Value>
class DynamicSampler
{
public:
DynamicSampler() : _back(0) {}
DynamicSampler(const vector<Value>& items,
const vector<double>& probs)
: _back(0)
{
for (size_t i = 0; i < items.size(); ++i)
insert(items[i], probs[i]);
}
typedef Value value_type;
size_t get_left(size_t i) { return 2 * i + 1; }
size_t get_right(size_t i) { return 2 * i + 2; }
size_t get_parent(size_t i) { return i > 0 ? (i - 1) / 2 : 0; }
template <class RNG>
const Value& sample(RNG& rng)
{
uniform_real_distribution<> sample(0, 1);
double u = _tree[0] * sample(rng), c = 0;
size_t pos = 0;
while (_idx[pos] < 0)
{
size_t l = get_left(pos);
double a = _tree[l];
if (u < a + c)
{
pos = l;
}
else
{
pos = get_right(pos);
c += a;
}
}
size_t i = _idx[pos];
return _items[i];
}
size_t insert(const Value& v, double w)
{
size_t pos;
if (_free.empty())
{
if (_back > 0)
{
// move parent to left leaf
pos = get_parent(_back);
size_t l = get_left(pos);
_idx[l] = _idx[pos];
_ipos[_idx[l]] = l;
_tree[l] = _tree[pos];
_idx[pos] = -1;
// position new item to the right
_back = get_right(pos);
}
pos = _back;
check_size(pos);
_idx[pos] = _items.size();
_items.push_back(v);
_ipos.push_back(pos);
_tree[pos] = w;
_back++;
check_size(_back);
}
else
{
pos = _free.back();
_items[_idx[pos]] = v;
_tree[pos] = w;
_free.pop_back();
}
insert_leaf_prob(pos);
return _idx[pos];
}
void remove(size_t i)
{
size_t pos = _ipos[i];
remove_leaf_prob(pos);
_free.push_back(pos);
}
void reset()
{
_items.clear();
_tree.clear();
_idx.clear();
_back = 0;
_free.clear();
}
void rebuild()
{
vector<Value> items;
vector<double> probs;
for (size_t i = 0; i < _tree.size(); ++i)
{
if (_idx[i] < 0)
continue;
items.push_back(_items[_idx[i]]);
probs.push_back(_tree[i]);
}
reset();
for (size_t i = 0; i < items.size(); ++i)
insert(items[i], probs[i]);
}
private:
void check_size(size_t i)
{
if (i >= _tree.size())
{
_idx.resize(i + 1, -1);
_tree.resize(i + 1, 0);
}
}
void remove_leaf_prob(size_t i)
{
size_t parent = i;
double w = _tree[i];
while (parent > 0)
{
parent = get_parent(parent);
_tree[parent] -= w;
}
_tree[i] = 0;
}
void insert_leaf_prob(size_t i)
{
size_t parent = i;
double w = _tree[i];
while (parent > 0)
{
parent = get_parent(parent);
_tree[parent] += w;
}
}
vector<Value> _items;
vector<size_t> _ipos; // position of the item in the tree
vector<double> _tree; // tree nodes with weight sums
vector<int> _idx; // index in _items
int _back; // last item in tree
vector<size_t> _free; // empty leafs
};
} // namespace graph_tool
#endif // DYNAMIC_SAMPLER_HH
......@@ -20,6 +20,7 @@
#include "graph_filtering.hh"
#include "graph_generation.hh"
#include "sampler.hh"
#include "dynamic_sampler.hh"
#include <boost/python.hpp>
using namespace std;
......@@ -115,4 +116,14 @@ BOOST_PYTHON_MODULE(libgraph_tool_generation)
init<const vector<int>&, const vector<double>&>())
.def("sample", &Sampler<int, boost::mpl::false_>::sample<rng_t>,
return_value_policy<copy_const_reference>());
class_<DynamicSampler<int>>("DynamicSampler",
init<const vector<int>&,
const vector<double>&>())
.def("sample", &DynamicSampler<int>::sample<rng_t>,
return_value_policy<copy_const_reference>())
.def("insert", &DynamicSampler<int>::insert)
.def("remove", &DynamicSampler<int>::remove)
.def("reset", &DynamicSampler<int>::reset)
.def("rebuild", &DynamicSampler<int>::rebuild);
}
......@@ -30,7 +30,7 @@ using namespace boost;
// Discrete sampling via vose's alias method.
// See http://www.keithschwarz.com/darts-dice-coins/ for a very clear
// explanation,
// explanation.
template <class Value, class KeepReference = mpl::true_>
class Sampler
......
......@@ -178,15 +178,16 @@ class BlockState(object):
self.emat = libcommunity.create_ehash(self.bg._Graph__graph)
def __build_egroups(self, empty=False):
self.esrcpos = self.g.new_edge_property("vector<int>")
self.etgtpos = self.g.new_edge_property("vector<int>")
self.esrcpos = self.g.new_edge_property("int")
self.etgtpos = self.g.new_edge_property("int")
self.is_weighted = True if self.eweight.fa.max() > 1 else False
self.egroups = libcommunity.build_egroups(self.g._Graph__graph,
self.bg._Graph__graph,
_prop("v", self.g, self.b),
_prop("e", self.g, self.eweight),
_prop("e", self.g, self.esrcpos),
_prop("e", self.g, self.etgtpos),
empty)
self.is_weighted, empty)
def __build_nsampler(self):
self.nsampler = libcommunity.init_neighbour_sampler(self.g._Graph__graph,
......@@ -511,7 +512,7 @@ class BlockState(object):
state = greedy_shrink(state, B=Bi, nsweeps=niter,
epsilon=1e-3, c=0,
nmerge_sweeps=niter, sequential=True)
nmerge_sweeps=niter)
for i in range(niter):
mcmc_sweep(state, c=0, beta=float("inf"))
......@@ -895,7 +896,8 @@ def mcmc_sweep(state, beta=1., random_move=False, c=1., dense=False,
_prop("e", state.g, state.esrcpos),
_prop("e", state.g, state.etgtpos),
float(beta), sequential, random_move,
c, verbose, _get_rng())
c, state.is_weighted, verbose,
_get_rng())
finally:
if random_move:
state.egroups = None
......
......@@ -1593,3 +1593,12 @@ class Sampler(libgraph_tool_generation.Sampler):
def sample(self):
return libgraph_tool_generation.Sampler.sample(self, _get_rng())
class DynamicSampler(libgraph_tool_generation.DynamicSampler):
def __init__(self, values=None, probs=None):
if values == None:
values = probs = []
libgraph_tool_generation.DynamicSampler.__init__(self, values, probs)
def sample(self):
return libgraph_tool_generation.DynamicSampler.sample(self, _get_rng())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment