Commit 0c34c720 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Add fast "traditional" blockmodel sampling to random_graph/rewire

This also improves some of the existing shuffling strategies with a fast
alias sampling. The parameter names of the random_graph() and
random_rewire() functions have been made more consistent.
parent ffc3eaae
......@@ -47,8 +47,8 @@ private:
};
void generate_graph(GraphInterface& gi, size_t N, python::object deg_sample,
bool uncorrelated, bool no_parallel, bool no_self_loops,
bool undirected, rng_t& rng, bool verbose, bool verify)
bool no_parallel, bool no_self_loops, bool undirected,
rng_t& rng, bool verbose, bool verify)
{
typedef graph_tool::detail::get_all_graph_views::apply<
graph_tool::detail::scalar_pairs, mpl::bool_<false>,
......@@ -58,28 +58,18 @@ void generate_graph(GraphInterface& gi, size_t N, python::object deg_sample,
if (undirected)
gi.SetDirected(false);
if (uncorrelated)
{
run_action<graph_views>()
(gi, bind<void>(gen_graph(), _1, N,
PythonFuncWrap(deg_sample),
no_parallel, no_self_loops,
ref(rng), verbose, verify))();
}
else
{
run_action<graph_views>()
(gi, bind<void>(gen_graph(), _1, N,
PythonFuncWrap(deg_sample),
no_parallel, no_self_loops,
ref(rng), verbose, verify))();
}
run_action<graph_views>()
(gi, bind<void>(gen_graph(), _1, N,
PythonFuncWrap(deg_sample),
no_parallel, no_self_loops,
ref(rng), verbose, verify))();
}
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
python::object corr_prob, boost::any block,
bool cache, rng_t& rng, bool verbose);
bool alias, bool traditional, bool persist,
python::object corr_prob, boost::any block, bool cache,
rng_t& rng, bool verbose);
void predecessor_graph(GraphInterface& gi, GraphInterface& gpi,
boost::any pred_map);
void line_graph(GraphInterface& gi, GraphInterface& lgi,
......
......@@ -52,27 +52,45 @@ private:
python::object _o;
};
struct graph_rewire_block
{
graph_rewire_block(bool alias, bool traditional) : alias(alias), traditional(traditional) {}
bool alias;
bool traditional;
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockProp>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
pair<bool, bool> rest, BlockProp block_prop,
pair<size_t, bool> iter_sweep,
pair<bool, bool> cache_verbose, size_t& pcount, rng_t& rng)
tr1::tuple<bool, bool, bool> cache_verbose, size_t& pcount, rng_t& rng)
const
{
graph_rewire<ProbabilisticRewireStrategy>()
(g, edge_index, corr_prob, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
if (traditional)
{
graph_rewire<TradBlockRewireStrategy>()
(g, edge_index, corr_prob, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
}
else
{
if (alias)
graph_rewire<AliasProbabilisticRewireStrategy>()
(g, edge_index, corr_prob, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
else
graph_rewire<ProbabilisticRewireStrategy>()
(g, edge_index, corr_prob, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
}
}
};
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
python::object corr_prob, boost::any block,
bool cache, rng_t& rng, bool verbose)
bool alias, bool traditional, bool persist,
python::object corr_prob, boost::any block, bool cache,
rng_t& rng, bool verbose)
{
PythonFuncWrap corr(corr_prob);
size_t pcount = 0;
......@@ -83,7 +101,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
_1, gi.GetEdgeIndex(), boost::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
make_pair(cache, verbose),
tr1::make_tuple(persist, cache, verbose),
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "uncorrelated")
run_action<graph_tool::detail::never_reversed>()
......@@ -91,7 +109,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
_1, gi.GetEdgeIndex(), boost::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
make_pair(cache, verbose),
tr1::make_tuple(persist, cache, verbose),
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "correlated")
run_action<graph_tool::detail::never_reversed>()
......@@ -99,7 +117,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
_1, gi.GetEdgeIndex(), boost::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
make_pair(cache, verbose),
tr1::make_tuple(persist, cache, verbose),
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "probabilistic")
run_action<>()
......@@ -107,15 +125,15 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
_1, gi.GetEdgeIndex(), boost::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
make_pair(cache, verbose),
tr1::make_tuple(persist, cache, verbose),
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "blockmodel")
run_action<>()
(gi, boost::bind<void>(graph_rewire_block(),
(gi, boost::bind<void>(graph_rewire_block(alias, traditional),
_1, gi.GetEdgeIndex(), boost::ref(corr),
make_pair(self_loops, parallel_edges), _2,
make_pair(niter, no_sweep),
make_pair(cache, verbose),
tr1::make_tuple(persist, cache, verbose),
boost::ref(pcount), boost::ref(rng)),
vertex_properties())(block);
else
......
......@@ -20,6 +20,8 @@
#include "tr1_include.hh"
#include TR1_HEADER(unordered_set)
#include TR1_HEADER(tuple)
#include <boost/functional/hash.hpp>
#include <iostream>
......@@ -98,7 +100,7 @@ struct swap_edge
vector<typename graph_traits<Graph>::edge_descriptor>& edges,
EdgeIndexMap, Graph& g)
{
// swap the source of the edge 'e' with the source of edge 'se', as
// swap the target of the edge 'e' with the target of edge 'te', as
// such:
//
// (s) -e--> (t) (s) -e--> (nt)
......@@ -194,14 +196,14 @@ struct graph_rewire
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
pair<bool, bool> cache_verbose,
tr1::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng, BlockDeg bd)
const
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
bool cache = cache_verbose.first;
bool verbose = cache_verbose.second;
bool persist = tr1::get<0>(cache_verbose);
bool cache = tr1::get<1>(cache_verbose);
bool verbose = tr1::get<2>(cache_verbose);
vector<edge_t> edges;
vector<size_t> edge_pos;
......@@ -239,7 +241,13 @@ struct graph_rewire
if (verbose)
print_progress(i, niter, e_pos, no_sweep ? 1 : edges.size(),
str);
bool success = rewire(*ei, self_loops, parallel_edges);
bool success = false;
do
{
success = rewire(*ei, self_loops, parallel_edges);
}
while(persist && !success);
if (!success)
++pcount;
......@@ -255,7 +263,7 @@ struct graph_rewire
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
pair<bool, bool> cache_verbose,
tr1::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng)
const
{
......@@ -289,10 +297,6 @@ public:
bool operator()(size_t ei, bool self_loops, bool parallel_edges)
{
//try randomly drawn pairs of vertices
typedef random_permutation_iterator
<typename graph_traits<Graph>::vertex_iterator, rng_t>
random_vertex_iter;
tr1::uniform_int<size_t> sample(0, _vertices.size() - 1);
typename graph_traits<Graph>::vertex_descriptor s, t;
while (true)
......@@ -512,6 +516,7 @@ protected:
// general stochastic blockmodel
// this version is based on rejection sampling
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockDeg>
class ProbabilisticRewireStrategy:
public RewireStrategyBase<Graph, EdgeIndexMap,
......@@ -539,17 +544,17 @@ public:
: base_t(g, edge_index, edges, rng), _g(g), _corr_prob(corr_prob),
_blockdeg(blockdeg)
{
tr1::unordered_set<deg_t, hash<deg_t> > deg_set;
for (size_t ei = 0; ei < base_t::_edges.size(); ++ei)
{
edge_t& e = base_t::_edges[ei];
deg_set.insert(get_deg(source(e, g), g));
deg_set.insert(get_deg(target(e, g), g));
}
if (cache)
{
// cache probabilities
tr1::unordered_set<deg_t, hash<deg_t> > deg_set;
for (size_t ei = 0; ei < base_t::_edges.size(); ++ei)
{
edge_t& e = base_t::_edges[ei];
deg_set.insert(get_deg(source(e, g), g));
deg_set.insert(get_deg(target(e, g), g));
}
for (typeof(deg_set.begin()) s_iter = deg_set.begin();
s_iter != deg_set.end(); ++s_iter)
for (typeof(deg_set.begin()) t_iter = deg_set.begin();
......@@ -558,6 +563,9 @@ public:
double p = _corr_prob(*s_iter, *t_iter);
if (isnan(p) || isinf(p) || p < 0)
p = 0;
// avoid zero probability to not get stuck in rejection step
if (p == 0)
p = numeric_limits<double>::min();
_probs[make_pair(*s_iter, *t_iter)] = p;
}
}
......@@ -570,6 +578,9 @@ public:
double p = _corr_prob(s_deg, t_deg);
if (isnan(p) || isinf(p) || p < 0)
p = 0;
// avoid zero probability to not get stuck in rejection step
if (p == 0)
p = numeric_limits<double>::min();
return p;
}
return _probs[make_pair(s_deg, t_deg)];
......@@ -603,7 +614,7 @@ public:
double pf = (get_prob(s_deg, ep_t_deg) *
get_prob(ep_s_deg, t_deg));
if (pf >= pi || pi == 0)
if (pf >= pi)
return ep;
if (pf == 0)
......@@ -631,6 +642,300 @@ private:
_probs;
};
// general "degree-corrected" stochastic blockmodel
// this version is based on the alias method
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockDeg>
class AliasProbabilisticRewireStrategy:
public RewireStrategyBase<Graph, EdgeIndexMap,
AliasProbabilisticRewireStrategy<Graph, EdgeIndexMap,
CorrProb, BlockDeg> >
{
public:
typedef RewireStrategyBase<Graph, EdgeIndexMap,
AliasProbabilisticRewireStrategy<Graph, EdgeIndexMap,
CorrProb, BlockDeg> >
base_t;
typedef Graph graph_t;
typedef EdgeIndexMap edge_index_t;
typedef typename BlockDeg::block_t deg_t;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
typedef typename EdgeIndexMap::value_type index_t;
AliasProbabilisticRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb corr_prob,
BlockDeg blockdeg, bool, rng_t& rng)
: base_t(g, edge_index, edges, rng), _g(g), _corr_prob(corr_prob),
_blockdeg(blockdeg)
{
tr1::unordered_set<deg_t, hash<deg_t> > deg_set;
for (size_t ei = 0; ei < base_t::_edges.size(); ++ei)
{
edge_t& e = base_t::_edges[ei];
deg_set.insert(get_deg(source(e, g), g));
deg_set.insert(get_deg(target(e, g), g));
}
for (typeof(deg_set.begin()) s_iter = deg_set.begin();
s_iter != deg_set.end(); ++s_iter)
_items.push_back(*s_iter);
for (typeof(deg_set.begin()) s_iter = deg_set.begin();
s_iter != deg_set.end(); ++s_iter)
{
vector<double> probs;
for (typeof(deg_set.begin()) t_iter = deg_set.begin();
t_iter != deg_set.end(); ++t_iter)
{
double p = _corr_prob(*s_iter, *t_iter);
if (isnan(p) || isinf(p) || p < 0)
p = 0;
// avoid zero probability to not get stuck in rejection step
if (p == 0)
p = numeric_limits<double>::min();
probs.push_back(p);
_probs[make_pair(*s_iter, *t_iter)] = p;
}
_sampler[*s_iter] = new Sampler<deg_t>(_items, probs);
}
_in_pos.resize(base_t::_edges.size());
if (!is_directed::apply<Graph>::type::value)
_out_pos.resize(base_t::_edges.size());
for (size_t i = 0; i < base_t::_edges.size(); ++i)
{
vertex_t v = target(base_t::_edges[i], g);
deg_t d = get_deg(v, g);
_in_edges[d].push_back(i);
_in_pos[i] = _in_edges[d].size() - 1;
if (!is_directed::apply<Graph>::type::value)
{
vertex_t v = source(base_t::_edges[i], g);
deg_t d = get_deg(v, g);
_out_edges[d].push_back(i);
_out_pos[i] = _out_edges[d].size() - 1;
}
}
}
~AliasProbabilisticRewireStrategy()
{
for (typeof(_sampler.begin()) iter = _sampler.begin();
iter != _sampler.end(); ++iter)
delete iter->second;
}
double get_prob(const deg_t& s_deg, const deg_t& t_deg)
{
return _probs[make_pair(s_deg, t_deg)];
}
deg_t get_deg(vertex_t v, Graph& g)
{
return _blockdeg.get_block(v, g);
}
pair<size_t, bool> get_target_edge(size_t ei)
{
deg_t s_deg = get_deg(source(base_t::_edges[ei], _g), _g);
deg_t t_deg = get_deg(target(base_t::_edges[ei], _g), _g);
deg_t nt = _sampler[s_deg]->sample(base_t::_rng);
pair<size_t, bool> ep;
tr1::bernoulli_distribution coin(_in_edges[nt].size() /
double(_in_edges[nt].size() +
_out_edges[nt].size()));
if (is_directed::apply<Graph>::type::value || coin(base_t::_rng))
{
vector<size_t>& ies = _in_edges[nt];
tr1::uniform_int<> sample(0, ies.size() - 1);
size_t epi = ies[sample(base_t::_rng)];
ep = make_pair(epi, false);
}
else
{
vector<size_t>& oes = _out_edges[nt];
tr1::uniform_int<> sample(0, oes.size() - 1);
size_t epi = oes[sample(base_t::_rng)];
ep = make_pair(epi, true);
}
deg_t ep_s_deg = get_deg(source(ep, base_t::_edges, _g), _g);
deg_t ep_t_deg = get_deg(target(ep, base_t::_edges, _g), _g);
double pi = (get_prob(s_deg, t_deg) *
get_prob(ep_s_deg, ep_t_deg));
double pf = (get_prob(s_deg, ep_t_deg) *
get_prob(ep_s_deg, t_deg));
if (pf >= pi)
return ep;
if (pf == 0)
return make_pair(ei, false); // reject
double a = exp(log(pf) - log(pi));
tr1::variate_generator<rng_t&, tr1::uniform_real<> >
rsample(base_t::_rng, tr1::uniform_real<>(0.0, 1.0));
double r = rsample();
if (r > a)
return make_pair(ei, false); // reject
else
return ep;
}
void update_edge(size_t ei, bool insert)
{
if (insert)
{
vertex_t v = target(base_t::_edges[ei], _g);
deg_t d = get_deg(v, _g);
_in_edges[d].push_back(ei);
_in_pos[ei] = _in_edges[d].size() - 1;
if (!is_directed::apply<Graph>::type::value)
{
v = source(base_t::_edges[ei], _g);
deg_t d = get_deg(v, _g);
_out_edges[d].push_back(ei);
_out_pos[ei] = _out_edges[d].size() - 1;
}
}
else
{
if (!is_directed::apply<Graph>::type::value)
{
vertex_t v = target(base_t::_edges[ei], _g);
deg_t d = get_deg(v, _g);
size_t j = _in_pos[ei];
_in_pos[_in_edges[d].back()] = j;
_in_edges[d][j] = _in_edges[d].back();
_in_edges[d].pop_back();
}
}
}
private:
Graph& _g;
EdgeIndexMap _edge_index;
CorrProb _corr_prob;
BlockDeg _blockdeg;
vector<deg_t> _items;
tr1::unordered_map<deg_t, Sampler<deg_t>* > _sampler;
tr1::unordered_map<pair<deg_t, deg_t>, double, hash<pair<deg_t, deg_t> > >
_probs;
tr1::unordered_map<deg_t, vector<size_t> > _in_edges;
tr1::unordered_map<deg_t, vector<size_t> > _out_edges;
vector<size_t> _in_pos;
vector<size_t> _out_pos;
};
// general "traditional" stochastic blockmodel
// this version is based on the alias method, and does not keep the degrees fixed
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockDeg>
class TradBlockRewireStrategy
{
public:
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
typedef typename EdgeIndexMap::value_type index_t;
typedef typename BlockDeg::block_t deg_t;
TradBlockRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb corr_prob,
BlockDeg blockdeg, bool, rng_t& rng)
: _g(g), _edge_index(edge_index), _edges(edges), _corr_prob(corr_prob),
_blockdeg(blockdeg), _rng(rng)
{
typename graph_traits<Graph>::vertex_iterator v, v_end;
for (tie(v, v_end) = vertices(_g); v != v_end; ++v)
{
deg_t d = _blockdeg.get_block(*v, g);
_vertices[d].push_back(*v);
}
vector<double> probs;
for (typeof(_vertices.begin()) s_iter = _vertices.begin();
s_iter != _vertices.end(); ++s_iter)
{
for (typeof(_vertices.begin()) t_iter = _vertices.begin();
t_iter != _vertices.end(); ++t_iter)
{
double p = _corr_prob(s_iter->first, t_iter->first);
if (isnan(p) || isinf(p) || p < 0)
p = 0;
_items.push_back(make_pair(s_iter->first, t_iter->first));
probs.push_back(p);
}
}
_sampler = new Sampler<pair<deg_t, deg_t> >(_items, probs);
}
~TradBlockRewireStrategy()
{
delete _sampler;
}
bool operator()(size_t ei, bool self_loops, bool parallel_edges)
{
const pair<deg_t, deg_t>& deg = _sampler->sample(_rng);
vector<vertex_t>& svs = _vertices[deg.first];
vector<vertex_t>& tvs = _vertices[deg.second];
tr1::uniform_int<size_t> s_sample(0, svs.size() - 1);
tr1::uniform_int<size_t> t_sample(0, tvs.size() - 1);
typename graph_traits<Graph>::vertex_descriptor s, t;
s = svs[s_sample(_rng)];
t = tvs[t_sample(_rng)];
// reject self-loops if not allowed
if(!self_loops && s == t)
return false;
// reject parallel edges if not allowed
if (!parallel_edges && is_adjacent(s, t, _g))
return false;
remove_edge(_edges[ei], _g);
edge_t ne = add_edge(s, t, _g).first;
_edges[ei] = ne;
return true;
}
private:
Graph& _g;
EdgeIndexMap _edge_index;
vector<edge_t>& _edges;
CorrProb _corr_prob;
BlockDeg _blockdeg;
rng_t& _rng;
tr1::unordered_map<deg_t, vector<vertex_t> > _vertices;
vector<pair<deg_t, deg_t> > _items;
Sampler<pair<deg_t, deg_t> >* _sampler;
};
} // graph_tool namespace
#endif // GRAPH_REWIRING_HH
......@@ -19,253 +19,98 @@
#define SAMPLER_HH
#include "random.hh"
#include <iostream>
#include <functional>
namespace graph_tool
{
using namespace std;
using namespace boost;
// utility class to sample uniformly from a collection of values