Commit da179ff0 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Fix sampling of stochastic blockmodel to guarantee correct asymptotic probability

parent ca503515
......@@ -800,6 +800,60 @@ void do_collect_vertex_marginals(GraphInterface& gi, boost::any ob,
vertex_scalar_vector_properties())(op);
}
struct get_deg_entropy
{
template <class Graph, class Vprop>
void operator()(Graph& g, Vprop b, size_t B, double& S) const
{
#ifdef HAVE_SPARSEHASH
typedef dense_hash_map<pair<size_t,size_t>, int, boost::hash<pair<size_t,size_t>>> map_t;
#else
typedef unordered_map<pair<size_t,size_t>, int, boost::hash<pair<size_t,size_t>>> map_t;
#endif
vector<map_t> hist(B);
vector<int> total(B);
#ifdef HAVE_SPARSEHASH
for (size_t r = 0; r < B; ++r)
hist[r].set_empty_key(make_pair(numeric_limits<size_t>::max(),
numeric_limits<size_t>::max()));
#endif
typename graph_traits<Graph>::vertex_iterator v, v_end;
for (tie(v, v_end) = vertices(g); v != v_end; ++v)
{
hist[b[*v]][make_pair(in_degreeS()(*v, g), out_degree(*v, g))]++;
total[b[*v]]++;
}
S = 0;
for (size_t r = 0; r < B; ++r)
{
for (auto iter = hist[r].begin(); iter != hist[r].end(); ++iter)
{
double p = iter->second / double(total[r]);
S -= p * log(p) * total[r];
}
}
}
};
double do_get_deg_entropy(GraphInterface& gi, boost::any ob, size_t B)
{
typedef property_map_type::apply<int32_t,
GraphInterface::vertex_index_map_t>::type
vmap_t;
vmap_t b = any_cast<vmap_t>(ob);
double S = 0;
run_action<>()
(gi, std::bind(get_deg_entropy(),
placeholders::_1, b, B, std::ref(S)))();
return S;
}
vector<int32_t> get_vector(size_t n)
{
return vector<int32_t>(n);
......@@ -862,6 +916,7 @@ void export_blockmodel()
def("entropy", do_get_ent);
def("entropy_dense", do_get_ent_dense);
def("deg_entropy", do_get_deg_entropy);
def("edge_marginals", do_collect_edge_marginals);
def("bethe_entropy", do_bethe_entropy);
......
......@@ -19,6 +19,7 @@
#include "graph_util.hh"
#include "graph_filtering.hh"
#include "graph_generation.hh"
#include "sampler.hh"
#include <boost/python.hpp>
using namespace std;
......@@ -109,4 +110,9 @@ BOOST_PYTHON_MODULE(libgraph_tool_generation)
def("price", &price);
def("complete", &complete);
def("circular", &circular);
class_<Sampler<int, boost::mpl::false_>>("Sampler",
init<const vector<int>&, const vector<double>&>())
.def("sample", &Sampler<int, boost::mpl::false_>::sample<rng_t>,
return_value_policy<copy_const_reference>());
}
......@@ -33,7 +33,9 @@ class PythonFuncWrap
public:
PythonFuncWrap(boost::python::object o): _o(o) {}
double operator()(pair<size_t, size_t> deg1, pair<size_t, size_t> deg2)
typedef pair<size_t, size_t> deg_t;
double operator()(deg_t deg1, deg_t deg2)
const
{
boost::python::object ret = _o(boost::python::make_tuple(deg1.first, deg1.second),
......@@ -48,6 +50,25 @@ public:
return boost::python::extract<double>(ret);
}
template <class ProbMap>
void get_probs(ProbMap& probs) const
{
typedef typename ProbMap::key_type::first_type block_t;
if (PyObject_HasAttrString(_o.ptr(), "__getitem__"))
{
int N = boost::python::len(_o);
for (int i = 0; i < N; ++i)
{
block_t ks = boost::python::extract<block_t>(_o[i][0])();
block_t kt = boost::python::extract<block_t>(_o[i][1])();
double p = boost::python::extract<double>(_o[i][2])();
if (std::isnan(p) || std::isinf(p) || p <= 0)
continue;
probs[make_pair(ks, kt)] += p;
}
}
}
private:
boost::python::object _o;
};
......@@ -86,6 +107,22 @@ struct graph_rewire_block
};
struct graph_rewire_correlated
{
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockProp>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng, BlockProp block_prop) const
{
graph_rewire<CorrelatedRewireStrategy>()
(g, edge_index, corr_prob, self_loops, parallel_edges, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
}
};
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
bool alias, bool traditional, bool persist,
......@@ -96,6 +133,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
size_t pcount = 0;
if (strat == "erdos")
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<ErdosRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(),
......@@ -104,7 +142,9 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else if (strat == "uncorrelated")
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<RandomRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
......@@ -112,7 +152,11 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else if (strat == "correlated")
{
if (block.empty())
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<CorrelatedRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
......@@ -120,7 +164,22 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire_correlated(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng),
placeholders::_2),
vertex_properties())(block);
}
}
else if (strat == "probabilistic")
{
run_action<>()
(gi, std::bind(graph_rewire<ProbabilisticRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
......@@ -128,7 +187,9 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else if (strat == "blockmodel")
{
run_action<>()
(gi, std::bind(graph_rewire_block(alias, traditional),
placeholders::_1, gi.GetEdgeIndex(),
......@@ -139,7 +200,10 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)),
vertex_properties())(block);
}
else
{
throw ValueException("invalid random rewire strategy: " + strat);
}
return pcount;
}
......@@ -32,6 +32,10 @@
#include "random.hh"
#ifdef HAVE_SPARSEHASH
#include <sparsehash/dense_hash_map>
#endif
namespace graph_tool
{
using namespace std;
......@@ -68,7 +72,8 @@ struct swap_edge
{
template <class Graph>
static bool
parallel_check_target (size_t e, const pair<size_t, bool>& te,
parallel_check_target (const pair<size_t, bool>& e,
const pair<size_t, bool>& te,
vector<typename graph_traits<Graph>::edge_descriptor>& edges,
const Graph &g)
{
......@@ -81,8 +86,8 @@ struct swap_edge
// no parallel edges are introduced.
typename graph_traits<Graph>::vertex_descriptor
s = source(edges[e], g), // current source
t = target(edges[e], g), // current target
s = source(e, edges, g), // current source
t = target(e, edges, g), // current target
nt = target(te, edges, g), // new target
te_s = source(te, edges, g); // target edge source
......@@ -93,11 +98,12 @@ struct swap_edge
return false; // the coast is clear - hooray!
}
template <class Graph, class EdgeIndexMap>
template <class Graph>
static void swap_target
(size_t e, const pair<size_t, bool>& te,
(const pair<size_t, bool>& e,
const pair<size_t, bool>& te,
vector<typename graph_traits<Graph>::edge_descriptor>& edges,
EdgeIndexMap, Graph& g)
Graph& g)
{
// swap the target of the edge 'e' with the target of edge 'te', as
// such:
......@@ -105,22 +111,25 @@ struct swap_edge
// (s) -e--> (t) (s) -e--> (nt)
// (te_s) -te-> (nt) => (te_s) -te-> (t)
if (e == te.first)
if (e.first == te.first)
return;
// new edges which will replace the old ones
typename graph_traits<Graph>::edge_descriptor ne, nte;
typename graph_traits<Graph>::vertex_descriptor
s_e = source(edges[e], g),
t_e = target(edges[e], g),
s_e = source(e, edges, g),
t_e = target(e, edges, g),
s_te = source(te, edges, g),
t_te = target(te, edges, g);
remove_edge(edges[e], g);
remove_edge(edges[e.first], g);
remove_edge(edges[te.first], g);
if (is_directed::apply<Graph>::type::value || !e.second)
ne = add_edge(s_e, t_te, g).first;
edges[e] = ne;
if (!te.second)
else // keep invertedness (only for undirected graphs)
ne = add_edge(t_te, s_e, g).first;
edges[e.first] = ne;
if (is_directed::apply<Graph>::type::value || !te.second)
nte = add_edge(s_te, t_e, g).first;
else // keep invertedness (only for undirected graphs)
nte = add_edge(t_e, s_te, g).first;
......@@ -183,6 +192,56 @@ private:
PropertyMap _p;
};
// select an appropriate "null" key for densehash
template <class Type>
struct get_null_key
{
Type operator()() const
{
return numeric_limits<Type>::max();
}
};
template <>
struct get_null_key<string>
{
string operator()() const
{
return lexical_cast<string>(get_null_key<size_t>()());
}
};
template <>
struct get_null_key<boost::python::object>
{
boost::python::object operator()() const
{
return boost::python::object();
}
};
template <class Type>
struct get_null_key<vector<Type>>
{
vector<Type> operator()() const
{
vector<Type> v(1);
v[0] = get_null_key<Type>()();
return v;
}
};
template <class Type1, class Type2>
struct get_null_key<pair<Type1, Type2>>
{
pair<Type1, Type2> operator()() const
{
return make_pair(get_null_key<Type1>()(),
get_null_key<Type2>()());
}
};
// main rewire loop
template <template <class Graph, class EdgeIndexMap, class CorrProb,
class BlockDeg>
......@@ -206,10 +265,6 @@ struct graph_rewire
vector<edge_t> edges;
vector<size_t> edge_pos;
typedef random_permutation_iterator<typename vector<size_t>::iterator,
rng_t>
random_edge_iter;
typename graph_traits<Graph>::edge_iterator e, e_end;
for (tie(e, e_end) = boost::edges(g); e != e_end; ++e)
{
......@@ -217,6 +272,10 @@ struct graph_rewire
edge_pos.push_back(edge_pos.size());
}
typedef random_permutation_iterator<typename vector<size_t>::iterator,
rng_t>
random_edge_iter;
RewireStrategy<Graph, EdgeIndexMap, CorrProb, BlockDeg>
rewire(g, edge_index, edges, corr_prob, bd, cache, rng);
......@@ -233,17 +292,19 @@ struct graph_rewire
ei_begin(edge_pos.begin(), edge_pos.end(), rng),
ei_end(edge_pos.end(), edge_pos.end(), rng);
// for each edge rewire its source or target
for (random_edge_iter ei = ei_begin; ei != ei_end; ++ei)
{
size_t e_pos = ei - ei_begin;
if (verbose)
print_progress(i, niter, e_pos, no_sweep ? 1 : edges.size(),
str);
size_t e = *ei;
bool success = false;
do
{
success = rewire(*ei, self_loops, parallel_edges);
success = rewire(e, self_loops, parallel_edges);
}
while(persist && !success);
......@@ -337,7 +398,7 @@ class RewireStrategyBase
{
public:
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
typedef typename graph_traits<Graph>::edge_descriptor vertex_t;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename EdgeIndexMap::value_type index_t;
......@@ -352,31 +413,33 @@ public:
// try randomly drawn pairs of edges and check if they satisfy all the
// consistency checks
pair<size_t, bool> e = make_pair(ei, false);
// rewire target
pair<size_t, bool> et = self.get_target_edge(ei);
pair<size_t, bool> et = self.get_target_edge(e);
if (!self_loops) // reject self-loops if not allowed
{
if((source(_edges[ei], _g) == target(et, _edges, _g)) ||
(target(_edges[ei], _g) == source(et, _edges, _g)))
if((source(e, _edges, _g) == target(et, _edges, _g)) ||
(target(e, _edges, _g) == source(et, _edges, _g)))
return false;
}
// reject parallel edges if not allowed
if (!parallel_edges && (et.first != ei))
if (!parallel_edges && (et.first != e.first))
{
if (swap_edge::parallel_check_target(ei, et, _edges, _g))
if (swap_edge::parallel_check_target(e, et, _edges, _g))
return false;
}
if (ei != et.first)
if (e.first != et.first)
{
self.update_edge(ei, false);
self.update_edge(e.first, false);
self.update_edge(et.first, false);
swap_edge::swap_target(ei, et, _edges, _edge_index, _g);
swap_edge::swap_target(e, et, _edges, _g);
self.update_edge(ei, true);
self.update_edge(e.first, true);
self.update_edge(et.first, true);
}
else
......@@ -423,7 +486,7 @@ public:
bool, rng_t& rng)
: base_t(g, edge_index, edges, rng), _g(g) {}
pair<size_t,bool> get_target_edge(size_t)
pair<size_t,bool> get_target_edge(pair<size_t,bool>& e)
{
std::uniform_int_distribution<> sample(0, base_t::_edges.size() - 1);
pair<size_t, bool> et = make_pair(sample(base_t::_rng), false);
......@@ -431,6 +494,7 @@ public:
{
std::bernoulli_distribution coin(0.5);
et.second = coin(base_t::_rng);
e.second = coin(base_t::_rng);
}
return et;
}
......@@ -462,11 +526,17 @@ public:
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
typedef typename BlockDeg::block_t deg_t;
CorrelatedRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb, BlockDeg,
vector<edge_t>& edges, CorrProb, BlockDeg blockdeg,
bool, rng_t& rng)
: base_t(g, edge_index, edges, rng), _g(g)
: base_t(g, edge_index, edges, rng), _blockdeg(blockdeg), _g(g)
{
#ifdef HAVE_SPARSEHASH
_edges_by_target.set_empty_key(get_null_key<deg_t>()());
#endif
for (size_t ei = 0; ei < base_t::_edges.size(); ++ei)
{
// For undirected graphs, there is no difference between source and
......@@ -475,38 +545,58 @@ public:
edge_t& e = base_t::_edges[ei];
vertex_t t = target(e, _g);
deg_t tdeg = make_pair(in_degreeS()(t, _g), out_degree(t, _g));
deg_t tdeg = get_deg(t, _g);;
_edges_by_target[tdeg].push_back(make_pair(ei, false));
if (!is_directed::apply<Graph>::type::value)
{
t = source(e, _g);
tdeg = make_pair(in_degreeS()(t, _g), out_degree(t, _g));
deg_t tdeg = get_deg(t, _g);
_edges_by_target[tdeg].push_back(make_pair(ei, true));
}
}
}
pair<size_t,bool> get_target_edge(size_t ei)
pair<size_t,bool> get_target_edge(pair<size_t, bool>& e)
{
edge_t& e = base_t::_edges[ei];
vertex_t t = target(e, _g);
deg_t tdeg = make_pair(in_degreeS()(t, _g), out_degree(t, _g));
typename edges_by_end_deg_t::mapped_type& elist =
_edges_by_target[tdeg];
std::uniform_int_distribution<> sample(0, elist.size() - 1);
if (!is_directed::apply<Graph>::type::value)
{
std::bernoulli_distribution coin(0.5);
e.second = coin(base_t::_rng);
}
return elist[sample(base_t::_rng)];
vertex_t t = target(e, base_t::_edges, _g);
deg_t tdeg = get_deg(t, _g);
auto& elist = _edges_by_target[tdeg];
std::uniform_int_distribution<> sample(0, elist.size() - 1);
auto ep = elist[sample(base_t::_rng)];
if (get_deg(target(ep, base_t::_edges, _g), _g) != tdeg)
ep.second = not ep.second;
return ep;
}
void update_edge(size_t, bool) {}
deg_t get_deg(vertex_t v, const Graph& g)
{
return _blockdeg.get_block(v, g);
}
private:
typedef pair<size_t, size_t> deg_t;
BlockDeg _blockdeg;
#ifdef HAVE_SPARSEHASH
typedef google::dense_hash_map<deg_t,
vector<pair<size_t, bool>>,
boost::hash<deg_t>>
edges_by_end_deg_t;
#else
typedef std::unordered_map<deg_t,
vector<pair<size_t, bool> >,
boost::hash<deg_t> >
vector<pair<size_t, bool>>,
boost::hash<deg_t>>
edges_by_end_deg_t;
#endif
edges_by_end_deg_t _edges_by_target;
protected:
......@@ -543,9 +633,16 @@ public:
: base_t(g, edge_index, edges, rng), _g(g), _corr_prob(corr_prob),
_blockdeg(blockdeg)
{
#ifdef HAVE_SPARSEHASH
_probs.set_empty_key(get_null_key<pair<deg_t, deg_t>>()());
#endif
if (cache)
{
// cache probabilities
_corr_prob.get_probs(_probs);
if (_probs.empty())
{
std::unordered_set<deg_t, boost::hash<deg_t> > deg_set;
for (size_t ei = 0; ei < base_t::_edges.size(); ++ei)
{
......@@ -554,20 +651,20 @@ public:
deg_set.insert(get_deg(target(e, g), g));
}
for (typeof(deg_set.begin()) s_iter = deg_set.begin();
s_iter != deg_set.end(); ++s_iter)
for (typeof(deg_set.begin()) t_iter = deg_set.begin();
t_iter != deg_set.end(); ++t_iter)
for (auto s_iter = deg_set.begin(); s_iter != deg_set.end(); ++s_iter)
for (auto t_iter = deg_set.begin(); t_iter != deg_set.end(); ++t_iter)
{
double p = _corr_prob(*s_iter, *t_iter);
if (std::isnan(p) || std::isinf(p) || p < 0)
p = 0;
// avoid zero probability to not get stuck in rejection step
if (p == 0)
p = numeric_limits<double>::min();
_probs[make_pair(*s_iter, *t_iter)] = p;
}
}
for (auto iter = _probs.begin(); iter != _probs.end(); ++iter)
{
double& p = iter->second;
p = log(p);
}
}
}