Commit 581ba9fe authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

random_rewire(): Add "blockmodel-micro" model and implement maximum entropy sampling

parent aeaff00d
Pipeline #370 failed with stage
in 395 minutes and 16 seconds
......@@ -15,6 +15,8 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#define BOOST_PYTHON_MAX_ARITY 40
#include "graph.hh"
#include "graph_util.hh"
#include "graph_filtering.hh"
......@@ -75,9 +77,10 @@ void generate_sbm(GraphInterface& gi, boost::any ab, boost::python::object ors,
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
bool alias, bool traditional, bool persist,
boost::python::object corr_prob, boost::any apin,
boost::any block, bool cache, rng_t& rng, bool verbose);
bool configuration, bool alias, bool traditional,
bool micro, bool persist, boost::python::object corr_prob,
boost::any apin, boost::any block, bool cache, rng_t& rng,
bool verbose);
void predecessor_graph(GraphInterface& gi, GraphInterface& gpi,
boost::any pred_map);
void line_graph(GraphInterface& gi, GraphInterface& lgi,
......
......@@ -464,7 +464,9 @@ struct gen_graph
set<deg_t, cmp_in<greater<size_t> > > targets;
// vertices with a given degree
gt_hash_map<deg_t, vector<size_t>> vset;
unordered_map<deg_t, vector<size_t>> vset; // can't use gt_hash_map, as
// internal pointers are
// invalidated after insert
size_t num_e = 0;
for (size_t i = 0; i < vertices.size(); ++i)
......
......@@ -75,34 +75,45 @@ private:
struct graph_rewire_block
{
graph_rewire_block(bool alias, bool traditional) : alias(alias), traditional(traditional) {}
graph_rewire_block(bool alias, bool traditional, bool micro) :
alias(alias), traditional(traditional), micro(micro) {}
bool alias;
bool traditional;
bool micro;
template <class Graph, class EdgeIndexMap, class CorrProb, class PinMap,
class BlockProp>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
PinMap pin, pair<bool, bool> rest, BlockProp block_prop,
pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose, size_t& pcount, rng_t& rng)
const
PinMap pin, pair<bool, bool> rest, bool configuration,
BlockProp block_prop, pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose, size_t& pcount,
rng_t& rng) const
{
if (traditional)
{
graph_rewire<TradBlockRewireStrategy>()
(g, edge_index, corr_prob, pin, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
if (micro)
graph_rewire<MicroTradBlockRewireStrategy>()
(g, edge_index, corr_prob, pin, rest.first, rest.second,
configuration, iter_sweep, cache_verbose, pcount, rng,
PropertyBlock<BlockProp>(block_prop));
else
graph_rewire<CanTradBlockRewireStrategy>()
(g, edge_index, corr_prob, pin, rest.first, rest.second,
configuration, iter_sweep, cache_verbose, pcount, rng,
PropertyBlock<BlockProp>(block_prop));
}
else
{
if (alias)
graph_rewire<AliasProbabilisticRewireStrategy>()
(g, edge_index, corr_prob, pin, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
(g, edge_index, corr_prob, pin, rest.first, rest.second,
configuration, iter_sweep,cache_verbose, pcount, rng,
PropertyBlock<BlockProp>(block_prop));
else
graph_rewire<ProbabilisticRewireStrategy>()
(g, edge_index, corr_prob, pin, rest.first, rest.second, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
(g, edge_index, corr_prob, pin, rest.first, rest.second,
configuration, iter_sweep, cache_verbose, pcount, rng,
PropertyBlock<BlockProp>(block_prop));
}
}
};
......@@ -114,22 +125,24 @@ struct graph_rewire_correlated
class BlockProp>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
PinMap pin, bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng, BlockProp block_prop) const
bool configuration, pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose, size_t& pcount,
rng_t& rng, BlockProp block_prop) const
{
graph_rewire<CorrelatedRewireStrategy>()
(g, edge_index, corr_prob, pin, self_loops, parallel_edges, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
(g, edge_index, corr_prob, pin, self_loops, parallel_edges,
configuration, iter_sweep, cache_verbose, pcount, rng,
PropertyBlock<BlockProp>(block_prop));
}
};
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
bool alias, bool traditional, bool persist,
boost::python::object corr_prob, boost::any apin,
boost::any block, bool cache, rng_t& rng, bool verbose)
bool configuration, bool alias, bool traditional,
bool micro, bool persist, boost::python::object corr_prob,
boost::any apin, boost::any block, bool cache, rng_t& rng,
bool verbose)
{
PythonFuncWrap corr(corr_prob);
size_t pcount = 0;
......@@ -145,7 +158,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
(gi, std::bind(graph_rewire<ErdosRewireStrategy>(),
std::placeholders::_1, gi.get_edge_index(),
std::ref(corr), pin,
self_loops, parallel_edges,
self_loops, parallel_edges, configuration,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
......@@ -155,7 +168,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<RandomRewireStrategy>(),
std::placeholders::_1, gi.get_edge_index(), std::ref(corr),
pin, self_loops, parallel_edges,
pin, self_loops, parallel_edges, configuration,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
......@@ -167,7 +180,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<CorrelatedRewireStrategy>(),
std::placeholders::_1, gi.get_edge_index(), std::ref(corr),
pin, self_loops, parallel_edges,
pin, self_loops, parallel_edges, configuration,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
......@@ -177,7 +190,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire_correlated(),
std::placeholders::_1, gi.get_edge_index(), std::ref(corr),
pin, self_loops, parallel_edges,
pin, self_loops, parallel_edges, configuration,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng),
......@@ -190,7 +203,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
run_action<>()
(gi, std::bind(graph_rewire<ProbabilisticRewireStrategy>(),
std::placeholders::_1, gi.get_edge_index(), std::ref(corr),
pin, self_loops, parallel_edges,
pin, self_loops, parallel_edges, configuration,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
......@@ -198,10 +211,11 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
else if (strat == "blockmodel")
{
run_action<>()
(gi, std::bind(graph_rewire_block(alias, traditional),
(gi, std::bind(graph_rewire_block(alias, traditional, micro),
std::placeholders::_1, gi.get_edge_index(),
std::ref(corr), pin,
make_pair(self_loops, parallel_edges),
configuration,
std::placeholders::_2,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
......
......@@ -286,10 +286,9 @@ struct graph_rewire
class BlockDeg, class PinMap>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
PinMap pin, bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng, BlockDeg bd)
const
bool configuration, pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose, size_t& pcount,
rng_t& rng, BlockDeg bd) const
{
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
bool persist = std::get<0>(cache_verbose);
......@@ -312,7 +311,8 @@ struct graph_rewire
random_edge_iter;
RewireStrategy<Graph, EdgeIndexMap, CorrProb, BlockDeg>
rewire(g, edge_index, edges, corr_prob, bd, cache, rng, parallel_edges);
rewire(g, edge_index, edges, corr_prob, bd, cache, rng,
parallel_edges, configuration);
size_t niter;
bool no_sweep;
......@@ -357,13 +357,13 @@ struct graph_rewire
template <class Graph, class EdgeIndexMap, class CorrProb, class PinMap>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
PinMap pin, bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng)
const
bool configuration, pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose, size_t& pcount,
rng_t& rng) const
{
operator()(g, edge_index, corr_prob, pin, self_loops, parallel_edges,
iter_sweep, cache_verbose, pcount, rng, DegreeBlock());
configuration, iter_sweep, cache_verbose, pcount, rng,
DegreeBlock());
}
};
......@@ -379,18 +379,32 @@ public:
ErdosRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb, BlockDeg,
bool, rng_t& rng, bool)
bool, rng_t& rng, bool, bool configuration)
: _g(g), _edge_index(edge_index), _edges(edges),
_vertices(HardNumVertices()(g)), _rng(rng)
_vertices(HardNumVertices()(g)), _rng(rng),
_configuration(configuration),
_nmap(get(vertex_index, g), num_vertices(g))
{
decltype(_vertices.begin()) viter = _vertices.begin();
typename graph_traits<Graph>::vertex_iterator v, v_end;
for (tie(v, v_end) = vertices(_g); v != v_end; ++v)
*(viter++) = *v;
if (!configuration)
{
for (size_t i = 0; i < edges.size(); ++i)
add_count(source(edges[i], g), target(edges[i], g), _nmap, g);
}
}
bool operator()(size_t ei, bool self_loops, bool parallel_edges)
{
size_t e_s = source(_edges[ei], _g);
size_t e_t = target(_edges[ei], _g);
if (!is_directed::apply<Graph>::type::value && e_s > e_t)
std::swap(e_s, e_t);
//try randomly drawn pairs of vertices
std::uniform_int_distribution<size_t> sample(0, _vertices.size() - 1);
typename graph_traits<Graph>::vertex_descriptor s, t;
......@@ -398,21 +412,55 @@ public:
{
s = sample(_rng);
t = sample(_rng);
if (s == t)
{
if (!self_loops) // reject self-loops if not allowed
continue;
// reject self-loops if not allowed
if(s == t && !self_loops)
continue;
}
else if (!is_directed::apply<Graph>::type::value && self_loops)
{
// sample self-loops w/ correct probability for undirected
// graphs
std::bernoulli_distribution reject(.5);
if (reject(_rng))
continue;
}
break;
}
if (!is_directed::apply<Graph>::type::value && s > t)
std::swap(s, t);
if (s == e_s && t == e_t)
return false;
// reject parallel edges if not allowed
if (!parallel_edges && is_adjacent(s, t, _g))
return false;
if (!_configuration)
{
size_t m = get_count(s, t, _nmap, _g);
size_t m_e = get_count(e_s, e_t, _nmap, _g);
double a = (m + 1) / double(m_e);
std::bernoulli_distribution accept(std::min(a, 1.));
if (!accept(_rng))
return false;
}
remove_edge(_edges[ei], _g);
edge_t ne = add_edge(s, t, _g).first;
_edges[ei] = ne;
if (!_configuration)
{
remove_count(e_s, e_t, _nmap, _g);
add_count(s, t, _nmap, _g);
}
return true;
}
......@@ -422,6 +470,12 @@ private:
vector<edge_t>& _edges;
vector<typename graph_traits<Graph>::vertex_descriptor> _vertices;
rng_t& _rng;
bool _configuration;
typedef gt_hash_map<size_t, size_t> nmapv_t;
typedef typename property_map_type::apply<nmapv_t,
typename property_map<Graph, vertex_index_t>::type>
::type::unchecked_t nmap_t;
nmap_t _nmap;
};
......@@ -439,11 +493,12 @@ public:
typedef typename EdgeIndexMap::value_type index_t;
RewireStrategyBase(Graph& g, EdgeIndexMap edge_index, vector<edge_t>& edges,
rng_t& rng, bool parallel_edges)
rng_t& rng, bool parallel_edges, bool configuration)
: _g(g), _edge_index(edge_index), _edges(edges), _rng(rng),
_nmap(get(vertex_index, g), num_vertices(g))
_nmap(get(vertex_index, g), num_vertices(g)),
_configuration(configuration)
{
if (!parallel_edges)
if (!parallel_edges || !configuration)
{
for (size_t i = 0; i < edges.size(); ++i)
add_count(source(edges[i], g), target(edges[i], g), _nmap, g);
......@@ -462,10 +517,17 @@ public:
// rewire target
pair<size_t, bool> et = self.get_target_edge(e, parallel_edges);
if (et.first == ei)
return false;
auto s = source(e, _edges, _g);
auto t = target(e, _edges, _g);
auto ts = source(et, _edges, _g);
auto tt = target(et, _edges, _g);
if (!self_loops) // reject self-loops if not allowed
{
if((source(e, _edges, _g) == target(et, _edges, _g)) ||
(target(e, _edges, _g) == source(et, _edges, _g)))
if(s == tt || ts == t)
return false;
}
......@@ -476,31 +538,58 @@ public:
return false;
}
if (e.first != et.first)
double a = 0;
if (!is_directed::apply<Graph>::type::value)
{
a -= log(2 + (s == t) + (ts == tt));
a += log(2 + (s == tt) + (ts == t));
}
if (!_configuration)
{
self.update_edge(e.first, false);
self.update_edge(et.first, false);
map<std::pair<size_t, size_t>, int> delta;
delta[std::make_pair(s, t)] -= 1;
delta[std::make_pair(ts, tt)] -= 1;
delta[std::make_pair(s, tt)] += 1;
delta[std::make_pair(ts, t)] += 1;
if (!parallel_edges)
for (auto& e_d : delta)
{
remove_count(source(e, _edges, _g), target(e, _edges, _g), _nmap, _g);
remove_count(source(et, _edges, _g), target(et, _edges, _g), _nmap, _g);
auto u = e_d.first.first;
auto v = e_d.first.second;
int d = e_d.second;
size_t m = get_count(u, v, _nmap, _g);
a -= lgamma(m + 1) - lgamma((m + 1) + d);
if (!is_directed::apply<Graph>::type::value && u == v)
a += d * log(2);
}
swap_edge::swap_target(e, et, _edges, _g);
}
self.update_edge(e.first, true);
self.update_edge(et.first, true);
std::bernoulli_distribution accept(std::min(exp(a), 1.));
if (!accept(_rng))
return false;
if (!parallel_edges)
{
add_count(source(e, _edges, _g), target(e, _edges, _g), _nmap, _g);
add_count(source(et, _edges, _g), target(et, _edges, _g), _nmap, _g);
}
self.update_edge(e.first, false);
self.update_edge(et.first, false);
if (!parallel_edges || !_configuration)
{
remove_count(source(e, _edges, _g), target(e, _edges, _g), _nmap, _g);
remove_count(source(et, _edges, _g), target(et, _edges, _g), _nmap, _g);
}
else
swap_edge::swap_target(e, et, _edges, _g);
self.update_edge(e.first, true);
self.update_edge(et.first, true);
if (!parallel_edges || !_configuration)
{
return false;
add_count(source(e, _edges, _g), target(e, _edges, _g), _nmap, _g);
add_count(source(et, _edges, _g), target(et, _edges, _g), _nmap, _g);
}
return true;
......@@ -517,6 +606,7 @@ protected:
typename property_map<Graph, vertex_index_t>::type>
::type::unchecked_t nmap_t;
nmap_t _nmap;
bool _configuration;
};
// this will rewire the edges so that the combined (in, out) degree distribution
......@@ -545,8 +635,10 @@ public:
RandomRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb, BlockDeg,
bool, rng_t& rng, bool parallel_edges)
: base_t(g, edge_index, edges, rng, parallel_edges), _g(g) {}
bool, rng_t& rng, bool parallel_edges,
bool configuration)
: base_t(g, edge_index, edges, rng, parallel_edges, configuration),
_g(g) {}
pair<size_t,bool> get_target_edge(pair<size_t,bool>& e, bool)
{
......@@ -592,8 +684,10 @@ public:
CorrelatedRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb, BlockDeg blockdeg,
bool, rng_t& rng, bool parallel_edges)
: base_t(g, edge_index, edges, rng, parallel_edges), _blockdeg(blockdeg), _g(g)
bool, rng_t& rng, bool parallel_edges,
bool configuration)
: base_t(g, edge_index, edges, rng, parallel_edges, configuration),
_blockdeg(blockdeg), _g(g)
{
for (size_t ei = 0; ei < base_t::_edges.size(); ++ei)
{
......@@ -680,9 +774,9 @@ public:
ProbabilisticRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb corr_prob,
BlockDeg blockdeg, bool cache, rng_t& rng,
bool parallel_edges)
: base_t(g, edge_index, edges, rng, parallel_edges), _g(g), _corr_prob(corr_prob),
_blockdeg(blockdeg)
bool parallel_edges, bool configuration)
: base_t(g, edge_index, edges, rng, parallel_edges, configuration),
_g(g), _corr_prob(corr_prob), _blockdeg(blockdeg)
{
if (cache)
{
......@@ -823,9 +917,9 @@ public:
AliasProbabilisticRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb corr_prob,
BlockDeg blockdeg, bool, rng_t& rng,
bool parallel_edges)
: base_t(g, edge_index, edges, rng, parallel_edges), _g(g),
_corr_prob(corr_prob), _blockdeg(blockdeg)
bool parallel_edges, bool configuration)
: base_t(g, edge_index, edges, rng, parallel_edges, configuration),
_g(g), _corr_prob(corr_prob), _blockdeg(blockdeg)
{
_in_pos.resize(base_t::_edges.size());
if (!is_directed::apply<Graph>::type::value)
......@@ -1148,7 +1242,8 @@ private:
// general "traditional" stochastic blockmodel
// this version is based on the alias method, and does not keep the degrees fixed
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockDeg>
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockDeg,
bool micro>
class TradBlockRewireStrategy
{
public:
......@@ -1160,10 +1255,12 @@ public:
TradBlockRewireStrategy(Graph& g, EdgeIndexMap edge_index,
vector<edge_t>& edges, CorrProb corr_prob,
BlockDeg blockdeg, bool, rng_t& rng,
bool)
bool parallel_edges, bool configuration)
: _g(g), _edge_index(edge_index), _edges(edges), _corr_prob(corr_prob),
_blockdeg(blockdeg), _rng(rng), _sampler(nullptr)
_blockdeg(blockdeg), _rng(rng), _sampler(nullptr),
_configuration(configuration),
_nmap(get(vertex_index, g), num_vertices(g))
{
for (auto v : vertices_range(_g))
{
......@@ -1171,44 +1268,53 @@ public:
_vertices[d].push_back(v);
}
std::unordered_map<pair<deg_t, deg_t>, double> probs;
_corr_prob.get_probs(probs);
vector<double> dprobs;
if (probs.empty())
if (!micro)
{
for (auto& s : _vertices)
std::unordered_map<pair<deg_t, deg_t>, double> probs;
_corr_prob.get_probs(probs);
vector<double> dprobs;
if (probs.empty())
{
for (auto& t : _vertices)
for (auto& s : _vertices)
{
double p = _corr_prob(s.first, t.first);
if (std::isnan(p) || std::isinf(p) || p <= 0)
continue;
for (auto& t : _vertices)
{
double p = _corr_prob(s.first, t.first);
if (std::isnan(p) || std::isinf(p) || p <= 0)
continue;
_items.push_back(make_pair(s.first, t.first));
dprobs.push_back(p * s.second.size() * t.second.size());
_items.push_back(make_pair(s.first, t.first));
dprobs.push_back(p * s.second.size() * t.second.size());
}
}
}
}
else
{
for (auto& stp : probs)
else
{
deg_t s = stp.first.first;
deg_t t = stp.first.second;
double p = stp.second;
// avoid zero probability to not get stuck in rejection step
if (std::isnan(p) || std::isinf(p) || p <= 0)
continue;
_items.push_back(make_pair(s, t));
dprobs.push_back(p * _vertices[s].size() * _vertices[t].size());
for (auto& stp : probs)
{
deg_t s = stp.first.first;
deg_t t = stp.first.second;
double p = stp.second;
// avoid zero probability to not get stuck in rejection step
if (std::isnan(p) || std::isinf(p) || p <= 0)
continue;