Commit 8240714e authored by Tiago Peixoto

Several improvements to random_rewire() / random_graph()

This introduces several simplifications and corrections to the graph
rewire algorithm, to guarantee unbiased sampling.

Now a move is rejected outright if it produces a
self-loop/parallel-edge, instead of being retried. This also adds a
"non-sweep" mode, where edges are rewired randomly, possibly with
repetition.

The edge moves are now simplified to swap only the targets of the edges,
since swapping sources is redundant.

The number of iterations can now be explicitly modified, so it is not
necessary to call the function more than once, and it is emphasized in
the documentation that only after sufficiently many iterations can the
graph be guaranteed to be fully mixed.
parent aeaadabc
......@@ -48,11 +48,11 @@ private:
python::object _o;
};
void generate_random_graph(GraphInterface& gi, size_t N,
python::object deg_sample,
bool uncorrelated, bool no_parallel,
bool no_self_loops, bool undirected,
size_t seed, bool verbose, bool verify)
void generate_graph(GraphInterface& gi, size_t N,
python::object deg_sample,
bool uncorrelated, bool no_parallel,
bool no_self_loops, bool undirected,
size_t seed, bool verbose, bool verify)
{
typedef graph_tool::detail::get_all_graph_views::apply<
graph_tool::detail::scalar_pairs, mpl::bool_<false>,
......@@ -65,7 +65,7 @@ void generate_random_graph(GraphInterface& gi, size_t N,
if (uncorrelated)
{
run_action<graph_views>()
(gi, bind<void>(gen_random_graph(), _1, N,
(gi, bind<void>(gen_graph(), _1, N,
PythonFuncWrap(deg_sample),
no_parallel, no_self_loops,
seed, verbose, verify))();
......@@ -73,7 +73,7 @@ void generate_random_graph(GraphInterface& gi, size_t N,
else
{
run_action<graph_views>()
(gi, bind<void>(gen_random_graph(), _1, N,
(gi, bind<void>(gen_graph(), _1, N,
PythonFuncWrap(deg_sample),
no_parallel, no_self_loops,
seed, verbose, verify))();
......@@ -81,9 +81,9 @@ void generate_random_graph(GraphInterface& gi, size_t N,
gi.ReIndexEdges();
}
void random_rewire(GraphInterface& gi, string strat, bool self_loops,
bool parallel_edges, python::object corr_prob, size_t seed,
bool verbose);
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
python::object corr_prob, size_t seed, bool verbose);
void predecessor_graph(GraphInterface& gi, GraphInterface& gpi,
boost::any pred_map);
void line_graph(GraphInterface& gi, GraphInterface& lgi,
......@@ -107,7 +107,7 @@ using namespace boost::python;
BOOST_PYTHON_MODULE(libgraph_tool_generation)
{
def("gen_random_graph", &generate_random_graph);
def("gen_graph", &generate_graph);
def("random_rewire", &random_rewire);
def("predecessor_graph", &predecessor_graph);
def("line_graph", &line_graph);
......
......@@ -448,7 +448,7 @@ bool update_deg(size_t t_i, const pair<size_t, size_t>& deg, Vset& vset,
return true;
}
struct gen_random_graph
struct gen_graph
{
template <class Graph, class DegSample>
void operator()(Graph& g, size_t N, DegSample& deg_sample, bool no_parallel,
......
......@@ -48,37 +48,43 @@ private:
python::object _o;
};
void random_rewire(GraphInterface& gi, string strat, bool self_loops,
bool parallel_edges, python::object corr_prob, size_t seed,
bool verbose)
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
python::object corr_prob, size_t seed, bool verbose)
{
rng_t rng(static_cast<rng_t::result_type>(seed));
PythonFuncWrap corr(corr_prob);
size_t pcount = 0;
if (strat == "erdos")
run_action<graph_tool::detail::never_reversed>()
(gi, boost::bind<void>(graph_rewire<ErdosRewireStrategy>(),
_1, gi.GetEdgeIndex(), boost::ref(corr),
boost::ref(rng), self_loops, parallel_edges,
verbose))();
self_loops, parallel_edges,
make_pair(niter, no_sweep), verbose,
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "uncorrelated")
run_action<graph_tool::detail::never_reversed>()
(gi, boost::bind<void>(graph_rewire<RandomRewireStrategy>(),
_1, gi.GetEdgeIndex(), boost::ref(corr),
boost::ref(rng), self_loops, parallel_edges,
verbose))();
self_loops, parallel_edges,
make_pair(niter, no_sweep), verbose,
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "correlated")
run_action<graph_tool::detail::never_reversed>()
(gi, boost::bind<void>(graph_rewire<CorrelatedRewireStrategy>(),
_1, gi.GetEdgeIndex(), boost::ref(corr),
boost::ref(rng), self_loops, parallel_edges,
verbose))();
self_loops, parallel_edges,
make_pair(niter, no_sweep), verbose,
boost::ref(pcount), boost::ref(rng)))();
else if (strat == "probabilistic")
run_action<>()
(gi, boost::bind<void>(graph_rewire<ProbabilisticRewireStrategy>(),
_1, gi.GetEdgeIndex(), boost::ref(corr),
boost::ref(rng), self_loops, parallel_edges,
verbose))();
self_loops, parallel_edges,
make_pair(niter, no_sweep), verbose,
boost::ref(pcount), boost::ref(rng)))();
else
throw ValueException("invalid random rewire strategy: " + strat);
return pcount;
}
This diff is collapsed.
......@@ -37,34 +37,98 @@ template <class ValueType>
class Sampler
{
public:
Sampler(bool biased=false): _biased(biased), _erased_prob(0) {}
Sampler() {}
template <class Iterator>
Sampler(Iterator iter, Iterator end):
_biased(false)
Sampler(Iterator iter, Iterator end)
{
for(; iter != end; ++iter)
Insert(*iter);
}
void Insert(const ValueType& v)
{
_candidates.push_back(v);
_candidates_set.insert(make_pair(v, _candidates.size() - 1));
}
bool HasValue(const ValueType& v)
{
typeof(_candidates_set.begin()) iter, end;
tie(iter, end) = _candidates_set.equal_range(v);
return (iter != end);
}
void Remove(const ValueType& v)
{
typeof(_candidates_set.begin()) iter, back;
iter = _candidates_set.find(v);
if (iter == _candidates_set.end())
return;
back = _candidates_set.find(_candidates.back());
size_t index = iter->second;
swap(_candidates[index], _candidates.back());
_candidates.pop_back();
if (!_candidates.empty() && back != iter)
{
_candidates.push_back(*iter);
_candidates_set.insert(make_pair(*iter, _candidates.size()-1));
_candidates_set.erase(back);
_candidates_set.insert(make_pair(_candidates[index], index));
}
//assert(!_candidates.empty());
_candidates_set.erase(iter);
}
void Insert(const ValueType& v, double p = 0.0)
bool Empty()
{
_candidates.push_back(v);
_candidates_set.insert(make_pair(v, _candidates.size()-1));
if (_biased)
return _candidates.empty();
}
size_t Size()
{
return _candidates.size();
}
ValueType operator()(rng_t& rng, bool remove = false)
{
//assert(!_candidates.empty());
tr1::uniform_int<> sample(0, _candidates.size() - 1);
int i = sample(rng);
if (remove)
{
if (_probs.size() > 0)
_probs.push_back(_probs.back()+p);
else
_probs.push_back(p);
_erased.push_back(false);
swap(_candidates[i], _candidates.back());
ValueType ret = _candidates.back();
_candidates.pop_back();
return ret;
}
else
{
return _candidates[i];
}
}
private:
vector<ValueType> _candidates;
tr1::unordered_multimap<ValueType, size_t, hash<ValueType> >
_candidates_set;
};
template <class ValueType>
class WeightedSampler
{
public:
void Insert(const ValueType& v, double p)
{
_candidates.push_back(make_pair(v, p));
_candidates_set.insert(make_pair(v, _candidates.size() - 1));
_erased.push_back(false);
_rebuild = true;
}
bool HasValue(const ValueType& v)
{
typeof(_candidates_set.begin()) iter, end;
......@@ -76,36 +140,23 @@ public:
{
typeof(_candidates_set.begin()) iter, end, temp;
tie(iter, end) = _candidates_set.equal_range(v);
//assert(iter != end);
if (iter == end)
return;
if (_biased)
while(_erased[iter->second])
{
while(_erased[iter->second])
{
temp = iter++;
_candidates_set.erase(temp);
}
size_t index = iter->second;
_erased[index] = true;
_erased_prob += (index > 0) ?
_probs[index]-_probs[index-1] : _probs[index];
temp = iter++;
_candidates_set.erase(temp);
if (iter == end)
return;
}
else
{
size_t index = iter->second;
temp = _candidates_set.find(_candidates.back());
swap(_candidates[index], _candidates.back());
_candidates.pop_back();
if (!_candidates.empty() && temp != iter)
{
_candidates_set.erase(temp);
_candidates_set.insert(make_pair(_candidates[index], index));
}
}
_candidates_set.erase(iter);
clean();
size_t index = iter->second;
_erased[index] = true;
_erased_prob += _candidates[index].second;
if (_erased_prob >= 0.3)
_rebuild = true;
}
bool Empty()
......@@ -118,111 +169,108 @@ public:
return _candidates.size();
}
ValueType operator()(rng_t& rng, bool remove = false)
void BuildTable()
{
//assert(!_candidates.empty());
if (!_biased)
// remove possibly erased elements
size_t i = 0;
while (i < _candidates.size())
{
tr1::uniform_int<> sample(0, _candidates.size() - 1);
int i = sample(rng);
if (remove)
if (_erased[i])
{
swap(_candidates[i], _candidates.back());
ValueType ret = _candidates.back();
swap(_erased[i], _erased.back());
_candidates.pop_back();
return ret;
_erased.pop_back();
}
else
{
return _candidates[i];
++i;
}
}
else
_erased_prob = 0;
vector<pair<size_t, double> > remainder;
_alias.resize(_candidates.size());
double P_sum = 0;
for (size_t i = 0; i < _candidates.size(); ++i)
P_sum += _candidates[i].second;
size_t N = _candidates.size();
double P = 1.0 / N;
for (size_t i = 0; i < _candidates.size(); ++i)
{
_candidates[i].second /= P_sum;
double pi = _candidates[i].second;
if (pi > P)
remainder.push_back(make_pair(i, pi - P));
_alias[i] = make_pair(i, .1);
}
for (size_t i = 0; i < _candidates.size(); ++i)
{
size_t i = 0;
do
double pi = _candidates[i].second;
if (pi < P)
{
if (_probs.back() > 0)
{
tr1::variate_generator<rng_t&, tr1::uniform_real<> >
sample(rng, tr1::uniform_real<>(0.0, _probs.back()));
double r = sample();
i = upper_bound(_probs.begin(), _probs.end(), r) -
_probs.begin();
}
else
for (size_t j = 0; j < remainder.size(); ++j)
{
// all probabilities are zero... sample randomly.
tr1::uniform_int<size_t>
sample(0, _candidates_set.size()-1);
size_t j = sample(rng), count = 0;
for (typeof(_candidates_set.begin()) iter =
_candidates_set.begin();
iter != _candidates_set.end(); ++iter)
if (remainder[j].second >= P - pi)
{
if (count == j)
_alias[i] = make_pair(remainder[j].first, pi * N);
remainder[j].second -= P - pi;
if (remainder[j].second <= 0)
{
i = iter->second;
break;
swap(remainder[j], remainder.back());
remainder.pop_back();
}
count++;
break;
}
}
} while (_erased[i]);
if (remove)
{
_erased[i] = true;
_erased_prob += (i > 0) ? _probs[i] - _probs[i-1] : _probs[i];
clean();
}
return _candidates[i];
}
_rebuild = false;
}
void clean()
ValueType operator()(rng_t& rng, bool remove = false)
{
// if too many elements were erased, we need to make things less sparse
if (_biased && !_candidates_set.empty() &&
_erased_prob >= _probs.back()/3)
{
for (int i = int(_probs.size()) - 1; i > 0; --i)
_probs[i] -= _probs[i-1];
for (size_t i = 0; i < _candidates.size(); ++i)
{
while (i < _erased.size() && _erased[i])
{
swap(_candidates[i], _candidates.back());
_candidates.pop_back();
swap(_probs[i], _probs.back());
_probs.pop_back();
if (_rebuild)
BuildTable();
swap(_erased[i], _erased.back());
_erased.pop_back();
}
}
tr1::variate_generator<rng_t&, tr1::uniform_real<> >
sample(rng, tr1::uniform_real<>(0.0, 1.0));
size_t i;
do
{
double r = sample() * _candidates.size();
i = floor(r); // in [0, n-1]
double x = r - i; // in [0, 1)
for (size_t i = 1; i < _probs.size(); i++)
_probs[i] += _probs[i-1];
if (x > _alias[i].second)
i = _alias[i].first;
}
while (_erased[i]);
_candidates_set.clear();
for (size_t i = 0; i < _candidates.size(); i++)
_candidates_set.insert(make_pair(_candidates[i],i));
_erased_prob = 0.0;
if (remove)
{
_erased[i] = true;
_erased_prob += _candidates[i].second;
if (_erased_prob >= 0.3)
_rebuild = true;
}
return _candidates[i].first;
}
private:
bool _biased;
vector<ValueType> _candidates;
vector<pair<ValueType, double> > _candidates;
tr1::unordered_multimap<ValueType, size_t, hash<ValueType> >
_candidates_set;
vector<double> _probs;
vector<pair<size_t, double> > _alias;
vector<uint8_t> _erased;
double _erased_prob;
bool _erased_prob;
bool _rebuild;
};
} // namespace graph_tool
......
......@@ -132,23 +132,23 @@ def pagerank(g, damping=0.85, pers=None, weight=None, prop=None, epsilon=1e-6,
>>> g = gt.random_graph(100, lambda: (poisson(3), poisson(3)))
>>> pr = gt.pagerank(g)
>>> print pr.a
[ 0.00782362 0.01642353 0.00420484 0.0038825 0.0015 0.01145378
0.00514203 0.00593481 0.00743705 0.00785063 0.00446447 0.00440222
0.00684158 0.00463226 0.00518308 0.0056288 0.01207045 0.00617264
0.00958574 0.00817165 0.01041552 0.00508079 0.0015 0.00249411
0.00842537 0.00293099 0.00873296 0.001755 0.003371 0.00817938
0.00406813 0.00576584 0.01188752 0.00674565 0.00758134 0.00855306
0.00975204 0.00823918 0.00209855 0.00753858 0.0015 0.001925
0.00593262 0.00603431 0.00977679 0.00707922 0.00529399 0.01048882
0.001755 0.0111949 0.0032813 0.01591077 0.00407595 0.01015827
0.00383036 0.01024311 0.00714593 0.00379142 0.00955729 0.001925
0.00737848 0.00352088 0.00654273 0.00676324 0.00353259 0.0015
0.00809045 0.00864939 0.00626611 0.00632213 0.00939761 0.0015
0.00584767 0.0077272 0.00688094 0.01010526 0.01071083 0.00550524
0.0045327 0.00577072 0.00337711 0.00637928 0.01295484 0.0015
0.00265875 0.003245 0.00203456 0.00969993 0.00908983 0.00759961
0.00428542 0.00674196 0.0043264 0.01339053 0.00570051 0.00253539
0.01464169 0.00505055 0.01919599 0.01413612]
[ 0.00865316 0.0054067 0.00406312 0.00426668 0.0015 0.00991696
0.00550065 0.00936397 0.00347917 0.00731864 0.00689843 0.00286274
0.00508731 0.01020047 0.00562247 0.00584915 0.02457086 0.00438568
0.0057385 0.00621745 0.001755 0.0045073 0.0015 0.00225167
0.00698342 0.00206302 0.01094466 0.001925 0.00710093 0.00519877
0.00460646 0.00994648 0.01005248 0.00904629 0.00676221 0.00789208
0.00933103 0.00301154 0.00264951 0.00842812 0.0015 0.00191034
0.00594069 0.00884372 0.00453417 0.00388987 0.00317433 0.0086067
0.00385394 0.00672702 0.00258411 0.01468262 0.00454 0.00381159
0.00402607 0.00451133 0.00480966 0.00811557 0.00571949 0.00317433
0.00856838 0.00280517 0.00280563 0.00906324 0.00614421 0.0015
0.00292034 0.00479769 0.00552694 0.00604799 0.0115922 0.0015
0.00676183 0.00695336 0.01023352 0.01737541 0.00451443 0.00197688
0.00553866 0.00486233 0.0078653 0.00867599 0.01248092 0.0015
0.00399605 0.00399605 0.00881571 0.00638008 0.01056944 0.00353724
0.00249869 0.00684919 0.00241374 0.01061397 0.00673569 0.00590937
0.01004638 0.00331612 0.00926359 0.00460809]
Now with a personalization vector, and edge weights:
......@@ -159,23 +159,23 @@ def pagerank(g, damping=0.85, pers=None, weight=None, prop=None, epsilon=1e-6,
>>> p.a /= p.a.sum()
>>> pr = gt.pagerank(g, pers=p, weight=w)
>>> print pr.a
[ 0.01693559 0.01316915 0.00369907 0.00245658 0.00092715 0.01380721
0.00703909 0.00407121 0.00816254 0.00880131 0.0035886 0.0050914
0.00815843 0.00624021 0.0069828 0.00647311 0.01260669 0.00884083
0.01324534 0.01103024 0.01417902 0.00309344 0.00250025 0.00153889
0.00969556 0.00491575 0.00552323 0.00300698 0.00327355 0.00829017
0.00274335 0.00440865 0.01436394 0.00671045 0.00788395 0.01092875
0.0126331 0.00789263 0.00422443 0.00745144 0.00148972 0.00198663
0.00476339 0.00800871 0.01468149 0.00971962 0.00446663 0.01333257
0.00085768 0.01044298 0.00286075 0.02119469 0.00406517 0.01317145
0.00280023 0.0143227 0.00867722 0.00234863 0.01180399 0.00298827
0.0049022 0.00532752 0.00603759 0.00766617 0.00293739 0.00238803
0.00863735 0.01110095 0.00660816 0.00170262 0.00884469 0.00300867
0.00441168 0.00630793 0.00424727 0.00906709 0.0135949 0.00890726
0.00267835 0.00615783 0.0045653 0.00720592 0.00996495 0.0009367
0.00233309 0.00265909 0.00211686 0.01277934 0.01284484 0.00625721
0.00487027 0.00852522 0.00403389 0.01817233 0.00573321 0.0038696
0.00932334 0.00515806 0.01601592 0.0167547 ]
[ 0.00712999 0.00663336 0.00685722 0.00402663 0.00092715 0.01021926
0.00269502 0.0073301 0.00449892 0.00582793 0.00580542 0.00275149
0.00676363 0.01157972 0.00486918 0.00616345 0.02506695 0.00607967
0.00553375 0.00359075 0.00293808 0.00362247 0.00250025 0.00186946
0.00895516 0.00318147 0.01489786 0.00312436 0.0074751 0.0040342
0.006254 0.00687051 0.0098073 0.01076278 0.00887077 0.00806759
0.00969532 0.00252648 0.00278688 0.00972144 0.00148972 0.00215428
0.00713602 0.00559849 0.00495517 0.00457118 0.00323767 0.01257406
0.00120179 0.00514838 0.00130655 0.01724465 0.00343819 0.00420962
0.00297617 0.00588287 0.00657206 0.00775082 0.00758217 0.00433776
0.00576829 0.00464595 0.00307274 0.00585795 0.00745881 0.00238803
0.00230431 0.00437046 0.00492464 0.00275414 0.01524646 0.00300867
0.00816665 0.00548853 0.00874738 0.01871498 0.00216776 0.00245196
0.00308878 0.00646323 0.01287978 0.00911384 0.01628604 0.0009367
0.00222119 0.00864202 0.01199119 0.01126539 0.01086846 0.00309224
0.0020319 0.00659422 0.00226965 0.0134399 0.01094141 0.00732916
0.00489314 0.0030402 0.00783914 0.00278588]
References
----------
......@@ -259,31 +259,23 @@ def betweenness(g, vprop=None, eprop=None, weight=None, norm=True):
>>> g = gt.random_graph(100, lambda: (poisson(3), poisson(3)))
>>> vb, eb = gt.betweenness(g)
>>> print vb.a
[ 2.65012897e-02 1.04414799e-01 2.73374899e-02 1.52782183e-02
0.00000000e+00 2.74548352e-02 3.54680121e-02 3.72671558e-02
2.39732112e-02 2.34942149e-02 2.97950758e-02 4.08351383e-02
4.31702840e-02 1.90317902e-02 3.66879750e-02 8.65571818e-03
0.00000000e+00 3.74046494e-02 4.22428130e-02 2.10503176e-02
1.39558854e-02 8.40349783e-03 0.00000000e+00 4.45784374e-03
3.38671970e-02 1.72390157e-02 4.82232543e-02 1.03071532e-04
1.42200266e-02 4.82793598e-02 1.82020235e-02 0.00000000e+00
7.04969679e-02 2.31267158e-02 6.42817952e-02 3.71139131e-02
3.81618985e-02 4.06231715e-02 2.16376594e-03 2.44758076e-02
0.00000000e+00 6.86198722e-03 1.36132952e-02 1.73886977e-02
2.30213129e-02 4.44999980e-02 0.00000000e+00 1.40589569e-02
0.00000000e+00 4.74213177e-02 2.65427674e-02 1.05684330e-01
6.30552365e-03 2.86320444e-02 4.50079022e-03 7.76843152e-02
2.88642900e-02 3.52207159e-02 2.01852506e-02 9.26784855e-04
4.35733012e-02 1.84745904e-02 1.35102237e-02 2.69638287e-02
1.88247064e-02 0.00000000e+00 2.03784688e-02 4.14981678e-02
1.79538495e-02 1.12983577e-02 3.23765203e-02 0.00000000e+00
3.99771399e-02 2.85164571e-03 2.18967289e-02 3.96111705e-02
3.40096863e-02 1.72800650e-02 1.36861815e-02 0.00000000e+00
1.19328203e-02 1.71726485e-02 0.00000000e+00 0.00000000e+00
6.33251858e-03 4.64324980e-03 1.33084980e-03 9.89021626e-02
3.52934995e-02 2.96267777e-02 1.73480268e-02 3.07545000e-02
2.47891161e-02 3.32486832e-02 7.45403501e-03 1.46792267e-02
0.00000000e+00 3.35642472e-02 8.78597450e-02 3.94517740e-02]
[ 0.04889806 0.07181892 0.0256799 0.02885791 0. 0.05060927
0.04490836 0.03763462 0.02033383 0.03163202 0.02641248 0.03171598
0.03771112 0.02194663 0.0374907 0.01072567 0. 0.03079281
0.05409258 0.00163434 0.00051978 0.01045902 0. 0.00796784
0.0494527 0.00647576 0.03708252 0.00304503 0.0663657 0.03903257
0.03305169 0. 0.07787098 0.03938866 0.08577116 0.020183
0.06024004 0.01004935 0.0443127 0.06397736 0. 0.00363548
0.01742486 0.03216543 0.01918144 0.02059159 0. 0.01476213
0. 0.0466751 0.01072612 0.10288046 0.00563973 0.03850413
0.00629595 0.01292137 0.0537963 0.04454985 0.01227018 0.00729488
0.02092959 0.02308238 0.00712703 0.02193975 0.03823342 0.
0.00995364 0.04023839 0.0312708 0.0111312 0.00228516 0.
0.09659583 0.01327402 0.05792071 0.08606828 0.0143541 0.00221604
0.02144698 0. 0.04023879 0.00715758 0. 0.