Commit 54880035 authored by Tiago Peixoto

Fix bias bug in random_graph() in correlated mode

Target degrees were not being sampled correctly according to their probabilities.
parent b0c2a3c3
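For context, the core of the fix is how a candidate index is drawn from the cumulative probability vector `_probs`: the new code inverts it with `upper_bound` instead of `lower_bound`, and re-draws (the `do ... while (_erased[i])` loop) instead of scanning forward from an erased entry, which had shifted probability mass onto whichever candidates happened to follow erased ones. The stand-alone sketch below illustrates only the cumulative-sum inversion; it is not graph-tool code, and the weights, names (`w`, `cum`, `hits`) and constants are made up for illustration:

    // Minimal sketch (not graph-tool code; weights and names are illustrative).
    // Sampling an index i with probability w[i]/sum(w) by inverting the vector
    // of running sums with upper_bound, as the fixed Sampler does with _probs.
    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <random>
    #include <vector>

    int main()
    {
        std::vector<double> w = {0.1, 0.0, 2.0, 0.9};      // per-candidate weights
        std::vector<double> cum(w.size());
        std::partial_sum(w.begin(), w.end(), cum.begin()); // running sums

        std::mt19937 rng(42);
        std::uniform_real_distribution<double> u(0.0, cum.back()); // r in [0, total)

        const int N = 100000;
        std::vector<int> hits(w.size(), 0);
        for (int n = 0; n < N; ++n)
        {
            double r = u(rng);
            // first position whose running sum is strictly greater than r
            size_t i = std::upper_bound(cum.begin(), cum.end(), r) - cum.begin();
            ++hits[i];
        }
        for (size_t i = 0; i < w.size(); ++i)
            std::cout << i << ": " << double(hits[i]) / N << "\n"; // ~ w[i]/3.0
    }

Drawing `r` uniformly in `[0, total)` and taking the first running sum strictly greater than `r` picks index `i` with probability proportional to `w[i]`, and never returns a zero-weight entry; with `lower_bound`, a draw landing exactly on a boundary (e.g. `r == 0` with a zero first weight) can. In the biased mode, the new `_erased_prob` bookkeeping and `clean()` method in the diff below then compact the candidate vectors once at least a third of the total probability mass belongs to erased entries.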
@@ -83,7 +83,7 @@ template <class ValueType>
 class Sampler
 {
 public:
-    Sampler(bool biased=false): _biased(biased) {}
+    Sampler(bool biased=false): _biased(biased), _erased_prob(0) {}
     template <class Iterator>
     Sampler(Iterator iter, Iterator end):
@@ -130,6 +130,8 @@ public:
                 size_t index = iter->second;
                 _erased[index] = true;
+                _erased_prob += (index > 0) ?
+                    _probs[index]-_probs[index-1] : _probs[index];
             }
             else
             {
@@ -145,33 +147,7 @@ public:
             }
             _candidates_set.erase(iter);
-        // if too many elements were erased, we need to make things less sparse
-        if (!_candidates_set.empty() &&
-            _candidates_set.size() < _candidates.size()/2)
-        {
-            for (int i = _probs.size() - 1; i > 0; --i)
-                _probs[i] -= _probs[i-1];
-            for (int i = _candidates.size() - 1; i >= 0; --i)
-            {
-                if (_erased[i])
-                {
-                    _candidates.erase(_candidates.begin() + i);
-                    _probs.erase(_probs.begin() + i);
-                }
-            }
-            for (size_t i = 1; i < _probs.size(); i++)
-                _probs[i] += _probs[i-1];
-            _candidates_set.clear();
-            _erased.resize(_candidates.size());
-            for (size_t i = 0; i < _candidates.size(); i++)
-            {
-                _candidates_set.insert(make_pair(_candidates[i],i));
-                _erased[i] = false;
-            }
-        }
+        clean();
     }
     ValueType operator()(rng_t& rng, bool remove = false)
@@ -195,61 +171,89 @@ public:
         else
         {
             size_t i;
-            if (_probs.back() > 0)
-            {
-                tr1::variate_generator<rng_t, tr1::uniform_real<> >
-                    sample(rng, tr1::uniform_real<>(0.0, _probs.back()));
-                double r = sample();
-                i = lower_bound(_probs.begin(), _probs.end(), r) -
-                    _probs.begin();
-            }
-            else
-            {
-                // all probabilities are zero... sample randomly.
-                tr1::uniform_int<size_t> sample(0, _candidates_set.size()-1);
-                size_t j = sample(rng), count = 0;
-                for (typeof(_candidates_set.begin()) iter =
-                         _candidates_set.begin();
-                     iter != _candidates_set.end(); ++iter)
-                {
-                    if (count == j)
-                    {
-                        i = iter->second;
-                        break;
-                    }
-                    count++;
-                }
-            }
-            for (; i < _probs.size(); ++i)
-                if (!_erased[i])
-                    break;
-            if (i == _probs.size())
-                for (i = _probs.size()-1; i < _probs.size(); --i)
-                    if (!_erased[i])
-                        break;
-            if (i >= _probs.size())
-            {
-                size_t n = 0;
-                for (size_t j = 0; j < _erased.size(); ++j)
-                    if (_erased[j])
-                        n++;
-            }
-            if (remove)
-                _erased[i] = true;
+            do
+            {
+                if (_probs.back() > 0)
+                {
+                    tr1::variate_generator<rng_t&, tr1::uniform_real<> >
+                        sample(rng, tr1::uniform_real<>(0.0, _probs.back()));
+                    double r = sample();
+                    i = upper_bound(_probs.begin(), _probs.end(),r) -
+                        _probs.begin();
+                }
+                else
+                {
+                    // all probabilities are zero... sample randomly.
+                    tr1::uniform_int<size_t>
+                        sample(0, _candidates_set.size()-1);
+                    size_t j = sample(rng), count = 0;
+                    for (typeof(_candidates_set.begin()) iter =
+                             _candidates_set.begin();
+                         iter != _candidates_set.end(); ++iter)
+                    {
+                        if (count == j)
+                        {
+                            i = iter->second;
+                            break;
+                        }
+                        count++;
+                    }
+                }
+            } while (_erased[i]);
+            if (remove)
+            {
+                _erased[i] = true;
+                _erased_prob += (i > 0) ? _probs[i] - _probs[i-1] : _probs[i];
+                clean();
+            }
             return _candidates[i];
         }
     }
+    void clean()
+    {
+        // if too many elements were erased, we need to make things less sparse
+        if (_biased && !_candidates_set.empty() &&
+            _erased_prob >= _probs.back()/3)
+        {
+            for (int i = _probs.size() - 1; i > 0; --i)
+                _probs[i] -= _probs[i-1];
+            for (int i = 0; i < _candidates.size(); ++i)
+            {
+                while (i < _erased.size() && _erased[i])
+                {
+                    swap(_candidates[i], _candidates.back());
+                    _candidates.pop_back();
+                    swap(_probs[i], _probs.back());
+                    _probs.pop_back();
+                    swap(_erased[i], _erased.back());
+                    _erased.pop_back();
+                }
+            }
+            for (size_t i = 1; i < _probs.size(); i++)
+                _probs[i] += _probs[i-1];
+            _candidates_set.clear();
+            for (size_t i = 0; i < _candidates.size(); i++)
+                _candidates_set.insert(make_pair(_candidates[i],i));
+            _erased_prob = 0.0;
+        }
+    }
 private:
     bool _biased;
     vector<ValueType> _candidates;
     tr1::unordered_multimap<ValueType, size_t, hash<ValueType> >
         _candidates_set;
     vector<double> _probs;
-    vector<bool> _erased;
+    vector<uint8_t> _erased;
+    double _erased_prob;
 };
 // used for verbose display
@@ -82,8 +82,8 @@ def random_graph(N, deg_sampler, deg_corr=None, directed=True,
     The uncorrelated case, the complexity is :math:`O(V+E)`. For the correlated
     case the worst-case complexity is :math:`O(V^2)`, but the typical case has
-    complexity :math:`O(V+N_k^2)`, where :math:`N_k < V` is the number of
-    different degrees sampled (or in,out-degree pairs).
+    complexity :math:`O(V + E\log N_k + N_k^2)`, where :math:`N_k < V` is the
+    number of different degrees sampled (or in,out-degree pairs).
     Examples
     --------
@@ -114,7 +114,7 @@ def random_graph(N, deg_sampler, deg_corr=None, directed=True,
     >>> g = gt.random_graph(1000, lambda: sample_k(40),
     ...                     lambda i,k: 1.0/(1+abs(i-k)), directed=False)
     >>> gt.scalar_assortativity(g, "out")
-    (0.52810631736984548, 0.012618649197538264)
+    (0.59472179721535989, 0.011919463022240388)
     The following samples an in,out-degree pair from the joint distribution: