Commit 6063ece0 authored by Tiago Peixoto's avatar Tiago Peixoto

Rewritten graph generation routine.

It is now much simpler, and works better.
parent 90462548
## Process this file with automake to produce Makefile.in
AM_CPPFLAGS =\
-I. -I.. \
-I $(pythondir)/numpy/core/include/numpy/ \
-I../boost-workaround \
-DHAVE_CONFIG_H
AM_CXXFLAGS =\
-Wall \
$(PYTHON_CPPFLAGS) \
$(BOOST_CPPFLAGS)
AM_CFLAGS=$(AM_CXXFLAGS)
libgraph_tool_generationdir = $(pythondir)/graph_tool/generation
libgraph_tool_generation_LTLIBRARIES = libgraph_tool_generation.la
libgraph_tool_generation_la_includedir = $(pythondir)/graph_tool/include
libgraph_tool_generation_la_SOURCES = \
graph_generation.cc
libgraph_tool_generation_la_include_HEADERS = \
graph_generation.hh
libgraph_tool_generation_la_LIBADD = \
$(PYTHON_LDFLAGS) \
$(BOOST_LDFLAGS) \
$(OPENMP_LDFLAGS) \
-lboost_python \
-lboost_iostreams \
-lexpat
# needed for typeinfo objects to work across DSO boundaries.
# see http://gcc.gnu.org/faq.html#dso
libgraph_tool_generation_la_LDFLAGS = \
-module \
-avoid-version \
-export-dynamic \
-no-undefined \
-Wl,-E
......@@ -15,22 +15,11 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <tr1/unordered_set>
#include <boost/lambda/bind.hpp>
#include <boost/random.hpp>
#include <boost/python.hpp>
#include <boost/function.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/device/file.hpp>
#include <iomanip>
#include <map>
#include "graph.hh"
#include "histogram.hh"
#include "graph_util.hh"
#include "graph_filtering.hh"
#include "graph_generation.hh"
#include <boost/python.hpp>
using namespace std;
using namespace boost;
......@@ -39,536 +28,78 @@ using namespace graph_tool;
typedef boost::mt19937 rng_t;
// this will sample a (j,k) pair from a pjk distribution given a ceil function
// and its inverse
template <class Distribution, class Ceil, class InvCeil>
struct sample_from_distribution
{
sample_from_distribution(Distribution &dist, Ceil& ceil, InvCeil &inv_ceil,
double bound, rng_t& rng)
: _dist(dist), _ceil(ceil), _inv_ceil(inv_ceil), _bound(bound),
_rng(rng), _uniform_p(0.0, 1.0) {}
pair<size_t, size_t> operator()()
{
// sample j,k from ceil
size_t j,k;
double u;
do
{
tie(j,k) = _inv_ceil(_uniform_p(_rng), _uniform_p(_rng));
u = _uniform_p(_rng);
}
while (u > _dist(j,k)/(_bound*_ceil(j,k)));
return make_pair(j,k);
}
Distribution& _dist;
Ceil& _ceil;
InvCeil& _inv_ceil;
double _bound;
rng_t &_rng;
boost::uniform_real<double> _uniform_p;
};
// desired vertex type, with desired j,k values and the index in the real graph
struct dvertex_t
{
dvertex_t() {}
dvertex_t(size_t in, size_t out): in_degree(in), out_degree(out) {}
dvertex_t(const pair<size_t,size_t>& deg): in_degree(deg.first),
out_degree(deg.second) {}
size_t index, in_degree, out_degree;
bool operator==(const dvertex_t& other) const {return other.index == index;}
};
inline std::size_t hash_value(const dvertex_t& v)
{
size_t h = hash_value(v.in_degree);
hash_combine(h, v.out_degree);
return h;
}
inline size_t dist(const dvertex_t& a, const dvertex_t& b)
{
return int(a.in_degree-b.in_degree)*int(a.in_degree-b.in_degree) +
int(a.out_degree-b.out_degree)*int(a.out_degree-b.out_degree);
}
struct total_deg_comp
{
bool operator()(const pair<size_t,size_t>& a, const pair<size_t,size_t>& b)
{
return a.first + a.second < b.first + b.second;
}
};
// this structure will keep the existing (j,k) pairs in the graph in a matrix,
// so that the nearest (j,k) to a given target can be found easily.
class degree_matrix_t
class PythonFuncWrap
{
public:
degree_matrix_t(size_t N, size_t minj, size_t mink, size_t maxj,
size_t maxk)
{
_L = max(size_t(pow(2,ceil(log2(sqrt(N))))),size_t(2));
_minj = minj;
_mink = mink;
_maxj = max(maxj,_L);
_maxk = max(maxk,_L);
_bins.resize(_L, vector<vector<pair<size_t,size_t> > >(_L));
_high_bins.resize(size_t(log2(_L)));
for(size_t i = 0; i < _high_bins.size(); ++i)
_high_bins[i].resize(_L/(1<<(i+1)), vector<size_t>(_L/(1<<(i+1))));
}
void insert(const pair<size_t, size_t>& v)
{
size_t j_bin, k_bin;
tie(j_bin, k_bin) = get_bin(v.first, v.second, 0);
_bins[j_bin][k_bin].push_back(v);
for (size_t i = 0; i < _high_bins.size(); ++i)
{
size_t hj,hk;
tie(hj,hk) = get_bin(j_bin,k_bin, i+1);
_high_bins[i][hj][hk]++;
}
}
void erase(const pair<size_t,size_t>& v)
{
size_t j_bin, k_bin;
tie(j_bin, k_bin) = get_bin(v.first, v.second, 0);
for(size_t i = 0; i < _bins[j_bin][k_bin].size(); ++i)
{
if (_bins[j_bin][k_bin][i] == v)
{
_bins[j_bin][k_bin].erase(_bins[j_bin][k_bin].begin()+i);
break;
}
}
for (size_t i = 0; i < _high_bins.size(); ++i)
{
size_t hj,hk;
tie(hj,hk) = get_bin(j_bin,k_bin, i+1);
_high_bins[i][hj][hk]--;
}
PythonFuncWrap(python::object o): _o(o) {}
}
pair<size_t,size_t> find_closest(size_t j, size_t k, rng_t& rng)
pair<size_t, size_t> operator()() const
{
vector<pair<size_t,size_t> > candidates;
size_t level;
// find the appropriate level on which to operate
for (level = _high_bins.size(); level <= 0; --level)
{
size_t hj, hk;
tie(hj,hk) = get_bin(j,k,level);
if (get_bin_count(hj,hk,level) == 0)
{
if (level < _high_bins.size())
level++;
break;
}
}
size_t j_bin, k_bin;
tie(j_bin, k_bin) = get_bin(j, k, level);
for (size_t hj = ((j_bin>0)?j_bin-1:j_bin);
hj < j_bin + 1 && hj <= get_bin(_maxj, _maxk, level).first; ++hj)
for (size_t hk = ((k_bin>0)?k_bin-1:k_bin);
hk < k_bin + 1 && hk <= get_bin(_maxj, _maxk, level).second;
++hk)
search_bin(hj,hk,j,k,level,candidates);
uniform_int<size_t> sample(0, candidates.size() - 1);
return candidates[sample(rng)];
python::object ret = _o();
return python::extract<pair<size_t,size_t> >(ret);
}
private:
pair<size_t,size_t> get_bin(size_t j, size_t k, size_t level)
size_t operator()(bool) const
{
if (level == 0)
return make_pair(((j-_minj)*(_L-1))/_maxj,
((k-_mink)*(_L-1))/_maxk);
pair<size_t, size_t> bin = get_bin(j,k,0);
bin.first /= 1 << level;
bin.second /= 1 << level;
return bin;
python::object ret = _o();
return python::extract<size_t>(ret);
}
size_t get_bin_count(size_t bin_j, size_t bin_k, size_t level)
size_t operator()(size_t k) const
{
if (level == 0)
return _bins[bin_j][bin_k].size();
else
return _high_bins[level-1][bin_j][bin_k];
python::object ret = _o(k);
return python::extract<size_t>(ret);
}
void search_bin(size_t hj, size_t hk, size_t j, size_t k, size_t level,
vector<pair<size_t,size_t> >& candidates)
pair<size_t, size_t> operator()(pair<size_t, size_t> deg) const
{
size_t w = 1 << level;
for (size_t j_bin = hj*w; j_bin < (hj+1)*w; ++j_bin)
for (size_t k_bin = hk*w; k_bin < (hk+1)*w; ++k_bin)
{
for (size_t i = 0; i < _bins[j_bin][k_bin].size(); ++i)
{
pair<size_t, size_t>& v = _bins[j_bin][k_bin][i];
if (candidates.empty())
{
candidates.push_back(v);
continue;
}
if (dist(dvertex_t(v), dvertex_t(j,k)) <
dist(dvertex_t(candidates.front()),dvertex_t(j,k)))
{
candidates.clear();
candidates.push_back(v);
}
else if (dist(dvertex_t(v), dvertex_t(j,k)) ==
dist(dvertex_t(candidates.front()),dvertex_t(j,k)))
{
candidates.push_back(v);
}
}
}
python::object ret = _o(deg.first, deg.second);
return python::extract<pair<size_t,size_t> >(ret);
}
size_t _L;
vector<vector<vector<pair<size_t,size_t> > > > _bins;
vector<vector<vector<size_t> > > _high_bins;
size_t _minj;
size_t _mink;
size_t _maxj;
size_t _maxk;
};
struct python_function
{
python_function(python::object o): _o(o) {}
double operator()(size_t j, size_t k)
{
return python::extract<double>(_o(j,k));
}
double operator()(size_t jl, size_t kl, size_t j, size_t k)
{
return python::extract<double>(_o(jl,kl,j,k));
}
pair<size_t,size_t> operator()(double r1, double r2)
{
python::object retval = _o(r1,r2);
return make_pair(size_t(max(int(python::extract<int>(retval[0])),0)),
size_t(max(int(python::extract<int>(retval[1])),0)));
}
pair<size_t,size_t> operator()(double r1, double r2, size_t j, size_t k)
{
python::object retval = _o(r1,r2,j,k);
return make_pair(size_t(max(int(python::extract<int>(retval[0])),0)),
size_t(max(int(python::extract<int>(retval[1])),0)));
}
private:
python::object _o;
};
// generates a directed graph with given pjk and degree correlation
void GraphInterface::GenerateCorrelatedConfigurationalModel
(size_t N, python::object ppjk, python::object pceil_pjk,
python::object pinv_ceil_pjk, double ceil_pjk_bound, python::object pcorr,
python::object pceil_corr, python::object pinv_ceil_corr,
double ceil_corr_bound, bool undirected_corr, size_t seed, bool verbose)
void generate_random_graph(GraphInterface& gi, size_t N,
python::object deg_sample,
python::object corr_deg_sample,
bool uncorrelated, bool no_parallel,
bool no_self_loops, bool undirected,
size_t seed, bool verbose)
{
typedef function<double (size_t j, size_t k)> pjk_t;
typedef function<pair<size_t,size_t> (double r1, double r2)> inv_ceil_t;
typedef function<double (size_t jl, size_t kl, size_t j, size_t k)> corr_t;
typedef function<pair<size_t,size_t>(double r1,
double r2,
size_t j,
size_t k)> inv_corr_t;
pjk_t pjk = python_function(ppjk);
pjk_t ceil_pjk = python_function(pceil_pjk);
inv_ceil_t inv_ceil_pjk = python_function(pinv_ceil_pjk);
corr_t corr = python_function(pcorr);
corr_t ceil_corr = python_function(pceil_corr);
inv_corr_t inv_ceil_corr = python_function(pinv_ceil_corr);
_mg.clear();
_properties = dynamic_properties();
rng_t rng(static_cast<rng_t::result_type>(seed));
// sample the N (j,k) pairs
sample_from_distribution<pjk_t, pjk_t, inv_ceil_t>
pjk_sample(pjk, ceil_pjk, inv_ceil_pjk, ceil_pjk_bound, rng);
vector<dvertex_t> vertices(N);
size_t sum_j=0, sum_k=0, min_j=0, min_k=0, max_j=0, max_k=0;
if (verbose)
{
cout << "adding vertices: " << flush;
}
for(size_t i = 0; i < N; ++i)
{
if (verbose)
{
static stringstream str;
for (size_t j = 0; j < str.str().length(); ++j)
cout << "\b";
str.str("");
str << i+1 << " of " << N << " (" << (i+1)*100/N << "%)";
cout << str.str() << flush;
}
dvertex_t& v = vertices[i];
v.index = _vertex_index[add_vertex(_mg)];
tie(v.in_degree, v.out_degree) = pjk_sample();
sum_j += v.in_degree;
sum_k += v.out_degree;
min_j = min(v.in_degree,min_j);
min_k = min(v.out_degree,min_k);
max_j = max(v.in_degree,max_j);
max_k = max(v.out_degree,max_k);
}
if (verbose)
cout << "\nfixing average degrees: " << flush;
// <j> and <k> must be the same. Resample random pairs until this holds.
uniform_int<size_t> vertex_sample(0, N-1);
while (sum_j != sum_k)
{
dvertex_t& v = vertices[vertex_sample(rng)];
sum_j -= v.in_degree;
sum_k -= v.out_degree;
tie(v.in_degree, v.out_degree) = pjk_sample();
sum_j += v.in_degree;
sum_k += v.out_degree;
max_j = max(v.in_degree,max_j);
max_k = max(v.out_degree,max_k);
if (verbose)
{
static stringstream str;
for (size_t j = 0; j < str.str().length(); ++j)
cout << "\b";
for (size_t j = 0; j < str.str().length(); ++j)
cout << " ";
for (size_t j = 0; j < str.str().length(); ++j)
cout << "\b";
str.str("");
str << min(sum_j-sum_k, sum_k-sum_j);
cout << str.str() << flush;
}
}
size_t E = sum_k;
vector<dvertex_t> sources; // sources of edges
typedef tr1::unordered_multimap<pair<size_t,size_t>, dvertex_t,
hash<pair<size_t,size_t> > > targets_t;
targets_t targets; // vertices with j > 0
typedef tr1::unordered_set<pair<size_t,size_t>, hash<pair<size_t,size_t> > >
target_degrees_t;
target_degrees_t target_degrees; // existing (j,k) pairs
// fill up sources, targets and target_degrees
sources.reserve(E);
for(size_t i = 0; i < N; ++i)
{
for(size_t k = 0; k < vertices[i].out_degree; ++k)
sources.push_back(vertices[i]);
if (vertices[i].in_degree > 0)
{
targets.insert(make_pair(make_pair(vertices[i].in_degree,
vertices[i].out_degree),
vertices[i]));
target_degrees.insert(make_pair(vertices[i].in_degree,
vertices[i].out_degree));
}
}
typedef multiset<pair<size_t,size_t>, total_deg_comp> ordered_degrees_t;
ordered_degrees_t ordered_degrees; // (j,k) pairs ordered by (j+k), i.e,
// total degree
degree_matrix_t degree_matrix(target_degrees.size(),
min_j, min_k,
max_j, max_k); // (j,k) pairs layed out in a 2
// dimensional matrix
for(typeof(target_degrees.begin()) iter = target_degrees.begin();
iter != target_degrees.end(); ++iter)
if (undirected_corr)
ordered_degrees.insert(*iter);
else
degree_matrix.insert(*iter);
// shuffle sources
for (size_t i = 0; i < sources.size(); ++i)
{
uniform_int<size_t> source_sample(i, sources.size()-1);
swap(sources[i], sources[source_sample(rng)]);
}
if (verbose)
cout << "\nadding edges: " << flush;
// connect the sources to targets
uniform_real<double> sample_probability(0.0, 1.0);
for (size_t i = 0; i < sources.size(); ++i)
{
dvertex_t source = sources[i], target;
size_t j = source.in_degree;
size_t k = source.out_degree;
//choose the target vertex according to correlation
pjk_t prob_func = lambda::bind(corr,lambda::_1,lambda::_2,j,k);
pjk_t ceil = lambda::bind(ceil_corr,lambda::_1,lambda::_2,j,k);
inv_ceil_t inv_ceil = lambda::bind(inv_ceil_corr,
lambda::_1,lambda::_2,j,k);
sample_from_distribution<pjk_t, pjk_t, inv_ceil_t>
corr_sample(prob_func, ceil, inv_ceil, ceil_corr_bound, rng);
size_t jl,kl;
tie(jl,kl) = corr_sample(); // target (j,k)
target_degrees_t::iterator iter = target_degrees.find(make_pair(jl,kl));
if (iter != target_degrees.end())
{
target = targets.find(*iter)->second; // if an (jl,kl) pair exists,
// just use that
}
else
{
pair<size_t, size_t> deg;
if (undirected_corr)
{
// select the (j,k) pair with the closest total degree (j+k)
ordered_degrees_t::iterator upper;
upper = ordered_degrees.upper_bound(make_pair(jl,kl));
if (upper == ordered_degrees.end())
{
--upper;
deg = *upper;
}
else if (upper == ordered_degrees.begin())
{
deg = *upper;
}
else
{
ordered_degrees_t::iterator lower = upper;
--lower;
if (jl + kl - (lower->first + lower->second) <
upper->first + upper->second - (jl + kl))
deg = *lower;
else if (jl + kl - (lower->first + lower->second) !=
upper->first + upper->second - (jl + kl))
deg = *upper;
else
{
// if equal, choose randomly with equal probability
uniform_int<size_t> sample(0, 1);
if (sample(rng))
deg = *lower;
else
deg = *upper;
}
}
target = targets.find(deg)->second;
}
else
{
// select the (j,k) which is the closest in the j,k plane.
deg = degree_matrix.find_closest(jl, kl, rng);
target = targets.find(deg)->second;
// cerr << "wanted: " << jl << ", " << kl
// << " got: " << deg.first << ", " << deg.second << "\n";
}
}
//add edge
graph_traits<multigraph_t>::edge_descriptor e;
e = add_edge(vertex(source.index, _mg),
vertex(target.index, _mg), _mg).first;
_edge_index[e] = i;
// if target received all the edges it should, remove it from target
if (in_degree(vertex(target.index, _mg), _mg) == target.in_degree)
{
targets_t::iterator iter,end;
for(tie(iter,end) =
targets.equal_range(make_pair(target.in_degree,
target.out_degree));
iter != end; ++iter)
if (iter->second == target)
{
targets.erase(iter);
break;
}
// if there are no more targets with (jl,kl), remove pair from
// target_degrees, etc.
if (targets.find(make_pair(target.in_degree, target.out_degree)) ==
targets.end())
{
target_degrees.erase(target_degrees.find(make_pair
(target.in_degree,
target.out_degree)));
if (target_degrees.bucket_count() > 2*target_degrees.size())
{
target_degrees_t temp;
for(target_degrees_t::iterator iter =
target_degrees.begin();
iter != target_degrees.end(); ++iter)
temp.insert(*iter);
target_degrees = temp;
}
if (undirected_corr)
{
for(ordered_degrees_t::iterator iter =
ordered_degrees.find(make_pair(target.in_degree,