diff --git a/src/graph/generation/Makefile.am b/src/graph/generation/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..b2721cd7bcdf3a78c2bd3a5ecb05d183a86ba04a --- /dev/null +++ b/src/graph/generation/Makefile.am @@ -0,0 +1,43 @@ +## Process this file with automake to produce Makefile.in + +AM_CPPFLAGS =\ + -I. -I.. \ + -I $(pythondir)/numpy/core/include/numpy/ \ + -I../boost-workaround \ + -DHAVE_CONFIG_H + +AM_CXXFLAGS =\ + -Wall \ + $(PYTHON_CPPFLAGS) \ + $(BOOST_CPPFLAGS) + +AM_CFLAGS=$(AM_CXXFLAGS) + +libgraph_tool_generationdir = $(pythondir)/graph_tool/generation + +libgraph_tool_generation_LTLIBRARIES = libgraph_tool_generation.la + +libgraph_tool_generation_la_includedir = $(pythondir)/graph_tool/include + +libgraph_tool_generation_la_SOURCES = \ + graph_generation.cc + +libgraph_tool_generation_la_include_HEADERS = \ + graph_generation.hh + +libgraph_tool_generation_la_LIBADD = \ + $(PYTHON_LDFLAGS) \ + $(BOOST_LDFLAGS) \ + $(OPENMP_LDFLAGS) \ + -lboost_python \ + -lboost_iostreams \ + -lexpat + +# needed for typeinfo objects to work across DSO boundaries. +# see http://gcc.gnu.org/faq.html#dso +libgraph_tool_generation_la_LDFLAGS = \ + -module \ + -avoid-version \ + -export-dynamic \ + -no-undefined \ + -Wl,-E diff --git a/src/graph/generation/graph_generation.cc b/src/graph/generation/graph_generation.cc index a844cc02bc0e9b5363ac0e292bfcafc4c7b1ed14..605ff34d51ebcc98b7652bd7418d89883bdcae5e 100644 --- a/src/graph/generation/graph_generation.cc +++ b/src/graph/generation/graph_generation.cc @@ -15,22 +15,11 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - #include "graph.hh" -#include "histogram.hh" +#include "graph_util.hh" +#include "graph_filtering.hh" +#include "graph_generation.hh" +#include using namespace std; using namespace boost; @@ -39,536 +28,78 @@ using namespace graph_tool; typedef boost::mt19937 rng_t; -// this will sample a (j,k) pair from a pjk distribution given a ceil function -// and its inverse - -template -struct sample_from_distribution -{ - sample_from_distribution(Distribution &dist, Ceil& ceil, InvCeil &inv_ceil, - double bound, rng_t& rng) - : _dist(dist), _ceil(ceil), _inv_ceil(inv_ceil), _bound(bound), - _rng(rng), _uniform_p(0.0, 1.0) {} - - pair operator()() - { - // sample j,k from ceil - size_t j,k; - double u; - do - { - tie(j,k) = _inv_ceil(_uniform_p(_rng), _uniform_p(_rng)); - u = _uniform_p(_rng); - } - while (u > _dist(j,k)/(_bound*_ceil(j,k))); - return make_pair(j,k); - } - - Distribution& _dist; - Ceil& _ceil; - InvCeil& _inv_ceil; - double _bound; - rng_t &_rng; - boost::uniform_real _uniform_p; -}; - -// desired vertex type, with desired j,k values and the index in the real graph - -struct dvertex_t -{ - dvertex_t() {} - dvertex_t(size_t in, size_t out): in_degree(in), out_degree(out) {} - dvertex_t(const pair& deg): in_degree(deg.first), - out_degree(deg.second) {} - size_t index, in_degree, out_degree; - bool operator==(const dvertex_t& other) const {return other.index == index;} -}; - -inline std::size_t hash_value(const dvertex_t& v) -{ - size_t h = hash_value(v.in_degree); - hash_combine(h, v.out_degree); - return h; -} - -inline size_t dist(const dvertex_t& a, const dvertex_t& b) -{ - return int(a.in_degree-b.in_degree)*int(a.in_degree-b.in_degree) + - int(a.out_degree-b.out_degree)*int(a.out_degree-b.out_degree); -} - -struct total_deg_comp -{ - bool operator()(const pair& a, const pair& b) - { - return a.first + a.second < b.first + b.second; - } -}; - -// this structure will keep the existing (j,k) pairs in the graph in a matrix, -// so that the nearest (j,k) to a given target can be found easily. - -class degree_matrix_t +class PythonFuncWrap { public: - degree_matrix_t(size_t N, size_t minj, size_t mink, size_t maxj, - size_t maxk) - { - _L = max(size_t(pow(2,ceil(log2(sqrt(N))))),size_t(2)); - _minj = minj; - _mink = mink; - _maxj = max(maxj,_L); - _maxk = max(maxk,_L); - _bins.resize(_L, vector > >(_L)); - _high_bins.resize(size_t(log2(_L))); - for(size_t i = 0; i < _high_bins.size(); ++i) - _high_bins[i].resize(_L/(1<<(i+1)), vector(_L/(1<<(i+1)))); - } - - void insert(const pair& v) - { - size_t j_bin, k_bin; - tie(j_bin, k_bin) = get_bin(v.first, v.second, 0); - _bins[j_bin][k_bin].push_back(v); - for (size_t i = 0; i < _high_bins.size(); ++i) - { - size_t hj,hk; - tie(hj,hk) = get_bin(j_bin,k_bin, i+1); - _high_bins[i][hj][hk]++; - } - } - - void erase(const pair& v) - { - size_t j_bin, k_bin; - tie(j_bin, k_bin) = get_bin(v.first, v.second, 0); - for(size_t i = 0; i < _bins[j_bin][k_bin].size(); ++i) - { - if (_bins[j_bin][k_bin][i] == v) - { - _bins[j_bin][k_bin].erase(_bins[j_bin][k_bin].begin()+i); - break; - } - } - - for (size_t i = 0; i < _high_bins.size(); ++i) - { - size_t hj,hk; - tie(hj,hk) = get_bin(j_bin,k_bin, i+1); - _high_bins[i][hj][hk]--; - } + PythonFuncWrap(python::object o): _o(o) {} - } - - pair find_closest(size_t j, size_t k, rng_t& rng) + pair operator()() const { - vector > candidates; - - size_t level; - - // find the appropriate level on which to operate - for (level = _high_bins.size(); level <= 0; --level) - { - size_t hj, hk; - tie(hj,hk) = get_bin(j,k,level); - if (get_bin_count(hj,hk,level) == 0) - { - if (level < _high_bins.size()) - level++; - break; - } - } - - size_t j_bin, k_bin; - tie(j_bin, k_bin) = get_bin(j, k, level); - - for (size_t hj = ((j_bin>0)?j_bin-1:j_bin); - hj < j_bin + 1 && hj <= get_bin(_maxj, _maxk, level).first; ++hj) - for (size_t hk = ((k_bin>0)?k_bin-1:k_bin); - hk < k_bin + 1 && hk <= get_bin(_maxj, _maxk, level).second; - ++hk) - search_bin(hj,hk,j,k,level,candidates); - - uniform_int sample(0, candidates.size() - 1); - return candidates[sample(rng)]; + python::object ret = _o(); + return python::extract >(ret); } -private: - pair get_bin(size_t j, size_t k, size_t level) + size_t operator()(bool) const { - if (level == 0) - return make_pair(((j-_minj)*(_L-1))/_maxj, - ((k-_mink)*(_L-1))/_maxk); - - pair bin = get_bin(j,k,0); - bin.first /= 1 << level; - bin.second /= 1 << level; - return bin; + python::object ret = _o(); + return python::extract(ret); } - size_t get_bin_count(size_t bin_j, size_t bin_k, size_t level) + size_t operator()(size_t k) const { - if (level == 0) - return _bins[bin_j][bin_k].size(); - else - return _high_bins[level-1][bin_j][bin_k]; + python::object ret = _o(k); + return python::extract(ret); } - void search_bin(size_t hj, size_t hk, size_t j, size_t k, size_t level, - vector >& candidates) + pair operator()(pair deg) const { - size_t w = 1 << level; - for (size_t j_bin = hj*w; j_bin < (hj+1)*w; ++j_bin) - for (size_t k_bin = hk*w; k_bin < (hk+1)*w; ++k_bin) - { - for (size_t i = 0; i < _bins[j_bin][k_bin].size(); ++i) - { - pair& v = _bins[j_bin][k_bin][i]; - if (candidates.empty()) - { - candidates.push_back(v); - continue; - } - if (dist(dvertex_t(v), dvertex_t(j,k)) < - dist(dvertex_t(candidates.front()),dvertex_t(j,k))) - { - candidates.clear(); - candidates.push_back(v); - } - else if (dist(dvertex_t(v), dvertex_t(j,k)) == - dist(dvertex_t(candidates.front()),dvertex_t(j,k))) - { - candidates.push_back(v); - } - } - } + python::object ret = _o(deg.first, deg.second); + return python::extract >(ret); } - size_t _L; - vector > > > _bins; - vector > > _high_bins; - size_t _minj; - size_t _mink; - size_t _maxj; - size_t _maxk; -}; - -struct python_function -{ - python_function(python::object o): _o(o) {} - double operator()(size_t j, size_t k) - { - return python::extract(_o(j,k)); - } - double operator()(size_t jl, size_t kl, size_t j, size_t k) - { - return python::extract(_o(jl,kl,j,k)); - } - pair operator()(double r1, double r2) - { - python::object retval = _o(r1,r2); - return make_pair(size_t(max(int(python::extract(retval[0])),0)), - size_t(max(int(python::extract(retval[1])),0))); - } - pair operator()(double r1, double r2, size_t j, size_t k) - { - python::object retval = _o(r1,r2,j,k); - return make_pair(size_t(max(int(python::extract(retval[0])),0)), - size_t(max(int(python::extract(retval[1])),0))); - } +private: python::object _o; }; -// generates a directed graph with given pjk and degree correlation - -void GraphInterface::GenerateCorrelatedConfigurationalModel - (size_t N, python::object ppjk, python::object pceil_pjk, - python::object pinv_ceil_pjk, double ceil_pjk_bound, python::object pcorr, - python::object pceil_corr, python::object pinv_ceil_corr, - double ceil_corr_bound, bool undirected_corr, size_t seed, bool verbose) +void generate_random_graph(GraphInterface& gi, size_t N, + python::object deg_sample, + python::object corr_deg_sample, + bool uncorrelated, bool no_parallel, + bool no_self_loops, bool undirected, + size_t seed, bool verbose) { - typedef function pjk_t; - typedef function (double r1, double r2)> inv_ceil_t; - typedef function corr_t; - typedef function(double r1, - double r2, - size_t j, - size_t k)> inv_corr_t; - - pjk_t pjk = python_function(ppjk); - pjk_t ceil_pjk = python_function(pceil_pjk); - inv_ceil_t inv_ceil_pjk = python_function(pinv_ceil_pjk); - corr_t corr = python_function(pcorr); - corr_t ceil_corr = python_function(pceil_corr); - inv_corr_t inv_ceil_corr = python_function(pinv_ceil_corr); - - _mg.clear(); - _properties = dynamic_properties(); - rng_t rng(static_cast(seed)); - - // sample the N (j,k) pairs - - sample_from_distribution - pjk_sample(pjk, ceil_pjk, inv_ceil_pjk, ceil_pjk_bound, rng); - vector vertices(N); - size_t sum_j=0, sum_k=0, min_j=0, min_k=0, max_j=0, max_k=0; - if (verbose) - { - cout << "adding vertices: " << flush; - } - for(size_t i = 0; i < N; ++i) - { - if (verbose) - { - static stringstream str; - for (size_t j = 0; j < str.str().length(); ++j) - cout << "\b"; - str.str(""); - str << i+1 << " of " << N << " (" << (i+1)*100/N << "%)"; - cout << str.str() << flush; - } - dvertex_t& v = vertices[i]; - v.index = _vertex_index[add_vertex(_mg)]; - tie(v.in_degree, v.out_degree) = pjk_sample(); - sum_j += v.in_degree; - sum_k += v.out_degree; - min_j = min(v.in_degree,min_j); - min_k = min(v.out_degree,min_k); - max_j = max(v.in_degree,max_j); - max_k = max(v.out_degree,max_k); - } - - if (verbose) - cout << "\nfixing average degrees: " << flush; - - // and must be the same. Resample random pairs until this holds. - uniform_int vertex_sample(0, N-1); - while (sum_j != sum_k) - { - dvertex_t& v = vertices[vertex_sample(rng)]; - sum_j -= v.in_degree; - sum_k -= v.out_degree; - tie(v.in_degree, v.out_degree) = pjk_sample(); - sum_j += v.in_degree; - sum_k += v.out_degree; - max_j = max(v.in_degree,max_j); - max_k = max(v.out_degree,max_k); - if (verbose) - { - static stringstream str; - for (size_t j = 0; j < str.str().length(); ++j) - cout << "\b"; - for (size_t j = 0; j < str.str().length(); ++j) - cout << " "; - for (size_t j = 0; j < str.str().length(); ++j) - cout << "\b"; - str.str(""); - str << min(sum_j-sum_k, sum_k-sum_j); - cout << str.str() << flush; - } - } - - size_t E = sum_k; - - vector sources; // sources of edges - typedef tr1::unordered_multimap, dvertex_t, - hash > > targets_t; - targets_t targets; // vertices with j > 0 - typedef tr1::unordered_set, hash > > - target_degrees_t; - target_degrees_t target_degrees; // existing (j,k) pairs - - // fill up sources, targets and target_degrees - sources.reserve(E); - for(size_t i = 0; i < N; ++i) - { - for(size_t k = 0; k < vertices[i].out_degree; ++k) - sources.push_back(vertices[i]); - if (vertices[i].in_degree > 0) - { - targets.insert(make_pair(make_pair(vertices[i].in_degree, - vertices[i].out_degree), - vertices[i])); - target_degrees.insert(make_pair(vertices[i].in_degree, - vertices[i].out_degree)); - } - } - - typedef multiset, total_deg_comp> ordered_degrees_t; - ordered_degrees_t ordered_degrees; // (j,k) pairs ordered by (j+k), i.e, - // total degree - degree_matrix_t degree_matrix(target_degrees.size(), - min_j, min_k, - max_j, max_k); // (j,k) pairs layed out in a 2 - // dimensional matrix - for(typeof(target_degrees.begin()) iter = target_degrees.begin(); - iter != target_degrees.end(); ++iter) - if (undirected_corr) - ordered_degrees.insert(*iter); - else - degree_matrix.insert(*iter); - - // shuffle sources - for (size_t i = 0; i < sources.size(); ++i) - { - uniform_int source_sample(i, sources.size()-1); - swap(sources[i], sources[source_sample(rng)]); - } - - if (verbose) - cout << "\nadding edges: " << flush; - - // connect the sources to targets - uniform_real sample_probability(0.0, 1.0); - for (size_t i = 0; i < sources.size(); ++i) - { - dvertex_t source = sources[i], target; - size_t j = source.in_degree; - size_t k = source.out_degree; - - //choose the target vertex according to correlation - - pjk_t prob_func = lambda::bind(corr,lambda::_1,lambda::_2,j,k); - pjk_t ceil = lambda::bind(ceil_corr,lambda::_1,lambda::_2,j,k); - inv_ceil_t inv_ceil = lambda::bind(inv_ceil_corr, - lambda::_1,lambda::_2,j,k); - sample_from_distribution - corr_sample(prob_func, ceil, inv_ceil, ceil_corr_bound, rng); - - size_t jl,kl; - tie(jl,kl) = corr_sample(); // target (j,k) - - target_degrees_t::iterator iter = target_degrees.find(make_pair(jl,kl)); - if (iter != target_degrees.end()) - { - target = targets.find(*iter)->second; // if an (jl,kl) pair exists, - // just use that - } - else - { - pair deg; - if (undirected_corr) - { - // select the (j,k) pair with the closest total degree (j+k) - ordered_degrees_t::iterator upper; - upper = ordered_degrees.upper_bound(make_pair(jl,kl)); - if (upper == ordered_degrees.end()) - { - --upper; - deg = *upper; - } - else if (upper == ordered_degrees.begin()) - { - deg = *upper; - } - else - { - ordered_degrees_t::iterator lower = upper; - --lower; - if (jl + kl - (lower->first + lower->second) < - upper->first + upper->second - (jl + kl)) - deg = *lower; - else if (jl + kl - (lower->first + lower->second) != - upper->first + upper->second - (jl + kl)) - deg = *upper; - else - { - // if equal, choose randomly with equal probability - uniform_int sample(0, 1); - if (sample(rng)) - deg = *lower; - else - deg = *upper; - } - } - target = targets.find(deg)->second; - } - else - { - // select the (j,k) which is the closest in the j,k plane. - deg = degree_matrix.find_closest(jl, kl, rng); - target = targets.find(deg)->second; -// cerr << "wanted: " << jl << ", " << kl -// << " got: " << deg.first << ", " << deg.second << "\n"; - - } - } - - //add edge - graph_traits::edge_descriptor e; - e = add_edge(vertex(source.index, _mg), - vertex(target.index, _mg), _mg).first; - _edge_index[e] = i; - - // if target received all the edges it should, remove it from target - if (in_degree(vertex(target.index, _mg), _mg) == target.in_degree) - { - targets_t::iterator iter,end; - for(tie(iter,end) = - targets.equal_range(make_pair(target.in_degree, - target.out_degree)); - iter != end; ++iter) - if (iter->second == target) - { - targets.erase(iter); - break; - } - - // if there are no more targets with (jl,kl), remove pair from - // target_degrees, etc. - if (targets.find(make_pair(target.in_degree, target.out_degree)) == - targets.end()) - { - target_degrees.erase(target_degrees.find(make_pair - (target.in_degree, - target.out_degree))); - if (target_degrees.bucket_count() > 2*target_degrees.size()) - { - target_degrees_t temp; - for(target_degrees_t::iterator iter = - target_degrees.begin(); - iter != target_degrees.end(); ++iter) - temp.insert(*iter); - target_degrees = temp; - } - if (undirected_corr) - { - for(ordered_degrees_t::iterator iter = - ordered_degrees.find(make_pair(target.in_degree, - target.out_degree)); - iter != ordered_degrees.end(); ++iter) - if (*iter == make_pair(target.in_degree, - target.out_degree)) - { - ordered_degrees.erase(iter); - break; - } - } - else - { - degree_matrix.erase(make_pair(target.in_degree, - target.out_degree)); - } - } - - } - - if (verbose) - { - static stringstream str; - for (size_t j = 0; j < str.str().length(); ++j) - cout << "\b"; - for (size_t j = 0; j < str.str().length(); ++j) - cout << " "; - for (size_t j = 0; j < str.str().length(); ++j) - cout << "\b"; - str.str(""); - str << (i+1) << " of " << E << " (" << (i+1)*100/E << "%)"; - cout << str.str() << flush; - } - - } + typedef graph_tool::detail::get_all_graph_views::apply< + graph_tool::detail::scalar_pairs, mpl::bool_, + mpl::bool_, mpl::bool_, + mpl::bool_, mpl::bool_ >::type graph_views; + + if (undirected) + gi.SetDirected(false); + + if (uncorrelated) + { + run_action() + (gi, lambda::bind(gen_random_graph >(N), + lambda::_1, + PythonFuncWrap(deg_sample), + PythonFuncWrap(corr_deg_sample), + no_parallel, no_self_loops, + undirected, seed, verbose))(); + } + else + { + run_action() + (gi, lambda::bind(gen_random_graph >(N), + lambda::_1, + PythonFuncWrap(deg_sample), + PythonFuncWrap(corr_deg_sample), + no_parallel, no_self_loops, + undirected, seed, verbose))(); + } + gi.ReIndexEdges(); +} - if (verbose) - cout << "\n"; +BOOST_PYTHON_MODULE(libgraph_tool_generation) +{ + def("gen_random_graph", &generate_random_graph); } diff --git a/src/graph/generation/graph_generation.hh b/src/graph/generation/graph_generation.hh new file mode 100644 index 0000000000000000000000000000000000000000..d81eed44c40bd7d8148b641bef54466d086ab4b5 --- /dev/null +++ b/src/graph/generation/graph_generation.hh @@ -0,0 +1,593 @@ +// graph-tool -- a general graph modification and manipulation thingy +// +// Copyright (C) 2007 Tiago de Paula Peixoto +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 3 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +#ifndef GRAPH_GENERATION_HH +#define GRAPH_GENERATION_HH + +#include +#include +#include +#define BOOST_DISABLE_ASSERTS +#include +#include +#include + +namespace graph_tool +{ +using namespace std; +using namespace boost; + +typedef boost::mt19937 rng_t; + +// Graph Generation +// ================ +// +// this graph generation routine is based on two inputs, the in-out degree +// probability matrix deg_matrix: +// +// deg_matrix(j,k) = P(j,k) +// +// and the 4-dimensional in-out degree correlation matrix deg_corr: +// +// deg_corr(j, k, j', k') = P((j,k) -> (j',k')), i.e. the probability that a +// directed edge exists between two _specific_ vertices of degrees (j,k) and +// (j',k') +// +// Both these structures are expensive to store, and having them would still +// mean that rejection sampling would have to be used to generate the graph, +// which can be quite slow, and unnecessary when the samples are easy to +// obtain. Therefore what is actually used are two functions, deg_sample() and +// deg_corr_sample(j,k) which should return a (j,k) degree sample which obeys +// the above probabilities. +// +// Furthermore we want also to generate _undirected_ graphs, which have the +// above probabilities changed to P(k) and deg_corr(k) respectively. + +// Utilities +// ========= + +// desired vertex type, with desired j,k values and the index in the real graph +struct dvertex_t +{ + dvertex_t() {} + dvertex_t(size_t in, size_t out): in_degree(in), out_degree(out) {} + dvertex_t(const pair& deg): in_degree(deg.first), + out_degree(deg.second) {} + size_t index, in_degree, out_degree; + bool operator==(const dvertex_t& other) const {return other.index == index;} +}; + +// utility class to sample uniformly from a collection of values +template +class Sampler +{ +public: + Sampler() {} + + template + Sampler(Iterator iter, Iterator end) + { + for(; iter != end; ++iter) + _candidates.push_back(*iter); + } + + void Insert(const ValueType& v) { _candidates.push_back(v); } + + void Remove(size_t index) + { + swap(_candidates[index], _candidates.back()); + _candidates.pop_back(); + } + + void RemoveLast() + { + if (_last != _candidates.size()) + { + swap(_candidates[_last], _candidates.back()); + _candidates.pop_back(); + } + } + + ValueType operator()(rng_t& rng, bool remove = true) + { + uniform_int<> sample(0, _candidates.size() - 1); + int i = sample(rng); + if (remove) + { + swap(_candidates[i], _candidates.back()); + ValueType ret = _candidates.back(); + _candidates.pop_back(); + _last = numeric_limits::max(); + return ret; + } + else + { + _last = i; + return _candidates[i]; + } + } + +private: + vector _candidates; + size_t _last; +}; + +// used for verbose display +void print_progress(size_t current, size_t total, stringstream& str) +{ + size_t atom = max(total/100, 1); + if ( (current % atom == 0) || current == total) + { + for (size_t j = 0; j < str.str().length(); ++j) + cout << "\b"; + str.str(""); + str << current+1 << " of " << total << " (" << + (current+1)*100/total << "%)"; + cout << str.str() << flush; + } +} + +void print_update(size_t current, stringstream& str) +{ + for (size_t j = 0; j < str.str().length(); ++j) + cout << "\b"; + for (size_t j = 0; j < str.str().length(); ++j) + cout << " "; + for (size_t j = 0; j < str.str().length(); ++j) + cout << "\b"; + str.str(""); + str << current; + cout << str.str() << flush; +} + +// +// Generation strategies +// ===================== +// +// Directed and undirected graphs have different generation strategies, which +// are defined in the classes below. + +// +// Directed graph generation strategy +// + +class DirectedStrat +{ +public: + typedef pair deg_t; // degree type + + DirectedStrat(size_t N, bool no_parallel, bool no_self_loops) + : _N(N), _no_parallel(no_parallel), _no_self_loops(no_self_loops) + { + if (_no_parallel) + _max_deg = (_no_self_loops) ? _N - 1 : N; + else + _max_deg = numeric_limits::max(); + } + + // sample the degrees of all the vertices + template + size_t SampleDegrees(Graph& g, vector& vertices, + VertexIndex vertex_index, DegSample& deg_sample, + rng_t& rng, bool verbose) + { + stringstream str; + size_t sum_j=0, sum_k=0; + for(size_t i = 0; i < _N; ++i) + { + if (verbose) + print_progress(i, _N, str); + dvertex_t& v = vertices[i]; + v.index = vertex_index[add_vertex(g)]; + do + { + tie(v.in_degree, v.out_degree) = deg_sample(); + } + while (_no_parallel && + (v.in_degree > _max_deg || v.out_degree > _max_deg)); + sum_j += v.in_degree; + sum_k += v.out_degree; + } + + if (verbose) + { + cout << "\nfixing average degrees. Total degree difference: " + << flush; + str.str(""); + } + + // and must be the same. Re-sample random pairs until this holds + uniform_int vertex_sample(0, _N-1); + size_t count = 0; + while (sum_j != sum_k) + { + dvertex_t& v = vertices[vertex_sample(rng)]; + sum_j -= v.in_degree; + sum_k -= v.out_degree; + do + { + tie(v.in_degree, v.out_degree) = deg_sample(); + } + while (_no_parallel && + (v.in_degree > _max_deg || v.out_degree > _max_deg)); + sum_j += v.in_degree; + sum_k += v.out_degree; + if (verbose && (count % 100 == 0 || sum_j == sum_k)) + print_update(min(sum_j-sum_k, sum_k-sum_j), str); + count++; + } + return sum_k; + } + + deg_t GetDegree(const dvertex_t& v) { return make_pair(v.out_degree, + v.out_degree); } + bool IsTarget(const dvertex_t& v) { return v.in_degree > 0; } + + template + bool IsStillTarget(Graph& g, const dvertex_t& v) + { + return (in_degree(vertex(v.index, g), g) != v.in_degree); + } + +private: + size_t _N; + bool _no_parallel; + bool _no_self_loops; + size_t _max_deg; +}; + +// +// Undirected graph generation strategy +// + +class UndirectedStrat +{ +public: + typedef size_t deg_t; + + UndirectedStrat(size_t N, bool no_parallel, bool no_self_loops) + : _N(N), _no_parallel(no_parallel), _no_self_loops(no_self_loops) + { + if (_no_parallel) + _max_deg = (_no_self_loops) ? _N - 1 : N; + else + _max_deg = numeric_limits::max(); + } + + // samples the degress of all vertices + template + size_t SampleDegrees(Graph& g, vector& vertices, + VertexIndex vertex_index, DegSample& deg_sample, + rng_t& rng, bool verbose) + { + stringstream str; + size_t sum_k=0; + for(size_t i = 0; i < _N; ++i) + { + if (verbose) + print_progress(i, _N, str); + dvertex_t& v = vertices[i]; + v.index = vertex_index[add_vertex(g)]; + do + { + v.out_degree = deg_sample(true); + } + while (_no_parallel && v.out_degree > _max_deg); + sum_k += v.out_degree; + } + + if (verbose) + { + cout << "\nFixing degree sum (must be even): " + << flush; + str.str(""); + } + + // sum_k must be an even number (2*num_edges). Re-sample degrees until + // this holds + uniform_int vertex_sample(0, _N-1); + size_t count = 0; + while (sum_k % 2 != 0) + { + dvertex_t& v = vertices[vertex_sample(rng)]; + sum_k -= v.out_degree; + do + { + v.out_degree = deg_sample(true); + } + while (_no_parallel && (v.out_degree > _max_deg)); + sum_k += v.out_degree; + if (verbose && (count % 100 || sum_k % 2 == 0)) + print_update(sum_k, str); + count++; + } + return sum_k; + } + + deg_t GetDegree(const dvertex_t& v) { return v.out_degree; } + bool IsTarget(const dvertex_t& v) { return v.out_degree > 0; } + + template + bool IsStillTarget(Graph& g, const dvertex_t& v) + { + return (out_degree(vertex(v.index, g), g) != v.out_degree); + } + +private: + size_t _N; + bool _no_parallel; + bool _no_self_loops; + size_t _max_deg; +}; + +// +// Correlation strategies +// ====================== +// +// Graphs can be uncorrelated or two-point correlated. The first is a special +// case of the second, but it happens quite often and it can be optimized. Below +// are two classes which implement both strategies. + +// +// Correlated graph strategy +// + +template +class CorrelatedStrat +{ +public: + void InsertTarget(const dvertex_t& v, const Deg& deg) + { + _targets.insert(make_pair(deg, v)); + } + + template + dvertex_t GetRandomTarget(const Deg& source_deg, CorrSample& corr_sample, + rng_t& rng) + { + // keep sampling until we find a vertex + typename targets_t::iterator iter,end; + Deg target_deg; + do + { + //choose the target vertex according to correlation + target_deg = corr_sample(source_deg); + tie(iter, end) = _targets.equal_range(target_deg); + } + while (iter == end); + // sample random target on the same class + Sampler sampler; + for(; iter != end; ++iter) + sampler.Insert(iter); + _last = sampler(rng); + return _last->second; + } + + void RemoveLast() { _targets.erase(_last); } + +private: + typedef tr1::unordered_multimap > targets_t; + targets_t _targets; + typename targets_t::iterator _last; +}; + +// +// Uncorrelated graph strategy +// + +template +class UncorrelatedStrat +{ +public: + void InsertTarget(const dvertex_t& v, const Deg& deg) + { + _targets.Insert(v); + } + + template + dvertex_t GetRandomTarget(const Deg& source_deg, CorrSample& corr_sample, + rng_t& rng) + { + return _targets(rng, false); + } + + void RemoveLast() { _targets.RemoveLast(); } + +private: + Sampler _targets; +}; + +// +// Main Algorithm +// ============== +// +// generates a directed or undirected graph with given degree distribution and +// correlation as defined above + +template +struct gen_random_graph +{ + gen_random_graph(size_t N): _N(N) {} + + template + void operator()(Graph* gp, DegSample& deg_sample, + CorrDegSample& deg_corr_sample, bool no_parallel, + bool no_self_loops, bool undirected, + size_t seed, bool verbose) + const + { + Graph& g = *gp; + size_t N = _N; + typename property_map::type vertex_index = + get(vertex_index_t(), g); + rng_t rng(static_cast(seed)); + + // figure out the necessary strategies + typedef typename is_convertible + ::directed_category, + directed_tag>::type is_directed; + typedef typename mpl::if_::type gen_strat_t; + typedef typename gen_strat_t::deg_t deg_t; + typedef typename mpl::if_, + UncorrelatedStrat >::type corr_strat_t; + + gen_strat_t gen_strat(N, no_parallel, no_self_loops); + corr_strat_t corr_strat; + + stringstream str; // used for verbose status + + if (verbose) + cout << "adding vertices: " << flush; + + // sample the N (j,k) pairs + vector vertices(N); + size_t E = gen_strat.SampleDegrees(g, vertices, vertex_index, + deg_sample, rng, verbose); + + vector sources; // edge sources + + // fill up sources, targets and target_degrees + sources.reserve(E); + for(size_t i = 0; i < N; ++i) + { + for(size_t k = 0; k < vertices[i].out_degree; ++k) + sources.push_back(vertices[i]); + if (gen_strat.IsTarget(vertices[i])) + corr_strat.InsertTarget(vertices[i], + gen_strat.GetDegree(vertices[i])); + } + + // shuffle sources + for (size_t i = 0; i < sources.size(); ++i) + { + uniform_int source_sample(i, sources.size()-1); + swap(sources[i], sources[source_sample(rng)]); + } + + if (verbose) + { + cout << "\nadding edges: " << flush; + str.str(""); + } + + // connect the sources to targets + for (int i = 0; i < int(sources.size()); ++i) + { + dvertex_t source = sources[i], target; + deg_t source_deg = gen_strat.GetDegree(source); + + // in undirected graphs, there's no difference between source and + // target + if (!is_directed::value && !gen_strat.IsStillTarget(g, source)) + continue; + + tr1::unordered_set old_targets; + if (no_parallel) + { + typename graph_traits::out_edge_iterator e, e_end; + for (tie(e, e_end) = out_edges(vertex(source.index, g), g); + e != e_end; ++e) + old_targets.insert(boost::target(*e,g)); + } + + // get target + target = corr_strat.GetRandomTarget(source_deg, deg_corr_sample, + rng); + + // if unacceptable (because parallel and/or self_loop), throw it + // back and disconnect a random previous (different) source + if ((no_parallel && (old_targets.find(target.index) + != old_targets.end())) || + (no_self_loops && (source.index == target.index))) + { + if (i == 0) // just try again + { + i--; + continue; + } + + uniform_int source_sample(0, i-1); + size_t s_index; + // keep trying: we don't want the same source again, and no + // sources with zero out-degree (which can happen when the graph + // is undirected, when they're orphaned after a removed edge) + do + { + s_index = source_sample(rng); + } + while (sources[s_index] == source || + out_degree(vertex(sources[s_index].index, g), g) == 0); + + dvertex_t rsource = sources[s_index]; // random source + + // get the random edge + typename graph_traits::out_edge_iterator e, e_end; + tie(e, e_end) = out_edges(vertex(rsource.index, g), g); + Sampler::edge_descriptor> + esampler(e, e_end); + typename graph_traits::edge_descriptor re = + esampler(rng, false); // random edge + + // if edge's target was already full, put it back in target list + dvertex_t rtarget = + vertices[vertex_index[boost::target(re, g)]]; + if (!gen_strat.IsStillTarget(g, rtarget)) + corr_strat.InsertTarget(rtarget, + gen_strat.GetDegree(rtarget)); + + // for undirected graphs, we need also to check the source + if (!is_directed::value) + { + dvertex_t rsource = + vertices[vertex_index[boost::source(re, g)]]; + if (!gen_strat.IsStillTarget(g, rsource)) + corr_strat.InsertTarget(rsource, + gen_strat.GetDegree(rsource)); + } + + // remove and swap with previous source and continue from then + remove_edge(re, g); + swap(sources[i-1], sources[s_index]); + i -= 2; + continue; + } + + //add edge + typename graph_traits::edge_descriptor e; + e = add_edge(vertex(source.index, g), + vertex(target.index, g), g).first; + + // if target received all the edges it should, remove it from target + // list + if (!gen_strat.IsStillTarget(g, target)) + corr_strat.RemoveLast(); + + if (verbose) + print_progress(i, E, str); + } + + if (verbose) + cout << "\n"; + } + + size_t _N; +}; + +} // graph_tool namespace + +#endif // GRAPH_GENERATION_HH