Commit 48aec77d authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Add dedicated average correlation calculation

This is a re-implementation based on new histogram code, which does not
build a 2D histogram, and is thus more exact and uses less memory.
parent dc184b46
......@@ -17,6 +17,9 @@ libgraph_tool_correlations_la_SOURCES = \
graph_assortativity.cc \
graph_correlations.cc \
graph_correlations_imp1.cc \
graph_avg_correlations.cc \
graph_avg_correlations_imp1.cc \
graph_avg_correlations_combined.cc \
graph_correlations_combined.cc \
graph_correlations_bind.cc
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2007 Tiago de Paula Peixoto <tiago@forked.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_filtering.hh"
#include <boost/lambda/bind.hpp>
#include <boost/python.hpp>
#include "graph.hh"
#include "graph_selectors.hh"
#include "graph_properties.hh"
#include "graph_correlations.hh"
#include <iostream>
using namespace std;
using namespace boost;
using namespace boost::lambda;
using namespace graph_tool;
// implementations spread across different compile units to minimize memory
// usage during compilation
void graph_avg_corr_imp1(const GraphInterface& g, python::object& avg,
python::object& dev, python::object& ret_bins,
boost::any deg1, boost::any deg2,
boost::any weight,
const vector<long double>& bins);
typedef ConstantPropertyMap<int,GraphInterface::edge_t> cweight_map_t;
python::object
get_vertex_avg_correlation(const GraphInterface& gi,
GraphInterface::deg_t deg1,
GraphInterface::deg_t deg2,
boost::any weight,
const vector<long double>& bins)
{
python::object avg, dev;
python::object ret_bins;
any weight_prop;
typedef DynamicPropertyMapWrap<long double, GraphInterface::edge_t>
wrapped_weight_t;
if (!weight.empty())
{
weight_prop = wrapped_weight_t(weight, edge_scalar_properties());
}
else
weight_prop = cweight_map_t(1);
try
{
run_action<>()(gi, get_avg_correlation<GetNeighboursPairs>
(avg, dev, bins, ret_bins),
all_selectors(), all_selectors(),
mpl::vector<cweight_map_t>())
(degree_selector(deg1), degree_selector(deg2), weight_prop);
}
catch (ActionNotFound&)
{
graph_avg_corr_imp1(gi, avg, dev, ret_bins,
degree_selector(deg1),
degree_selector(deg2), weight_prop, bins);
}
return python::make_tuple(avg, dev, ret_bins);
}
using namespace boost::python;
void export_avg_correlations()
{
def("vertex_avg_correlation", &get_vertex_avg_correlation);
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2007 Tiago de Paula Peixoto <tiago@forked.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_filtering.hh"
#include <boost/lambda/bind.hpp>
#include <boost/python.hpp>
#include "graph.hh"
#include "graph_selectors.hh"
#include "graph_properties.hh"
#include "graph_correlations.hh"
#include <iostream>
using namespace std;
using namespace boost;
using namespace boost::lambda;
using namespace graph_tool;
typedef ConstantPropertyMap<int,GraphInterface::edge_t> dummy_weight;
python::object
get_vertex_avg_combined_correlation(const GraphInterface& gi,
GraphInterface::deg_t deg1,
GraphInterface::deg_t deg2,
const vector<long double>& bins)
{
python::object avg, dev;
python::object ret_bins;
run_action<>()(gi, get_avg_correlation<GetCombinedPair>
(avg, dev, bins, ret_bins),
all_selectors(), all_selectors(),
mpl::vector<dummy_weight>())
(degree_selector(deg1), degree_selector(deg2), dummy_weight());
return python::make_tuple(avg, dev, ret_bins);
}
using namespace boost::python;
void export_avg_combined_correlations()
{
def("vertex_avg_combined_correlation",
&get_vertex_avg_combined_correlation);
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2007 Tiago de Paula Peixoto <tiago@forked.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_filtering.hh"
#include <boost/lambda/bind.hpp>
#include "graph.hh"
#include "histogram.hh"
#include "graph_selectors.hh"
#include "graph_properties.hh"
#include "graph_correlations.hh"
using namespace std;
using namespace boost;
using namespace boost::lambda;
using namespace graph_tool;
void graph_avg_correlations_imp1(const GraphInterface& g, python::object& avg,
python::object& dev,
python::object& ret_bins,
boost::any deg1, boost::any deg2,
boost::any weight,
const vector<long double>& bins)
{
typedef DynamicPropertyMapWrap<long double, GraphInterface::edge_t>
wrapped_weight_t;
run_action<>()(g, get_avg_correlation<GetNeighboursPairs>
(avg, dev, bins, ret_bins),
all_selectors(), all_selectors(),
mpl::vector<wrapped_weight_t>())
(deg1, deg2, weight);
}
......@@ -24,6 +24,7 @@
#include <boost/python/object.hpp>
#include <boost/python/list.hpp>
#include <boost/python/extract.hpp>
#include "histogram.hh"
#include "numpy_bind.hh"
#include "shared_map.hh"
......@@ -52,6 +53,25 @@ public:
hist.PutValue(k, get(weight, *e));
}
}
template <class Graph, class Deg1, class Deg2, class Sum, class Count,
class WeightMap>
void operator()(typename graph_traits<Graph>::vertex_descriptor v,
Deg1& deg1, Deg2& deg2, Graph& g, WeightMap& weight,
Sum& sum, Sum& sum2, Count& count)
{
typename Sum::point_t k1;
k1[0] = deg1(v, g);
typename Sum::count_type k2;
typename graph_traits<Graph>::out_edge_iterator e, e_end;
for (tie(e,e_end) = out_edges(v, g); e != e_end; ++e)
{
k2 = deg2(target(*e,g),g)*get(weight, *e);
sum.PutValue(k1, k2);
sum2.PutValue(k1, k2*k2);
count.PutValue(k1, get(weight, *e));
}
}
};
// get degrees pairs from one single vertex
......@@ -69,6 +89,21 @@ public:
k[1] = deg2(v, g);
hist.PutValue(k);
}
template <class Graph, class Deg1, class Deg2, class Sum, class Count,
class WeightMap>
void operator()(typename graph_traits<Graph>::vertex_descriptor v,
Deg1& deg1, Deg2& deg2, Graph& g, WeightMap& weight,
Sum& sum, Sum& sum2, Count& count)
{
typename Sum::point_t k1;
k1[0] = deg1(v, g);
typename Sum::count_type k2;
k2 = deg2(v, g);
sum.PutValue(k1, k2);
sum2.PutValue(k1, k2*k2);
count.PutValue(k1, 1);
}
};
......@@ -93,8 +128,8 @@ struct select_float_and_larger
struct apply
{
typedef typename mpl::if_<
mpl::equal_to<is_floating_point<Type1>,
is_floating_point<Type2> >,
typename mpl::and_<is_floating_point<Type1>,
is_floating_point<Type2> >::type,
typename select_larger_type::apply<Type1,Type2>::type,
typename mpl::if_<is_floating_point<Type1>,
Type1,
......@@ -104,6 +139,41 @@ struct select_float_and_larger
}
template <class Value>
void clean_bins(const vector<long double>& obins, vector<Value>& rbins)
{
typedef Value val_type;
rbins.resize(obins.size());
for (size_t j = 0; j < rbins.size(); ++j)
{
// we'll attempt to recover from out of bounds conditions
try
{
rbins[j] = numeric_cast<val_type,long double>(obins[j]);
}
catch (boost::numeric::negative_overflow&)
{
rbins[j] = boost::numeric::bounds<val_type>::lowest();
}
catch (boost::numeric::positive_overflow&)
{
rbins[j] = boost::numeric::bounds<val_type>::highest();
}
}
// sort the bins
sort(rbins.begin(), rbins.end());
// clean bins of zero size
vector<val_type> temp_bin(1);
temp_bin[0] = rbins[0];
for (size_t j = 1; j < rbins.size(); ++j)
{
if (rbins[j] > rbins[j-1])
temp_bin.push_back(rbins[j]);
}
rbins = temp_bin;
}
// retrieves the generalized vertex-vertex correlation histogram
template <class GetDegreePair>
struct get_correlation_histogram
......@@ -132,37 +202,7 @@ struct get_correlation_histogram
array<vector<val_type>,2> bins;
for (size_t i = 0; i < bins.size(); ++i)
{
bins[i].resize(_bins[i].size());
for (size_t j = 0; j < bins[i].size(); ++j)
{
// we'll attempt to recover from out of bounds conditions
try
{
bins[i][j] =
numeric_cast<val_type,long double>(_bins[i][j]);
}
catch (boost::numeric::negative_overflow&)
{
bins[i][j] = boost::numeric::bounds<val_type>::lowest();
}
catch (boost::numeric::positive_overflow&)
{
bins[i][j] = boost::numeric::bounds<val_type>::highest();
}
}
// sort the bins
sort(bins[i].begin(), bins[i].end());
// clean bins of zero size
vector<val_type> temp_bin(1);
temp_bin[0] = bins[i][0];
for (size_t j = 1; j < bins[i].size(); ++j)
{
if (bins[i][j] > bins[i][j-1])
temp_bin.push_back(bins[i][j]);
}
bins[i] = temp_bin;
}
clean_bins(_bins[i], bins[i]);
// find the data range
pair<type1,type1> range1;
......@@ -213,6 +253,94 @@ struct get_correlation_histogram
python::object& _ret_bins;
};
// retrieves the generalized correlation
template <class GetDegreePair>
struct get_avg_correlation
{
get_avg_correlation(python::object& avg, python::object& dev,
const vector<long double>& bins,
python::object& ret_bins)
: _avg(avg), _dev(dev), _bins(bins), _ret_bins(ret_bins) {}
template <class Graph, class DegreeSelector1, class DegreeSelector2,
class WeightMap>
void operator()(Graph* gp, DegreeSelector1 deg1, DegreeSelector2 deg2,
WeightMap weight) const
{
Graph& g = *gp;
GetDegreePair put_point;
typedef typename DegreeSelector1::value_type type1;
typedef typename DegreeSelector2::value_type type2;
typedef typename graph_tool::detail::
select_float_and_larger::apply<type2,double>::type
avg_type;
typedef type1 val_type;
typedef typename property_traits<WeightMap>::value_type count_type;
typedef Histogram<type1,count_type,1> count_t;
typedef Histogram<val_type,avg_type,1> sum_t;
array<vector<val_type>,1> bins;
bins[0].resize(_bins.size());
clean_bins(_bins, bins[0]);
// find the data range
array<pair<val_type,val_type>,1> data_range;
typename graph_traits<Graph>::vertex_iterator vi,vi_end;
data_range[0].first = boost::numeric::bounds<type1>::highest();
data_range[0].second = boost::numeric::bounds<type1>::lowest();
for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi)
{
val_type v1 = deg1(*vi, g);
data_range[0].first = min(data_range[0].first, v1);
data_range[0].second = max(data_range[0].second, v1);
}
sum_t sum(bins, data_range);
sum_t sum2(bins, data_range);
count_t count(bins, data_range);
SharedHistogram<sum_t> s_sum(sum);
SharedHistogram<sum_t> s_sum2(sum2);
SharedHistogram<count_t> s_count(count);
int i, N = num_vertices(g);
#pragma omp parallel for default(shared) private(i) \
firstprivate(s_hist) schedule(dynamic)
for (i = 0; i < N; ++i)
{
typename graph_traits<Graph>::vertex_descriptor v = vertex(i, g);
if (v == graph_traits<Graph>::null_vertex())
continue;
put_point(v, deg1, deg2, g, weight, s_sum, s_sum2, s_count);
}
s_sum.Gather();
s_sum2.Gather();
s_count.Gather();
for (size_t i = 0; i < s_sum.GetArray().size(); ++i)
{
sum.GetArray()[i] /= count.GetArray()[i];
sum2.GetArray()[i] =
sqrt(sum2.GetArray()[i]/count.GetArray()[i] -
sum.GetArray()[i])/sqrt(count.GetArray()[i]);
}
bins = sum.GetBins();
python::list ret_bins;
ret_bins.append(wrap_vector_owned(bins[0]));
_ret_bins = ret_bins;
_avg = wrap_multi_array_owned<avg_type,1>(sum.GetArray());
_dev = wrap_multi_array_owned<avg_type,1>(sum2.GetArray());
}
python::object& _avg;
python::object& _dev;
const vector<long double>& _bins;
python::object& _ret_bins;
};
} // graph_tool namespace
......
......@@ -22,10 +22,14 @@ using namespace boost;
void export_assortativity();
void export_vertex_correlations();
void export_combined_vertex_correlations();
void export_avg_correlations();
void export_avg_combined_correlations();
BOOST_PYTHON_MODULE(libgraph_tool_correlations)
{
export_assortativity();
export_vertex_correlations();
export_combined_vertex_correlations();
export_avg_correlations();
export_avg_combined_correlations();
}
......@@ -47,4 +47,3 @@ void graph_correlations_imp1(const GraphInterface& g, python::object& hist,
(deg1, deg2, weight);
}
......@@ -46,6 +46,7 @@ public:
typedef boost::mpl::int_<Dim> dim;
typedef CountType count_type;
typedef ValueType value_type;
// floating point type to calculate the mean
typedef typename boost::mpl::if_<boost::is_floating_point<ValueType>,
......@@ -138,6 +139,7 @@ template <class Histogram>
class SharedHistogram: public Histogram
{
public:
SharedHistogram(Histogram &hist): Histogram(hist), _sum(&hist) {}
~SharedHistogram()
{
......
......@@ -38,8 +38,8 @@ sys.setdlopenflags(_orig_dlopen_flags) # reset it to normal case to avoid
from .. core import _degree, _prop
from numpy import *
__all__ = ["assortativity", "scalar_assortativity",
"corr_hist", "combined_corr_hist", "avg_neighbour_corr"]
__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
"combined_corr_hist", "avg_neighbour_corr", "avg_combined_corr"]
def assortativity(g, deg):
return libgraph_tool_correlations.\
......@@ -50,53 +50,32 @@ def scalar_assortativity(g, deg):
scalar_assortativity_coefficient(g._Graph__graph,
_degree(g, deg))
def corr_hist(g, deg1, deg2, bins=[[1],[1]], weight=None):
def corr_hist(g, deg1, deg2, bins=[[1],[1]], weight=None, float_count=True):
ret = libgraph_tool_correlations.\
vertex_correlation_histogram(g._Graph__graph, _degree(g, deg1),
_degree(g, deg2), _prop("e", g, weight),
bins[0], bins[1])
return [ret[0], [ret[1][0], ret[1][1]]]
return [array(ret[0], dtype="float64") if float_count else ret[0],
[ret[1][0], ret[1][1]]]
def combined_corr_hist(g, deg1, deg2, bins=[[1],[1]]):
def combined_corr_hist(g, deg1, deg2, bins=[[1],[1]], float_count=True):
ret = libgraph_tool_correlations.\
vertex_combined_correlation_histogram(g._Graph__graph,
_degree(g, deg1),
_degree(g, deg2),
bins[0], bins[1])
return [ret[0], [ret[1][0], ret[1][1]]]
return [array(ret[0], dtype="float64") if float_count else ret[0],
[ret[1][0], ret[1][1]]]
def avg_neighbour_corr(g, deg1, deg2, bins=[[1],[1]], weight=None):
ret = corr_hist(g, deg1, deg2, bins, weight)
xbins = ret[1][0]
ybins = ret[1][1]
counts = ret[0]
avg = empty((counts.shape[0]))
dev = empty((counts.shape[0]))
mask = empty((counts.shape[0]), dtype=dtype('bool'))
n_masked = 0
for i in xrange(0, len(ret[0])):
N = counts[i,:].sum()
if N > 0:
avg[i] = average(ybins, weights=counts[i,:])
dev[i] = sqrt(average((ybins-avg[i])**2,
weights=counts[i,:]))/sqrt(N)
mask[i] = False
else:
mask[i] = True
n_masked += 1
if n_masked > 0: # remove empty bins
navg = empty(len(avg) - n_masked)
ndev = empty(len(dev) - n_masked)
nxbins = empty(len(xbins) - n_masked)
cum = 0
for i in xrange(0, len(avg)):
if not mask[i]:
navg[i-cum] = avg[i]
ndev[i-cum] = dev[i]
nxbins[i-cum] = xbins[i]
else:
cum += 1
avg = navg
dev = ndev
xbins = nxbins
return [avg, dev, xbins]
def avg_neighbour_corr(g, deg1, deg2, bins=[1], weight=None):
ret = libgraph_tool_correlations.\
vertex_avg_correlation(g._Graph__graph, _degree(g, deg1),
_degree(g, deg2), _prop("e", g, weight),
bins)
return [ret[0], ret[1], ret[2][0]]
def avg_combined_corr(g, deg1, deg2, bins=[1]):
ret = libgraph_tool_correlations.\
vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
_degree(g, deg2), bins)
return [ret[0], ret[1], ret[2][0]]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment