Commit 31791c81 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Change histogram binning semantics

Now the meaning of the bin edges given to the several histogram
functions are the same as in numpy/scipy. This means that the last bin
edge represents an upper bound on the last bin.
parent e75cf856
......@@ -126,7 +126,7 @@ rcParams["text.latex.preamble"] = [#"\usepackage{times}",
"\usepackage{amssymb}",
"\usepackage{amsmath}"]
from numpy import array
from numpy import *
import scipy
import scipy.stats
from math import *
......
......@@ -2,8 +2,8 @@
# We probably will need some things from several places
import sys, os
from pylab import * # for plotting
from numpy.random import * # for random sampling
from pylab import * # for plotting
from numpy.random import * # for random sampling
seed(42)
# We need to import the graph_tool module itself
......@@ -56,13 +56,14 @@ for i in xrange(1, N):
in_hist = vertex_hist(g, "in")
figure(figsize=(4,3))
errorbar(in_hist[1], in_hist[0], fmt="o", yerr=sqrt(in_hist[0]), label="in")
figure(figsize=(4, 3))
errorbar(in_hist[1][:-1], in_hist[0], fmt="o", yerr=sqrt(in_hist[0]),
label="in")
gca().set_yscale("log")
gca().set_xscale("log")
gca().set_ylim(1e-1,1e5)
gca().set_xlim(0.8,1e3)
subplots_adjust(left=0.2,bottom=0.2)
gca().set_ylim(1e-1, 1e5)
gca().set_xlim(0.8, 1e3)
subplots_adjust(left=0.2, bottom=0.2)
xlabel("$k_{in}$")
ylabel("$NP(k_{in})$")
savefig("deg-hist.png")
......
......@@ -202,34 +202,7 @@ struct get_correlation_histogram
for (size_t i = 0; i < bins.size(); ++i)
clean_bins(_bins[i], bins[i]);
// find the data range
pair<type1,type1> range1;
pair<type2,type2> range2;
range1.first = range1.second = bins[0][0];
range2.first = range2.second = bins[1][0];
if (bins[0].size() == 1 || bins[1].size() == 1)
{
typename graph_traits<Graph>::vertex_iterator vi,vi_end;
range1.first = boost::numeric::bounds<type1>::highest();
range1.second = boost::numeric::bounds<type1>::lowest();
range2.first = boost::numeric::bounds<type2>::highest();
range2.second = boost::numeric::bounds<type2>::lowest();
for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi)
{
type1 v1 = deg1(*vi, g);
type2 v2 = deg2(*vi, g);
range1.first = min(range1.first, v1);
range1.second = max(range1.second, v1);
range2.first = min(range2.first, v2);
range2.second = max(range2.second, v2);
}
}
boost::array<pair<val_type, val_type>, 2> data_range;
data_range[0] = range1;
data_range[1] = range2;
hist_t hist(bins, data_range);
hist_t hist(bins);
SharedHistogram<hist_t> s_hist(hist);
int i, N = num_vertices(g);
......@@ -288,25 +261,9 @@ struct get_avg_correlation
bins[0].resize(_bins.size());
clean_bins(_bins, bins[0]);
// find the data range
array<pair<val_type,val_type>,1> data_range;
data_range[0].first = data_range[0].second = _bins[0];
if (bins.size() == 1)
{
typename graph_traits<Graph>::vertex_iterator vi, vi_end;
data_range[0].first = boost::numeric::bounds<type1>::highest();
data_range[0].second = boost::numeric::bounds<type1>::lowest();
for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi)
{
val_type v1 = deg1(*vi, g);
data_range[0].first = min(data_range[0].first, v1);
data_range[0].second = max(data_range[0].second, v1);
}
}
sum_t sum(bins, data_range);
sum_t sum2(bins, data_range);
count_t count(bins, data_range);
sum_t sum(bins);
sum_t sum2(bins);
count_t count(bins);
SharedHistogram<sum_t> s_sum(sum);
SharedHistogram<sum_t> s_sum2(sum2);
......
......@@ -52,39 +52,49 @@ public:
typedef typename boost::mpl::if_<boost::is_floating_point<ValueType>,
ValueType, double>::type mean_t;
Histogram(const boost::array<std::vector<ValueType>, Dim>& bins,
const boost::array<std::pair<ValueType,ValueType>,Dim>&
data_range)
: _bins(bins), _data_range(data_range)
Histogram(const boost::array<std::vector<ValueType>, Dim>& bins):
_bins(bins)
{
bin_t new_shape;
for (size_t j = 0; j < Dim; ++j)
{
if (_bins[j].size() < 2)
throw std::range_error("invalid bin edge number < 2!");
_data_range[j] = std::make_pair(0, 0);
// detect whether the given bins are of constant width, for faster
// binning
bool const_width = true;
value_type delta = (bins.size() > 1) ?
_bins[j][1] - _bins[j][0] : 0.0;
for (size_t i = 1; i < _bins[j].size(); ++i)
_const_width[j] = true;
value_type delta = _bins[j][1] - _bins[j][0];
for (size_t i = 2; i < _bins[j].size(); ++i)
{
value_type d = _bins[j][i] - _bins[j][i-1];
if (delta != d)
const_width = false;
_const_width[j] = false;
}
if (bins.size() > 1 && const_width)
if (_const_width[j])
{
_data_range[j] = make_pair(_bins[j].front(),
_bins[j].back() + delta);
_bins[j].resize(1);
_bins[j][0] = delta;
if (_bins[j].size() > 2)
{
_data_range[j] = std::make_pair(_bins[j].front(),
_bins[j].back());
new_shape[j] = _bins[j].size() - 1;
}
else
{
_data_range[j] = std::make_pair(_bins[j].front(),
_bins[j].front());
new_shape[j] = 1;
}
if (delta == 0)
throw std::range_error("invalid bin size of zero!");
}
if (_bins[j].size() > 1)
new_shape[j] = _bins[j].size();
else
new_shape[j] = size_t((data_range[j].second -
data_range[j].first) / _bins[j][0]) + 1;
{
new_shape[j] = _bins[j].size() - 1;
}
}
_counts.resize(new_shape);
}
......@@ -94,24 +104,33 @@ public:
bin_t bin;
for (size_t i = 0; i < Dim; ++i)
{
if (_bins[i].size() == 1) // constant bin width
if (_const_width[i])
{
if (v[i] < _data_range[i].first || v[i] > _data_range[i].second)
if (_data_range[i].first != _data_range[i].second &&
(v[i] < _data_range[i].first ||
v[i] > _data_range[i].second))
return; // out of bounds
bin[i] = (v[i] - _data_range[i].first) / _bins[i][0];
if (bin[i] >= _counts.shape()[i]) // bogus _data_range...
throw std::range_error("given value does not correspond to"
" data_range");
value_type delta = _bins[i][1] - _bins[i][0];
bin[i] = (v[i] - _data_range[i].first) / delta;
if (bin[i] >= _counts.shape()[i]) // modify shape
{
bin_t new_shape;
for (size_t j = 0; j < Dim; ++j)
new_shape[j] = _counts.shape()[j];
new_shape[i] = bin[i] + 1;
_counts.resize(new_shape);
while (_bins[i].size() < new_shape[i] + 1)
_bins[i].push_back(_bins[i].back() + delta);
}
}
else // arbitrary bins widths. do a binary search
{
std::vector<ValueType>& bins = _bins[i];
typeof(bins.begin()) iter = upper_bound(bins.begin(),
bins.end(), v[i]);
if (iter == bins.end()) // larger than any bin, thus belongs to
{ // the last one
bin[i] = bins.size() - 1;
if (iter == bins.end())
{
return; // falls off from last bin, do not count
}
else
{
......@@ -131,28 +150,13 @@ public:
boost::array<std::pair<ValueType,ValueType>,Dim>& GetDataRange()
{ return _data_range; }
boost::array<std::vector<ValueType>, Dim> GetBins()
{
boost::array<std::vector<ValueType>, Dim> bins;
for (size_t j = 0; j < Dim; ++j)
if (_bins[j].size() == 1) // constant bin width
{
for (ValueType i = _data_range[j].first;
i <= _data_range[j].second; i += _bins[j][0])
bins[j].push_back(i);
}
else
{
bins[j] = _bins[j];
}
return bins;
}
boost::array<std::vector<ValueType>, Dim>& GetBins() { return _bins; }
protected:
boost::multi_array<CountType,Dim> _counts;
boost::array<std::vector<ValueType>, Dim> _bins;
boost::array<std::pair<ValueType,ValueType>,Dim> _data_range;
boost::array<bool,Dim> _const_width;
};
......@@ -185,14 +189,10 @@ public:
_sum->GetArray().resize(shape);
for (size_t i = 0; i < this->_counts.num_elements(); ++i)
_sum->GetArray().data()[i] += this->_counts.data()[i];
for (size_t i = 0; i < size_t(Histogram::dim::value); ++i)
for (int i = 0; i < Histogram::dim::value; ++i)
{
_sum->GetDataRange()[i].first =
min(this->_data_range[i].first,
_sum->GetDataRange()[i].first);
_sum->GetDataRange()[i].second =
max(this->_data_range[i].second,
_sum->GetDataRange()[i].second);
if (_sum->GetBins()[i].size() < this->_bins[i].size())
_sum->GetBins()[i] = this->_bins[i];
}
}
_sum = 0;
......
......@@ -73,11 +73,7 @@ struct get_distance_histogram
for (size_t i = 0; i < obins.size(); ++i)
bins[0][i] = obins[i];
// only used for constant-sized bins
boost::array<pair<val_type, val_type>, 1> data_range;
data_range[0].first = data_range[0].second = 0;
hist_t hist(bins, data_range);
hist_t hist(bins);
SharedHistogram<hist_t> s_hist(hist);
typename hist_t::point_t point;
......
......@@ -76,11 +76,7 @@ struct get_sampled_distance_histogram
for (size_t i = 0; i < obins.size(); ++i)
bins[0][i] = obins[i];
// only used for constant-sized bins
boost::array<pair<val_type, val_type>, 1> data_range;
data_range[0].first = data_range[0].second = 0;
hist_t hist(bins, data_range);
hist_t hist(bins);
SharedHistogram<hist_t> s_hist(hist);
vector<vertex_t> sources;
......
......@@ -34,16 +34,6 @@ using namespace boost;
class VertexHistogramFiller
{
public:
template <class Graph, class DegreeSelector, class ValueType>
void UpdateRange(Graph& g,
typename graph_traits<Graph>::vertex_descriptor v,
DegreeSelector& deg, pair<ValueType,ValueType>& range)
{
ValueType val = deg(v, g);
range.first = min(range.first, val);
range.second = max(range.second, val);
}
template <class Graph, class DegreeSelector, class Hist>
void operator()(Graph& g, typename graph_traits<Graph>::vertex_descriptor v,
DegreeSelector& deg, Hist& hist)
......@@ -57,21 +47,6 @@ public:
class EdgeHistogramFiller
{
public:
template <class Graph, class EdgeProperty, class ValueType>
void UpdateRange(Graph& g,
typename graph_traits<Graph>::vertex_descriptor v,
EdgeProperty& eprop, pair<ValueType,ValueType>& range)
{
typename graph_traits<Graph>::out_edge_iterator e, e_begin, e_end;
tie(e_begin,e_end) = out_edges(v, g);
for(e = e_begin; e != e_end; ++e)
{
ValueType val = eprop[*e];
range.first = min(range.first, val);
range.second = max(range.second, val);
}
}
template <class Graph, class EdgeProperty, class Hist>
void operator()(Graph& g, typename graph_traits<Graph>::vertex_descriptor v,
EdgeProperty& eprop, Hist& hist)
......@@ -134,29 +109,10 @@ struct get_histogram
}
bins = temp_bin;
// find the data range
pair<value_type,value_type> range;
if (bins.size() == 1)
{
typename graph_traits<Graph>::vertex_iterator vi,vi_end;
range.first = boost::numeric::bounds<value_type>::highest();
range.second = boost::numeric::bounds<value_type>::lowest();
for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi)
filler.UpdateRange(g, *vi, deg, range);
}
else
{
range.first = bins.front();
range.second = bins.back();
}
boost::array<pair<value_type, value_type>, 1> data_range;
data_range[0] = range;
boost::array<vector<value_type>, 1> bin_list;
bin_list[0] = bins;
hist_t hist(bin_list, data_range);
hist_t hist(bin_list);
SharedHistogram<hist_t> s_hist(hist);
int i, N = num_vertices(g);
......
......@@ -49,6 +49,7 @@ from numpy import *
__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
"combined_corr_hist", "avg_neighbour_corr", "avg_combined_corr"]
def assortativity(g, deg):
r"""
Obtain the assortativity coefficient for the given graph.
......@@ -119,6 +120,7 @@ def assortativity(g, deg):
return libgraph_tool_correlations.\
assortativity_coefficient(g._Graph__graph, _degree(g, deg))
def scalar_assortativity(g, deg):
r"""
Obtain the scalar assortativity coefficient for the given graph.
......@@ -193,7 +195,8 @@ def scalar_assortativity(g, deg):
scalar_assortativity_coefficient(g._Graph__graph,
_degree(g, deg))
def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None,
def corr_hist(g, deg_source, deg_target, bins=[[0, 1], [0, 1]], weight=None,
float_count=True):
r"""
Obtain the correlation histogram for the given graph.
......@@ -209,9 +212,9 @@ def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None,
degree type ("in", "out" or "total") or vertex property map for the
target vertex.
bins : list of lists (optional, default: [[1], [1]])
A list of bins to be used for the source and target degrees. If any list
has size 1, it is used as the constant width of an automatically
generated bin range.
A list of bin edges to be used for the source and target degrees. If any
list has size 2, it is used as the constant width of an automatically
generated bin range, starting from the first value.
weight : edge property map (optional, default: None)
Weight (multiplicative factor) to be used on each edge.
float_count : bool (optional, default: True)
......@@ -282,12 +285,14 @@ def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None,
ret = libgraph_tool_correlations.\
vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source),
_degree(g, deg_target),
_prop("e", g, weight), bins[0], bins[1])
_prop("e", g, weight),
[float(x) for x in bins[0]],
[float(x) for x in bins[1]])
return [array(ret[0], dtype="float64") if float_count else ret[0],
[ret[1][0], ret[1][1]]]
def combined_corr_hist(g, deg1, deg2, bins=None, float_count=True):
def combined_corr_hist(g, deg1, deg2, bins=[[0, 1], [0, 1]], float_count=True):
r"""
Obtain the single-vertex combined correlation histogram for the given graph.
......@@ -299,10 +304,10 @@ def combined_corr_hist(g, deg1, deg2, bins=None, float_count=True):
first degree type ("in", "out" or "total") or vertex property map.
deg2 : string or :class:`~graph_tool.PropertyMap`
second degree type ("in", "out" or "total") or vertex property map.
bins : list of lists (optional, default: [[1], [1]])
A list of bins to be used for the first and second degrees. If any list
has size 1, it is used as the constant width of an automatically
generated bin range.
bins : list of lists (optional, default: [[0, 1], [0, 1]])
A list of bin edges to be used for the first and second degrees. If any
list has size 2, it is used as the constant width of an automatically
generated bin range, starting from the first value.
float_count : bool (optional, default: True)
If True, the bin counts are converted float variables, which is useful
for normalization, and other processing. It False, the bin counts will
......@@ -362,18 +367,17 @@ def combined_corr_hist(g, deg1, deg2, bins=None, float_count=True):
Combined in/out-degree correlation histogram.
"""
if bins == None:
bins = [[1], [1]]
ret = libgraph_tool_correlations.\
vertex_combined_correlation_histogram(g._Graph__graph,
_degree(g, deg1),
_degree(g, deg2),
bins[0], bins[1])
[float(x) for x in bins[0]],
[float(x) for x in bins[1]])
return [array(ret[0], dtype="float64") if float_count else ret[0],
[ret[1][0], ret[1][1]]]
def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None):
def avg_neighbour_corr(g, deg_source, deg_target, bins=[0, 1], weight=None):
r"""
Obtain the average neighbour-neighbour correlation for the given graph.
......@@ -387,9 +391,10 @@ def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None):
deg_target : string or :class:`~graph_tool.PropertyMap`
degree type ("in", "out" or "total") or vertex property map for the
target vertex.
bins : list (optional, default: [1])
Bins to be used for the source degrees. If the list has size 1, it is
used as the constant width of an automatically generated bin range.
bins : list (optional, default: [0, 1])
Bins to be used for the source degrees. If the list has size 2, it is
used as the constant width of an automatically generated bin range,
starting from the first value.
weight : edge property map (optional, default: None)
Weight (multiplicative factor) to be used on each edge.
......@@ -442,7 +447,7 @@ def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None):
<...>
>>> ylabel("target out-degree")
<...>
>>> errorbar(h[2], h[0], yerr=h[1], fmt="o")
>>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
(...)
>>> savefig("avg_corr.png")
......@@ -452,15 +457,14 @@ def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None):
Average out/out degree correlation.
"""
if bins == None:
bins = [1]
ret = libgraph_tool_correlations.\
vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source),
_degree(g, deg_target), _prop("e", g, weight),
bins)
[float(x) for x in bins])
return [ret[0], ret[1], ret[2][0]]
def avg_combined_corr(g, deg1, deg2, bins=None):
def avg_combined_corr(g, deg1, deg2, bins=[0, 1]):
r"""
Obtain the single-vertex combined correlation histogram for the given graph.
......@@ -472,10 +476,10 @@ def avg_combined_corr(g, deg1, deg2, bins=None):
first degree type ("in", "out" or "total") or vertex property map.
deg2 : string or :class:`~graph_tool.PropertyMap`
second degree type ("in", "out" or "total") or vertex property map.
bins : list (optional, default: [1])
A list of bins to be used for the first degrees. If the list
has size 1, it is used as the constant width of an automatically
generated bin range.
bins : list (optional, default: [0, 1])
Bins to be used for the first degrees. If the list has size 2, it is
used as the constant width of an automatically generated bin range,
starting from the first value.
Returns
-------
......@@ -520,7 +524,7 @@ def avg_combined_corr(g, deg1, deg2, bins=None):
<...>
>>> ylabel("out-degree")
<...>
>>> errorbar(h[2], h[0], yerr=h[1], fmt="o")
>>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o")
(...)
>>> savefig("combined_avg_corr.png")
......@@ -530,9 +534,8 @@ def avg_combined_corr(g, deg1, deg2, bins=None):
Average combined in/out-degree correlation.
"""
if bins == None:
bins = [1]
ret = libgraph_tool_correlations.\
vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
_degree(g, deg2), bins)
_degree(g, deg2),
[float(x) for x in bins])
return [ret[0], ret[1], ret[2][0]]
......@@ -200,19 +200,19 @@ def random_graph(N, deg_sampler, deg_corr=None, directed=True,
>>> axes([0.1,0.15,0.63,0.8])
<...>
>>> corr = gt.avg_neighbour_corr(g, "in", "in")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{in}\right>$ vs in")
(...)
>>> corr = gt.avg_neighbour_corr(g, "in", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{out}\right>$ vs in")
(...)
>>> corr = gt.avg_neighbour_corr(g, "out", "in")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{in}\right>$ vs out")
(...)
>>> corr = gt.avg_neighbour_corr(g, "out", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{out}\right>$ vs out")
(...)
>>> legend(loc=(1.05,0.5))
......@@ -347,19 +347,19 @@ def random_rewire(g, strat="uncorrelated", parallel_edges=False,
>>> g = gt.random_graph(30000, lambda: sample_k(20),
... lambda i,j: exp(abs(i-j)), directed=False)
>>> corr = gt.avg_neighbour_corr(g, "out", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", label="original")
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="original")
(...)
>>> gt.random_rewire(g, "correlated")
>>> corr = gt.avg_neighbour_corr(g, "out", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="*", label="correlated")
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="*", label="correlated")
(...)
>>> gt.random_rewire(g)
>>> corr = gt.avg_neighbour_corr(g, "out", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", label="uncorrelated")
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="uncorrelated")
(...)
>>> gt.random_rewire(g, "erdos")
>>> corr = gt.avg_neighbour_corr(g, "out", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", label="Erdos")
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="Erdos")
(...)
>>> xlabel("$k$")
<...>
......@@ -387,29 +387,29 @@ def random_rewire(g, strat="uncorrelated", parallel_edges=False,
>>> axes([0.1,0.15,0.6,0.8])
<...>
>>> corr = gt.avg_neighbour_corr(g, "in", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{o}\right>$ vs i")
(...)
>>> corr = gt.avg_neighbour_corr(g, "out", "in")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{i}\right>$ vs o")
(...)
>>> gt.random_rewire(g, "correlated")
>>> corr = gt.avg_neighbour_corr(g, "in", "out")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",
>>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-",
... label=r"$\left<\text{o}\right>$ vs i, corr.")
(...)
>>> corr = gt.avg_neighbour_corr(g, "out", "in")
>>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-",