Commit 31791c81 by Tiago Peixoto

Change histogram binning semantics

Now the meaning of the bin edges given to the several histogram
functions are the same as in numpy/scipy. This means that the last bin
edge represents an upper bound on the last bin.
parent e75cf856
 ... ... @@ -126,7 +126,7 @@ rcParams["text.latex.preamble"] = [#"\usepackage{times}", "\usepackage{amssymb}", "\usepackage{amsmath}"] from numpy import array from numpy import * import scipy import scipy.stats from math import * ... ...
 ... ... @@ -2,8 +2,8 @@ # We probably will need some things from several places import sys, os from pylab import * # for plotting from numpy.random import * # for random sampling from pylab import * # for plotting from numpy.random import * # for random sampling seed(42) # We need to import the graph_tool module itself ... ... @@ -56,13 +56,14 @@ for i in xrange(1, N): in_hist = vertex_hist(g, "in") figure(figsize=(4,3)) errorbar(in_hist[1], in_hist[0], fmt="o", yerr=sqrt(in_hist[0]), label="in") figure(figsize=(4, 3)) errorbar(in_hist[1][:-1], in_hist[0], fmt="o", yerr=sqrt(in_hist[0]), label="in") gca().set_yscale("log") gca().set_xscale("log") gca().set_ylim(1e-1,1e5) gca().set_xlim(0.8,1e3) subplots_adjust(left=0.2,bottom=0.2) gca().set_ylim(1e-1, 1e5) gca().set_xlim(0.8, 1e3) subplots_adjust(left=0.2, bottom=0.2) xlabel("$k_{in}$") ylabel("$NP(k_{in})$") savefig("deg-hist.png") ... ...
 ... ... @@ -202,34 +202,7 @@ struct get_correlation_histogram for (size_t i = 0; i < bins.size(); ++i) clean_bins(_bins[i], bins[i]); // find the data range pair range1; pair range2; range1.first = range1.second = bins[0][0]; range2.first = range2.second = bins[1][0]; if (bins[0].size() == 1 || bins[1].size() == 1) { typename graph_traits::vertex_iterator vi,vi_end; range1.first = boost::numeric::bounds::highest(); range1.second = boost::numeric::bounds::lowest(); range2.first = boost::numeric::bounds::highest(); range2.second = boost::numeric::bounds::lowest(); for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi) { type1 v1 = deg1(*vi, g); type2 v2 = deg2(*vi, g); range1.first = min(range1.first, v1); range1.second = max(range1.second, v1); range2.first = min(range2.first, v2); range2.second = max(range2.second, v2); } } boost::array, 2> data_range; data_range[0] = range1; data_range[1] = range2; hist_t hist(bins, data_range); hist_t hist(bins); SharedHistogram s_hist(hist); int i, N = num_vertices(g); ... ... @@ -288,25 +261,9 @@ struct get_avg_correlation bins[0].resize(_bins.size()); clean_bins(_bins, bins[0]); // find the data range array,1> data_range; data_range[0].first = data_range[0].second = _bins[0]; if (bins.size() == 1) { typename graph_traits::vertex_iterator vi, vi_end; data_range[0].first = boost::numeric::bounds::highest(); data_range[0].second = boost::numeric::bounds::lowest(); for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi) { val_type v1 = deg1(*vi, g); data_range[0].first = min(data_range[0].first, v1); data_range[0].second = max(data_range[0].second, v1); } } sum_t sum(bins, data_range); sum_t sum2(bins, data_range); count_t count(bins, data_range); sum_t sum(bins); sum_t sum2(bins); count_t count(bins); SharedHistogram s_sum(sum); SharedHistogram s_sum2(sum2); ... ...
 ... ... @@ -52,39 +52,49 @@ public: typedef typename boost::mpl::if_, ValueType, double>::type mean_t; Histogram(const boost::array, Dim>& bins, const boost::array,Dim>& data_range) : _bins(bins), _data_range(data_range) Histogram(const boost::array, Dim>& bins): _bins(bins) { bin_t new_shape; for (size_t j = 0; j < Dim; ++j) { if (_bins[j].size() < 2) throw std::range_error("invalid bin edge number < 2!"); _data_range[j] = std::make_pair(0, 0); // detect whether the given bins are of constant width, for faster // binning bool const_width = true; value_type delta = (bins.size() > 1) ? _bins[j][1] - _bins[j][0] : 0.0; for (size_t i = 1; i < _bins[j].size(); ++i) _const_width[j] = true; value_type delta = _bins[j][1] - _bins[j][0]; for (size_t i = 2; i < _bins[j].size(); ++i) { value_type d = _bins[j][i] - _bins[j][i-1]; if (delta != d) const_width = false; _const_width[j] = false; } if (bins.size() > 1 && const_width) if (_const_width[j]) { _data_range[j] = make_pair(_bins[j].front(), _bins[j].back() + delta); _bins[j].resize(1); _bins[j][0] = delta; if (_bins[j].size() > 2) { _data_range[j] = std::make_pair(_bins[j].front(), _bins[j].back()); new_shape[j] = _bins[j].size() - 1; } else { _data_range[j] = std::make_pair(_bins[j].front(), _bins[j].front()); new_shape[j] = 1; } if (delta == 0) throw std::range_error("invalid bin size of zero!"); } if (_bins[j].size() > 1) new_shape[j] = _bins[j].size(); else new_shape[j] = size_t((data_range[j].second - data_range[j].first) / _bins[j][0]) + 1; { new_shape[j] = _bins[j].size() - 1; } } _counts.resize(new_shape); } ... ... @@ -94,24 +104,33 @@ public: bin_t bin; for (size_t i = 0; i < Dim; ++i) { if (_bins[i].size() == 1) // constant bin width if (_const_width[i]) { if (v[i] < _data_range[i].first || v[i] > _data_range[i].second) if (_data_range[i].first != _data_range[i].second && (v[i] < _data_range[i].first || v[i] > _data_range[i].second)) return; // out of bounds bin[i] = (v[i] - _data_range[i].first) / _bins[i][0]; if (bin[i] >= _counts.shape()[i]) // bogus _data_range... throw std::range_error("given value does not correspond to" " data_range"); value_type delta = _bins[i][1] - _bins[i][0]; bin[i] = (v[i] - _data_range[i].first) / delta; if (bin[i] >= _counts.shape()[i]) // modify shape { bin_t new_shape; for (size_t j = 0; j < Dim; ++j) new_shape[j] = _counts.shape()[j]; new_shape[i] = bin[i] + 1; _counts.resize(new_shape); while (_bins[i].size() < new_shape[i] + 1) _bins[i].push_back(_bins[i].back() + delta); } } else // arbitrary bins widths. do a binary search { std::vector& bins = _bins[i]; typeof(bins.begin()) iter = upper_bound(bins.begin(), bins.end(), v[i]); if (iter == bins.end()) // larger than any bin, thus belongs to { // the last one bin[i] = bins.size() - 1; if (iter == bins.end()) { return; // falls off from last bin, do not count } else { ... ... @@ -131,28 +150,13 @@ public: boost::array,Dim>& GetDataRange() { return _data_range; } boost::array, Dim> GetBins() { boost::array, Dim> bins; for (size_t j = 0; j < Dim; ++j) if (_bins[j].size() == 1) // constant bin width { for (ValueType i = _data_range[j].first; i <= _data_range[j].second; i += _bins[j][0]) bins[j].push_back(i); } else { bins[j] = _bins[j]; } return bins; } boost::array, Dim>& GetBins() { return _bins; } protected: boost::multi_array _counts; boost::array, Dim> _bins; boost::array,Dim> _data_range; boost::array _const_width; }; ... ... @@ -185,14 +189,10 @@ public: _sum->GetArray().resize(shape); for (size_t i = 0; i < this->_counts.num_elements(); ++i) _sum->GetArray().data()[i] += this->_counts.data()[i]; for (size_t i = 0; i < size_t(Histogram::dim::value); ++i) for (int i = 0; i < Histogram::dim::value; ++i) { _sum->GetDataRange()[i].first = min(this->_data_range[i].first, _sum->GetDataRange()[i].first); _sum->GetDataRange()[i].second = max(this->_data_range[i].second, _sum->GetDataRange()[i].second); if (_sum->GetBins()[i].size() < this->_bins[i].size()) _sum->GetBins()[i] = this->_bins[i]; } } _sum = 0; ... ...
 ... ... @@ -73,11 +73,7 @@ struct get_distance_histogram for (size_t i = 0; i < obins.size(); ++i) bins[0][i] = obins[i]; // only used for constant-sized bins boost::array, 1> data_range; data_range[0].first = data_range[0].second = 0; hist_t hist(bins, data_range); hist_t hist(bins); SharedHistogram s_hist(hist); typename hist_t::point_t point; ... ...
 ... ... @@ -76,11 +76,7 @@ struct get_sampled_distance_histogram for (size_t i = 0; i < obins.size(); ++i) bins[0][i] = obins[i]; // only used for constant-sized bins boost::array, 1> data_range; data_range[0].first = data_range[0].second = 0; hist_t hist(bins, data_range); hist_t hist(bins); SharedHistogram s_hist(hist); vector sources; ... ...
 ... ... @@ -34,16 +34,6 @@ using namespace boost; class VertexHistogramFiller { public: template void UpdateRange(Graph& g, typename graph_traits::vertex_descriptor v, DegreeSelector& deg, pair& range) { ValueType val = deg(v, g); range.first = min(range.first, val); range.second = max(range.second, val); } template void operator()(Graph& g, typename graph_traits::vertex_descriptor v, DegreeSelector& deg, Hist& hist) ... ... @@ -57,21 +47,6 @@ public: class EdgeHistogramFiller { public: template void UpdateRange(Graph& g, typename graph_traits::vertex_descriptor v, EdgeProperty& eprop, pair& range) { typename graph_traits::out_edge_iterator e, e_begin, e_end; tie(e_begin,e_end) = out_edges(v, g); for(e = e_begin; e != e_end; ++e) { ValueType val = eprop[*e]; range.first = min(range.first, val); range.second = max(range.second, val); } } template void operator()(Graph& g, typename graph_traits::vertex_descriptor v, EdgeProperty& eprop, Hist& hist) ... ... @@ -134,29 +109,10 @@ struct get_histogram } bins = temp_bin; // find the data range pair range; if (bins.size() == 1) { typename graph_traits::vertex_iterator vi,vi_end; range.first = boost::numeric::bounds::highest(); range.second = boost::numeric::bounds::lowest(); for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi) filler.UpdateRange(g, *vi, deg, range); } else { range.first = bins.front(); range.second = bins.back(); } boost::array, 1> data_range; data_range[0] = range; boost::array, 1> bin_list; bin_list[0] = bins; hist_t hist(bin_list, data_range); hist_t hist(bin_list); SharedHistogram s_hist(hist); int i, N = num_vertices(g); ... ...
 ... ... @@ -49,6 +49,7 @@ from numpy import * __all__ = ["assortativity", "scalar_assortativity", "corr_hist", "combined_corr_hist", "avg_neighbour_corr", "avg_combined_corr"] def assortativity(g, deg): r""" Obtain the assortativity coefficient for the given graph. ... ... @@ -119,6 +120,7 @@ def assortativity(g, deg): return libgraph_tool_correlations.\ assortativity_coefficient(g._Graph__graph, _degree(g, deg)) def scalar_assortativity(g, deg): r""" Obtain the scalar assortativity coefficient for the given graph. ... ... @@ -193,7 +195,8 @@ def scalar_assortativity(g, deg): scalar_assortativity_coefficient(g._Graph__graph, _degree(g, deg)) def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None, def corr_hist(g, deg_source, deg_target, bins=[[0, 1], [0, 1]], weight=None, float_count=True): r""" Obtain the correlation histogram for the given graph. ... ... @@ -209,9 +212,9 @@ def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None, degree type ("in", "out" or "total") or vertex property map for the target vertex. bins : list of lists (optional, default: [[1], [1]]) A list of bins to be used for the source and target degrees. If any list has size 1, it is used as the constant width of an automatically generated bin range. A list of bin edges to be used for the source and target degrees. If any list has size 2, it is used as the constant width of an automatically generated bin range, starting from the first value. weight : edge property map (optional, default: None) Weight (multiplicative factor) to be used on each edge. float_count : bool (optional, default: True) ... ... @@ -282,12 +285,14 @@ def corr_hist(g, deg_source, deg_target, bins=[[1], [1]], weight=None, ret = libgraph_tool_correlations.\ vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source), _degree(g, deg_target), _prop("e", g, weight), bins[0], bins[1]) _prop("e", g, weight), [float(x) for x in bins[0]], [float(x) for x in bins[1]]) return [array(ret[0], dtype="float64") if float_count else ret[0], [ret[1][0], ret[1][1]]] def combined_corr_hist(g, deg1, deg2, bins=None, float_count=True): def combined_corr_hist(g, deg1, deg2, bins=[[0, 1], [0, 1]], float_count=True): r""" Obtain the single-vertex combined correlation histogram for the given graph. ... ... @@ -299,10 +304,10 @@ def combined_corr_hist(g, deg1, deg2, bins=None, float_count=True): first degree type ("in", "out" or "total") or vertex property map. deg2 : string or :class:~graph_tool.PropertyMap second degree type ("in", "out" or "total") or vertex property map. bins : list of lists (optional, default: [[1], [1]]) A list of bins to be used for the first and second degrees. If any list has size 1, it is used as the constant width of an automatically generated bin range. bins : list of lists (optional, default: [[0, 1], [0, 1]]) A list of bin edges to be used for the first and second degrees. If any list has size 2, it is used as the constant width of an automatically generated bin range, starting from the first value. float_count : bool (optional, default: True) If True, the bin counts are converted float variables, which is useful for normalization, and other processing. It False, the bin counts will ... ... @@ -362,18 +367,17 @@ def combined_corr_hist(g, deg1, deg2, bins=None, float_count=True): Combined in/out-degree correlation histogram. """ if bins == None: bins = [[1], [1]] ret = libgraph_tool_correlations.\ vertex_combined_correlation_histogram(g._Graph__graph, _degree(g, deg1), _degree(g, deg2), bins[0], bins[1]) [float(x) for x in bins[0]], [float(x) for x in bins[1]]) return [array(ret[0], dtype="float64") if float_count else ret[0], [ret[1][0], ret[1][1]]] def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None): def avg_neighbour_corr(g, deg_source, deg_target, bins=[0, 1], weight=None): r""" Obtain the average neighbour-neighbour correlation for the given graph. ... ... @@ -387,9 +391,10 @@ def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None): deg_target : string or :class:~graph_tool.PropertyMap degree type ("in", "out" or "total") or vertex property map for the target vertex. bins : list (optional, default: [1]) Bins to be used for the source degrees. If the list has size 1, it is used as the constant width of an automatically generated bin range. bins : list (optional, default: [0, 1]) Bins to be used for the source degrees. If the list has size 2, it is used as the constant width of an automatically generated bin range, starting from the first value. weight : edge property map (optional, default: None) Weight (multiplicative factor) to be used on each edge. ... ... @@ -442,7 +447,7 @@ def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None): <...> >>> ylabel("target out-degree") <...> >>> errorbar(h[2], h[0], yerr=h[1], fmt="o") >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o") (...) >>> savefig("avg_corr.png") ... ... @@ -452,15 +457,14 @@ def avg_neighbour_corr(g, deg_source, deg_target, bins=None, weight=None): Average out/out degree correlation. """ if bins == None: bins = [1] ret = libgraph_tool_correlations.\ vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source), _degree(g, deg_target), _prop("e", g, weight), bins) [float(x) for x in bins]) return [ret[0], ret[1], ret[2][0]] def avg_combined_corr(g, deg1, deg2, bins=None): def avg_combined_corr(g, deg1, deg2, bins=[0, 1]): r""" Obtain the single-vertex combined correlation histogram for the given graph. ... ... @@ -472,10 +476,10 @@ def avg_combined_corr(g, deg1, deg2, bins=None): first degree type ("in", "out" or "total") or vertex property map. deg2 : string or :class:~graph_tool.PropertyMap second degree type ("in", "out" or "total") or vertex property map. bins : list (optional, default: [1]) A list of bins to be used for the first degrees. If the list has size 1, it is used as the constant width of an automatically generated bin range. bins : list (optional, default: [0, 1]) Bins to be used for the first degrees. If the list has size 2, it is used as the constant width of an automatically generated bin range, starting from the first value. Returns ------- ... ... @@ -520,7 +524,7 @@ def avg_combined_corr(g, deg1, deg2, bins=None): <...> >>> ylabel("out-degree") <...> >>> errorbar(h[2], h[0], yerr=h[1], fmt="o") >>> errorbar(h[2][:-1], h[0], yerr=h[1], fmt="o") (...) >>> savefig("combined_avg_corr.png") ... ... @@ -530,9 +534,8 @@ def avg_combined_corr(g, deg1, deg2, bins=None): Average combined in/out-degree correlation. """ if bins == None: bins = [1] ret = libgraph_tool_correlations.\ vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1), _degree(g, deg2), bins) _degree(g, deg2), [float(x) for x in bins]) return [ret[0], ret[1], ret[2][0]]
 ... ... @@ -200,19 +200,19 @@ def random_graph(N, deg_sampler, deg_corr=None, directed=True, >>> axes([0.1,0.15,0.63,0.8]) <...> >>> corr = gt.avg_neighbour_corr(g, "in", "in") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{in}\right>$ vs in") (...) >>> corr = gt.avg_neighbour_corr(g, "in", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{out}\right>$ vs in") (...) >>> corr = gt.avg_neighbour_corr(g, "out", "in") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{in}\right>$ vs out") (...) >>> corr = gt.avg_neighbour_corr(g, "out", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{out}\right>$ vs out") (...) >>> legend(loc=(1.05,0.5)) ... ... @@ -347,19 +347,19 @@ def random_rewire(g, strat="uncorrelated", parallel_edges=False, >>> g = gt.random_graph(30000, lambda: sample_k(20), ... lambda i,j: exp(abs(i-j)), directed=False) >>> corr = gt.avg_neighbour_corr(g, "out", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", label="original") >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="original") (...) >>> gt.random_rewire(g, "correlated") >>> corr = gt.avg_neighbour_corr(g, "out", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="*", label="correlated") >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="*", label="correlated") (...) >>> gt.random_rewire(g) >>> corr = gt.avg_neighbour_corr(g, "out", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", label="uncorrelated") >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="uncorrelated") (...) >>> gt.random_rewire(g, "erdos") >>> corr = gt.avg_neighbour_corr(g, "out", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", label="Erdos") >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", label="Erdos") (...) >>> xlabel("$k$") <...> ... ... @@ -387,29 +387,29 @@ def random_rewire(g, strat="uncorrelated", parallel_edges=False, >>> axes([0.1,0.15,0.6,0.8]) <...> >>> corr = gt.avg_neighbour_corr(g, "in", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{o}\right>$ vs i") (...) >>> corr = gt.avg_neighbour_corr(g, "out", "in") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{i}\right>$ vs o") (...) >>> gt.random_rewire(g, "correlated") >>> corr = gt.avg_neighbour_corr(g, "in", "out") >>> errorbar(corr[2], corr[0], yerr=corr[1], fmt="o-", >>> errorbar(corr[2][:-1], corr[0], yerr=corr[1], fmt="o-", ... label=r"$\left<\text{o}\right>$ vs i, corr.")