Commit 29f83ea6 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Resuscitate distance_histogram() and sampled_distance_histogram()

parent b4249772
...@@ -55,14 +55,23 @@ public: ...@@ -55,14 +55,23 @@ public:
Histogram(const boost::array<std::vector<ValueType>, Dim>& bins, Histogram(const boost::array<std::vector<ValueType>, Dim>& bins,
const boost::array<std::pair<ValueType,ValueType>,Dim>& const boost::array<std::pair<ValueType,ValueType>,Dim>&
data_range) data_range)
: _bins(bins), _data_range(data_range) : _bins(bins), _data_range(data_range), _grow(Dim, false)
{ {
bin_t new_shape; bin_t new_shape;
for (size_t j = 0; j < Dim; ++j) for (size_t j = 0; j < Dim; ++j)
if (_bins[j].size() == 1) // constant bin width if (_bins[j].size() == 1) // constant bin width
{ {
new_shape[j] = floor((_data_range[j].second - if (_data_range[j].first == _data_range[j].second)
_data_range[j].first)/_bins[j][0]) + 1; {
_grow[j] = true;
new_shape[j] == 1;
}
else
{
new_shape[j] = floor((_data_range[j].second -
_data_range[j].first) /
_bins[j][0]) + 1;
}
} }
else else
{ {
...@@ -79,6 +88,15 @@ public: ...@@ -79,6 +88,15 @@ public:
if (_bins[i].size() == 1) // constant bin width if (_bins[i].size() == 1) // constant bin width
{ {
bin[i] = (v[i] - _data_range[i].first)/_bins[i][0]; bin[i] = (v[i] - _data_range[i].first)/_bins[i][0];
if (_grow[i] && (bin[i] >= _counts.shape()[i]))
{
boost::array<size_t, Dim> new_shape;
for (size_t j = 0; j < Dim; ++j)
new_shape[j] = _counts.shape()[j];
new_shape[i] = bin[i]+1;
_counts.resize(new_shape);
_data_range[i].second = max(v[i], _data_range[i].second);
}
} }
else // arbitrary bins. do a binary search else // arbitrary bins. do a binary search
{ {
...@@ -127,6 +145,7 @@ protected: ...@@ -127,6 +145,7 @@ protected:
boost::multi_array<CountType,Dim> _counts; boost::multi_array<CountType,Dim> _counts;
boost::array<std::vector<ValueType>, Dim> _bins; boost::array<std::vector<ValueType>, Dim> _bins;
boost::array<std::pair<ValueType,ValueType>,Dim> _data_range; boost::array<std::pair<ValueType,ValueType>,Dim> _data_range;
std::vector<bool> _grow;
}; };
......
...@@ -14,12 +14,17 @@ libgraph_tool_stats_la_SOURCES = \ ...@@ -14,12 +14,17 @@ libgraph_tool_stats_la_SOURCES = \
graph_histograms.cc \ graph_histograms.cc \
graph_average.cc \ graph_average.cc \
graph_parallel.cc \ graph_parallel.cc \
graph_distance.cc \
graph_distance_sampled.cc \
graph_stats_bind.cc graph_stats_bind.cc
libgraph_tool_stats_la_include_HEADERS = \ libgraph_tool_stats_la_include_HEADERS = \
graph_parallel.hh \ graph_parallel.hh \
graph_histograms.hh \ graph_histograms.hh \
graph_average.hh graph_average.hh \
graph_distance_sampled.hh \
graph_distance.hh
libgraph_tool_stats_la_LIBADD = $(MOD_LIBADD) libgraph_tool_stats_la_LIBADD = $(MOD_LIBADD)
......
...@@ -24,34 +24,34 @@ ...@@ -24,34 +24,34 @@
using namespace std; using namespace std;
using namespace boost; using namespace boost;
using namespace boost::lambda;
using namespace graph_tool; using namespace graph_tool;
hist_t GraphInterface::GetDistanceHistogram(string weight) const typedef Histogram<size_t, size_t, 1> hist_t;
python::object distance_histogram(GraphInterface& gi, boost::any weight,
const vector<long double>& bins)
{ {
hist_t hist; python::object ret;
if (weight == "") if (weight.empty())
{ {
run_action<>()(*this, run_action<>()(gi,
bind<void>(get_distance_histogram(), _1, bind<void>(get_distance_histogram(), _1,
_vertex_index, no_weightS(), var(hist)))(); gi.GetVertexIndex(), no_weightS(),
ref(bins), ref(ret)))();
} }
else else
{ {
try run_action<>()(gi,
{ bind<void>(get_distance_histogram(), _1,
run_action<>()(*this, gi.GetVertexIndex(), _2,
bind<void>(get_distance_histogram(), _1, ref(bins), ref(ret)),
_vertex_index, _2, var(hist)), edge_scalar_properties())(weight);
edge_scalar_properties())
(prop(weight, _edge_index, _properties));
}
catch (property_not_found& e)
{
throw GraphException("error getting edge scalar property: " +
string(e.what()));
}
} }
return hist; return ret;
}
void export_distance()
{
python::def("distance_histogram", &distance_histogram);
} }
...@@ -18,10 +18,16 @@ ...@@ -18,10 +18,16 @@
#ifndef GRAPH_DISTANCE_HH #ifndef GRAPH_DISTANCE_HH
#define GRAPH_DISTANCE_HH #define GRAPH_DISTANCE_HH
#include <tr1/unordered_set>
#include <boost/graph/breadth_first_search.hpp> #include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp> #include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/python/object.hpp>
#include <boost/python/list.hpp>
#include <boost/python/extract.hpp>
#include "histogram.hh"
#include "numpy_bind.hh"
namespace graph_tool namespace graph_tool
{ {
using namespace std; using namespace std;
...@@ -31,59 +37,97 @@ using namespace boost; ...@@ -31,59 +37,97 @@ using namespace boost;
struct no_weightS {}; struct no_weightS {};
template <class Map>
struct get_val_type
{
typedef typename property_traits<Map>::value_type type;
};
template <>
struct get_val_type<no_weightS>
{
typedef size_t type;
};
struct get_distance_histogram struct get_distance_histogram
{ {
template <class Graph, class IndexMap, class WeightMap, class Hist> template <class Graph, class VertexIndex, class WeightMap>
void operator()(const Graph *gp, IndexMap index_map, WeightMap weights, void operator()(const Graph& g, VertexIndex vertex_index, WeightMap weights,
Hist& hist) const const vector<long double>& obins, python::object& phist)
const
{ {
const Graph& g = *gp;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t; typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
// select get_vertex_dists based on the existence of weights // select get_vertex_dists based on the existence of weights
typedef typename mpl::if_<is_same<WeightMap, no_weightS>, typedef typename mpl::if_<is_same<WeightMap, no_weightS>,
get_dists_bfs, get_dists_bfs,
get_dists_djk>::type get_vertex_dists_t; get_dists_djk>::type get_vertex_dists_t;
// distance type
typedef typename get_val_type<WeightMap>::type val_type;
typedef Histogram<val_type, size_t, 1> hist_t;
array<vector<val_type>,1> bins;
bins[0].resize(obins.size());
for (size_t i = 0; i < obins.size(); ++i)
bins[0][i] = obins[i];
// only used for constant-sized bins
boost::array<pair<val_type, val_type>, 1> data_range;
data_range[0].first = data_range[0].second = 0;
hist_t hist(bins, data_range);
SharedHistogram<hist_t> s_hist(hist);
typename hist_t::point_t point;
get_vertex_dists_t get_vertex_dists; get_vertex_dists_t get_vertex_dists;
int i, N = num_vertices(g); int i, N = num_vertices(g);
#pragma omp parallel for default(shared) private(i,point) \
#pragma omp parallel for default(shared) private(i) firstprivate(s_hist) schedule(dynamic)
for (i = 0; i < N; ++i) for (i = 0; i < N; ++i)
{ {
vertex_t v = vertex(i, g); vertex_t v = vertex(i, g);
if (v == graph_traits<Graph>::null_vertex()) if (v == graph_traits<Graph>::null_vertex())
continue; continue;
typedef tr1::unordered_map<vertex_t,double, unchecked_vector_property_map<val_type,VertexIndex>
DescriptorHash<IndexMap> > dmap_t; dist_map(vertex_index, num_vertices(g));
dmap_t dmap(0, DescriptorHash<IndexMap>(index_map));
InitializedPropertyMap<dmap_t> for (size_t j = 0; j < N; ++j)
dist_map(dmap, numeric_limits<double>::max()); {
if (vertex(i,g) != graph_traits<Graph>::null_vertex())
dist_map[vertex(j,g)] = numeric_limits<val_type>::max();
}
dist_map[v] = 0.0; dist_map[v] = 0;
get_vertex_dists(g, v, index_map, dist_map, weights); get_vertex_dists(g, v, vertex_index, dist_map, weights);
typename graph_traits<Graph>::vertex_iterator v2, v_end; typename graph_traits<Graph>::vertex_iterator v2, v_end;
for (tie(v2, v_end) = vertices(g); v2 != v_end; ++v2) for (tie(v2, v_end) = vertices(g); v2 != v_end; ++v2)
if (*v2 != v && dist_map[*v2] != numeric_limits<double>::max()) if (*v2 != v &&
dist_map[*v2] != numeric_limits<val_type>::max())
{ {
double dist = dist_map[*v2]; point[0] = dist_map[*v2];
#pragma omp atomic s_hist.PutValue(point);
hist[dist]++;
} }
} }
s_hist.Gather();
python::list ret;
ret.append(wrap_multi_array_owned<size_t,1>(hist.GetArray()));
ret.append(wrap_vector_owned<val_type>(hist.GetBins()[0]));
phist = ret;
} }
// weighted version. Use dijkstra_shortest_paths() // weighted version. Use dijkstra_shortest_paths()
struct get_dists_djk struct get_dists_djk
{ {
template <class Graph, class Vertex, class IndexMap, class DistanceMap, template <class Graph, class Vertex, class VertexIndex,
class WeightMap> class DistanceMap, class WeightMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map, void operator()(const Graph& g, Vertex s, VertexIndex vertex_index,
DistanceMap dist_map, WeightMap weights) const DistanceMap dist_map, WeightMap weights) const
{ {
dijkstra_shortest_paths(g, s, vertex_index_map(index_map). dijkstra_shortest_paths(g, s, vertex_index_map(vertex_index).
weight_map(weights).distance_map(dist_map)); weight_map(weights).distance_map(dist_map));
} }
}; };
...@@ -91,14 +135,15 @@ struct get_distance_histogram ...@@ -91,14 +135,15 @@ struct get_distance_histogram
// unweighted version. Use BFS. // unweighted version. Use BFS.
struct get_dists_bfs struct get_dists_bfs
{ {
template <class Graph, class Vertex, class IndexMap, class DistanceMap> template <class Graph, class Vertex, class VertexIndex,
void operator()(const Graph& g, Vertex s, IndexMap index_map, class DistanceMap>
void operator()(const Graph& g, Vertex s, VertexIndex vertex_index,
DistanceMap dist_map, no_weightS) const DistanceMap dist_map, no_weightS) const
{ {
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t; typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef tr1::unordered_map<vertex_t,default_color_type, typedef tr1::unordered_map<vertex_t,default_color_type,
DescriptorHash<IndexMap> > cmap_t; DescriptorHash<VertexIndex> > cmap_t;
cmap_t cmap(0, DescriptorHash<IndexMap>(index_map)); cmap_t cmap(0, DescriptorHash<VertexIndex>(vertex_index));
InitializedPropertyMap<cmap_t> InitializedPropertyMap<cmap_t>
color_map(cmap, color_traits<default_color_type>::white()); color_map(cmap, color_traits<default_color_type>::white());
......
...@@ -17,46 +17,46 @@ ...@@ -17,46 +17,46 @@
#include "graph_filtering.hh" #include "graph_filtering.hh"
#include "graph.hh" #include "graph.hh"
#include "histogram.hh"
#include "graph_selectors.hh" #include "graph_selectors.hh"
#include "graph_properties.hh" #include "graph_properties.hh"
#include <boost/lambda/bind.hpp>
#include "graph_distance_sampled.hh" #include "graph_distance_sampled.hh"
typedef std::tr1::mt19937 rng_t;
using namespace std; using namespace std;
using namespace boost; using namespace boost;
using namespace boost::lambda;
using namespace graph_tool; using namespace graph_tool;
hist_t typedef Histogram<size_t, size_t, 1> hist_t;
GraphInterface::GetSampledDistanceHistogram(string weight, size_t samples,
size_t seed) const python::object sampled_distance_histogram(GraphInterface& gi, boost::any weight,
const vector<long double>& bins,
size_t n_samples, size_t seed)
{ {
hist_t hist; rng_t rng(static_cast<rng_t::result_type>(seed));
python::object ret;
if (weight == "") if (weight.empty())
{ {
run_action<>()(*this, bind<void>(get_sampled_distances(), _1, run_action<>()(gi,
_vertex_index, no_weightS(), var(hist), bind<void>(get_sampled_distance_histogram(), _1,
samples, seed))(); gi.GetVertexIndex(), no_weightS(), n_samples,
ref(bins), ref(ret), ref(rng)))();
} }
else else
{ {
try run_action<>()(gi,
{ bind<void>(get_sampled_distance_histogram(), _1,
run_action<>()(*this, gi.GetVertexIndex(), _2, n_samples,
bind<void>(get_sampled_distances(), _1, ref(bins), ref(ret), ref(rng)),
_vertex_index, _2, var(hist), samples, edge_scalar_properties())(weight);
seed), edge_scalar_properties())
(prop(weight, _edge_index, _properties));
}
catch (property_not_found& e)
{
throw GraphException("error getting scalar property: " +
string(e.what()));
}
} }
return hist; return ret;
}
void export_sampled_distance()
{
python::def("sampled_distance_histogram", &sampled_distance_histogram);
} }
...@@ -18,92 +18,133 @@ ...@@ -18,92 +18,133 @@
#ifndef GRAPH_DISTANCE_SAMPLED_HH #ifndef GRAPH_DISTANCE_SAMPLED_HH
#define GRAPH_DISTANCE_SAMPLED_HH #define GRAPH_DISTANCE_SAMPLED_HH
#include <tr1/unordered_set>
#include <boost/graph/breadth_first_search.hpp> #include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp> #include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/random.hpp>
#include <boost/python/object.hpp>
#include <boost/python/list.hpp>
#include <boost/python/extract.hpp>
#include <tr1/random>
#include "histogram.hh"
#include "numpy_bind.hh"
namespace graph_tool namespace graph_tool
{ {
using namespace std; using namespace std;
using namespace boost; using namespace boost;
typedef boost::mt19937 rng_t; // retrieves the sampled vertex-vertex distance histogram
// retrieves the histogram of sampled vertex-vertex distances
struct no_weightS {}; struct no_weightS {};
struct get_sampled_distances template <class Map>
struct get_val_type
{ {
typedef typename property_traits<Map>::value_type type;
};
template <class Graph, class IndexMap, class WeightMap, class Hist> template <>
void operator()(const Graph* gp, IndexMap index_map, WeightMap weights, struct get_val_type<no_weightS>
Hist& hist, size_t samples, size_t seed) const {
typedef size_t type;
};
struct get_sampled_distance_histogram
{
template <class Graph, class VertexIndex, class WeightMap, class RNG>
void operator()(const Graph& g, VertexIndex vertex_index, WeightMap weights,
size_t n_samples, const vector<long double>& obins,
python::object& phist, RNG& rng) const
{ {
const Graph& g = *gp;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t; typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
// select get_sum_vertex_dists based on the existence of weights // select get_vertex_dists based on the existence of weights
typedef typename mpl::if_<is_same<WeightMap, no_weightS>, typedef typename mpl::if_<is_same<WeightMap, no_weightS>,
get_dists_bfs, get_dists_bfs,
get_dists_djk>::type get_vertex_dists_t; get_dists_djk>::type get_vertex_dists_t;
get_vertex_dists_t get_vertex_dists;
tr1::unordered_map<size_t,vertex_t> descriptors;
typename graph_traits<Graph>::vertex_iterator v, v_end; // distance type
int i = 0, N = 0; typedef typename get_val_type<WeightMap>::type val_type;
for(tie(v, v_end) = vertices(g); v != v_end; ++v,++i) typedef Histogram<val_type, size_t, 1> hist_t;
{
descriptors[i] = *v;
N++;
}
rng_t rng(static_cast<rng_t::result_type>(seed)); array<vector<val_type>,1> bins;
uniform_int<size_t> sampler(0,descriptors.size()-1); bins[0].resize(obins.size());
for (size_t i = 0; i < obins.size(); ++i)
bins[0][i] = obins[i];
#pragma omp parallel for default(shared) private(i,v,v_end) // only used for constant-sized bins
for(i=0; i < int(samples); ++i) boost::array<pair<val_type, val_type>, 1> data_range;
{ data_range[0].first = data_range[0].second = 0;
typedef HashedDescriptorMap<IndexMap,double> dist_map_t;
dist_map_t dist_map(index_map); hist_t hist(bins, data_range);
SharedHistogram<hist_t> s_hist(hist);
for(tie(v, v_end) = vertices(g); v != v_end; ++v) vector<vertex_t> sources;
dist_map[*v] = numeric_limits<double>::max(); sources.reserve(num_vertices(g));
vertex_t s,t; int i;
for (i = 0; i < num_vertices(g); ++i)
if (vertex(i,g) != graph_traits<Graph>::null_vertex())
sources.push_back(vertex(i,g));
n_samples = min(n_samples, sources.size());
#pragma omp critical typename hist_t::point_t point;
get_vertex_dists_t get_vertex_dists;
#pragma omp parallel for default(shared) private(i,point) \
firstprivate(s_hist) schedule(dynamic)
for (i = 0; i < int(n_samples); ++i)
{
vertex_t v;
{ {
s = descriptors[sampler(rng)]; #pragma omp critical
do tr1::uniform_int<size_t> randint(0, sources.size()-1);
{ size_t i = randint(rng);
t = descriptors[sampler(rng)]; v = sources[i];
} swap(sources[i], sources.back());
while (t == s && N != 1); sources.pop_back();
} }
dist_map[s] = 0.0; unchecked_vector_property_map<val_type,VertexIndex>
get_vertex_dists(g, s, index_map, dist_map, weights); dist_map(vertex_index, num_vertices(g));