Commit 29f83ea6 authored by Tiago Peixoto

Resuscitate distance_histogram() and sampled_distance_histogram()

parent b4249772
......@@ -55,14 +55,23 @@ public:
Histogram(const boost::array<std::vector<ValueType>, Dim>& bins,
const boost::array<std::pair<ValueType,ValueType>,Dim>&
data_range)
: _bins(bins), _data_range(data_range)
: _bins(bins), _data_range(data_range), _grow(Dim, false)
{
bin_t new_shape;
for (size_t j = 0; j < Dim; ++j)
if (_bins[j].size() == 1) // constant bin width
{
new_shape[j] = floor((_data_range[j].second -
_data_range[j].first)/_bins[j][0]) + 1;
if (_data_range[j].first == _data_range[j].second)
{
    // An empty data range (first == second) means no upper bound was
    // given: start with a single bin and flag the dimension so that the
    // count array may be enlarged when larger values are inserted.
    _grow[j] = true;
    // Must be an assignment; the previous `new_shape[j] == 1;` was a
    // no-op comparison that left this extent unset.
    new_shape[j] = 1;
}
else
{
    // Bounded range: number of constant-width bins needed to cover it.
    new_shape[j] = floor((_data_range[j].second -
                          _data_range[j].first) /
                         _bins[j][0]) + 1;
}
}
else
{
......@@ -79,6 +88,15 @@ public:
if (_bins[i].size() == 1) // constant bin width
{
bin[i] = (v[i] - _data_range[i].first)/_bins[i][0];
if (_grow[i] && (bin[i] >= _counts.shape()[i]))
{
boost::array<size_t, Dim> new_shape;
for (size_t j = 0; j < Dim; ++j)
new_shape[j] = _counts.shape()[j];
new_shape[i] = bin[i]+1;
_counts.resize(new_shape);
_data_range[i].second = max(v[i], _data_range[i].second);
}
}
else // arbitrary bins. do a binary search
{
......@@ -127,6 +145,7 @@ protected:
boost::multi_array<CountType,Dim> _counts;
boost::array<std::vector<ValueType>, Dim> _bins;
boost::array<std::pair<ValueType,ValueType>,Dim> _data_range;
std::vector<bool> _grow;
};
......
......@@ -14,12 +14,17 @@ libgraph_tool_stats_la_SOURCES = \
graph_histograms.cc \
graph_average.cc \
graph_parallel.cc \
graph_distance.cc \
graph_distance_sampled.cc \
graph_stats_bind.cc
libgraph_tool_stats_la_include_HEADERS = \
graph_parallel.hh \
graph_histograms.hh \
graph_average.hh
graph_average.hh \
graph_distance_sampled.hh \
graph_distance.hh
libgraph_tool_stats_la_LIBADD = $(MOD_LIBADD)
......
......@@ -24,34 +24,34 @@
using namespace std;
using namespace boost;
using namespace boost::lambda;
using namespace graph_tool;
hist_t GraphInterface::GetDistanceHistogram(string weight) const
typedef Histogram<size_t, size_t, 1> hist_t;
python::object distance_histogram(GraphInterface& gi, boost::any weight,
const vector<long double>& bins)
{
hist_t hist;
python::object ret;
if (weight == "")
if (weight.empty())
{
run_action<>()(*this,
run_action<>()(gi,
bind<void>(get_distance_histogram(), _1,
_vertex_index, no_weightS(), var(hist)))();
gi.GetVertexIndex(), no_weightS(),
ref(bins), ref(ret)))();
}
else
{
try
{
run_action<>()(*this,
bind<void>(get_distance_histogram(), _1,
_vertex_index, _2, var(hist)),
edge_scalar_properties())
(prop(weight, _edge_index, _properties));
}
catch (property_not_found& e)
{
throw GraphException("error getting edge scalar property: " +
string(e.what()));
}
run_action<>()(gi,
bind<void>(get_distance_histogram(), _1,
gi.GetVertexIndex(), _2,
ref(bins), ref(ret)),
edge_scalar_properties())(weight);
}
return hist;
return ret;
}
// Exposes distance_histogram() to Python through Boost.Python, under the
// Python-visible name "distance_histogram".
void export_distance()
{
python::def("distance_histogram", &distance_histogram);
}
......@@ -18,10 +18,16 @@
#ifndef GRAPH_DISTANCE_HH
#define GRAPH_DISTANCE_HH
#include <tr1/unordered_set>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/python/object.hpp>
#include <boost/python/list.hpp>
#include <boost/python/extract.hpp>
#include "histogram.hh"
#include "numpy_bind.hh"
namespace graph_tool
{
using namespace std;
......@@ -31,59 +37,97 @@ using namespace boost;
// Empty tag type passed in place of an edge weight map when the graph is
// treated as unweighted; the distance functors compare WeightMap against it
// (via is_same) to choose between the BFS and Dijkstra implementations.
struct no_weightS {};
// Selects the value type used for distances given the weight map type:
// for a real property map it is the map's own value_type.
template <class Map>
struct get_val_type
{
typedef typename property_traits<Map>::value_type type;
};
// Unweighted case (no_weightS tag): distances are stored as size_t.
template <>
struct get_val_type<no_weightS>
{
typedef size_t type;
};
struct get_distance_histogram
{
template <class Graph, class IndexMap, class WeightMap, class Hist>
void operator()(const Graph *gp, IndexMap index_map, WeightMap weights,
Hist& hist) const
template <class Graph, class VertexIndex, class WeightMap>
void operator()(const Graph& g, VertexIndex vertex_index, WeightMap weights,
const vector<long double>& obins, python::object& phist)
const
{
const Graph& g = *gp;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
// select get_vertex_dists based on the existence of weights
typedef typename mpl::if_<is_same<WeightMap, no_weightS>,
get_dists_bfs,
get_dists_bfs,
get_dists_djk>::type get_vertex_dists_t;
// distance type
typedef typename get_val_type<WeightMap>::type val_type;
typedef Histogram<val_type, size_t, 1> hist_t;
array<vector<val_type>,1> bins;
bins[0].resize(obins.size());
for (size_t i = 0; i < obins.size(); ++i)
bins[0][i] = obins[i];
// only used for constant-sized bins
boost::array<pair<val_type, val_type>, 1> data_range;
data_range[0].first = data_range[0].second = 0;
hist_t hist(bins, data_range);
SharedHistogram<hist_t> s_hist(hist);
typename hist_t::point_t point;
get_vertex_dists_t get_vertex_dists;
int i, N = num_vertices(g);
#pragma omp parallel for default(shared) private(i)
#pragma omp parallel for default(shared) private(i,point) \
firstprivate(s_hist) schedule(dynamic)
for (i = 0; i < N; ++i)
{
vertex_t v = vertex(i, g);
if (v == graph_traits<Graph>::null_vertex())
continue;
typedef tr1::unordered_map<vertex_t,double,
DescriptorHash<IndexMap> > dmap_t;
dmap_t dmap(0, DescriptorHash<IndexMap>(index_map));
InitializedPropertyMap<dmap_t>
dist_map(dmap, numeric_limits<double>::max());
unchecked_vector_property_map<val_type,VertexIndex>
dist_map(vertex_index, num_vertices(g));
// Reset the distance of every valid vertex to "unreachable"
// (numeric_limits<val_type>::max()) before searching from the source.
for (size_t j = 0; j < size_t(N); ++j)
{
    // The guard must test the vertex being written (index j). Testing the
    // source index i was always true here and let a null vertex be used
    // as an index into dist_map.
    if (vertex(j,g) != graph_traits<Graph>::null_vertex())
        dist_map[vertex(j,g)] = numeric_limits<val_type>::max();
}
dist_map[v] = 0.0;
get_vertex_dists(g, v, index_map, dist_map, weights);
dist_map[v] = 0;
get_vertex_dists(g, v, vertex_index, dist_map, weights);
typename graph_traits<Graph>::vertex_iterator v2, v_end;
for (tie(v2, v_end) = vertices(g); v2 != v_end; ++v2)
if (*v2 != v && dist_map[*v2] != numeric_limits<double>::max())
if (*v2 != v &&
dist_map[*v2] != numeric_limits<val_type>::max())
{
double dist = dist_map[*v2];
#pragma omp atomic
hist[dist]++;
point[0] = dist_map[*v2];
s_hist.PutValue(point);
}
}
s_hist.Gather();
python::list ret;
ret.append(wrap_multi_array_owned<size_t,1>(hist.GetArray()));
ret.append(wrap_vector_owned<val_type>(hist.GetBins()[0]));
phist = ret;
}
// weighted version. Use dijkstra_shortest_paths()
struct get_dists_djk
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap,
class WeightMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map,
template <class Graph, class Vertex, class VertexIndex,
class DistanceMap, class WeightMap>
void operator()(const Graph& g, Vertex s, VertexIndex vertex_index,
DistanceMap dist_map, WeightMap weights) const
{
dijkstra_shortest_paths(g, s, vertex_index_map(index_map).
dijkstra_shortest_paths(g, s, vertex_index_map(vertex_index).
weight_map(weights).distance_map(dist_map));
}
};
......@@ -91,14 +135,15 @@ struct get_distance_histogram
// unweighted version. Use BFS.
struct get_dists_bfs
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map,
template <class Graph, class Vertex, class VertexIndex,
class DistanceMap>
void operator()(const Graph& g, Vertex s, VertexIndex vertex_index,
DistanceMap dist_map, no_weightS) const
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef tr1::unordered_map<vertex_t,default_color_type,
DescriptorHash<IndexMap> > cmap_t;
cmap_t cmap(0, DescriptorHash<IndexMap>(index_map));
DescriptorHash<VertexIndex> > cmap_t;
cmap_t cmap(0, DescriptorHash<VertexIndex>(vertex_index));
InitializedPropertyMap<cmap_t>
color_map(cmap, color_traits<default_color_type>::white());
......
......@@ -17,46 +17,46 @@
#include "graph_filtering.hh"
#include "graph.hh"
#include "histogram.hh"
#include "graph_selectors.hh"
#include "graph_properties.hh"
#include <boost/lambda/bind.hpp>
#include "graph_distance_sampled.hh"
typedef std::tr1::mt19937 rng_t;
using namespace std;
using namespace boost;
using namespace boost::lambda;
using namespace graph_tool;
hist_t
GraphInterface::GetSampledDistanceHistogram(string weight, size_t samples,
size_t seed) const
typedef Histogram<size_t, size_t, 1> hist_t;
python::object sampled_distance_histogram(GraphInterface& gi, boost::any weight,
const vector<long double>& bins,
size_t n_samples, size_t seed)
{
hist_t hist;
rng_t rng(static_cast<rng_t::result_type>(seed));
python::object ret;
if (weight == "")
if (weight.empty())
{
run_action<>()(*this, bind<void>(get_sampled_distances(), _1,
_vertex_index, no_weightS(), var(hist),
samples, seed))();
run_action<>()(gi,
bind<void>(get_sampled_distance_histogram(), _1,
gi.GetVertexIndex(), no_weightS(), n_samples,
ref(bins), ref(ret), ref(rng)))();
}
else
{
try
{
run_action<>()(*this,
bind<void>(get_sampled_distances(), _1,
_vertex_index, _2, var(hist), samples,
seed), edge_scalar_properties())
(prop(weight, _edge_index, _properties));
}
catch (property_not_found& e)
{
throw GraphException("error getting scalar property: " +
string(e.what()));
}
run_action<>()(gi,
bind<void>(get_sampled_distance_histogram(), _1,
gi.GetVertexIndex(), _2, n_samples,
ref(bins), ref(ret), ref(rng)),
edge_scalar_properties())(weight);
}
return hist;
return ret;
}
// Exposes sampled_distance_histogram() to Python through Boost.Python, under
// the Python-visible name "sampled_distance_histogram".
void export_sampled_distance()
{
python::def("sampled_distance_histogram", &sampled_distance_histogram);
}
......@@ -18,92 +18,133 @@
#ifndef GRAPH_DISTANCE_SAMPLED_HH
#define GRAPH_DISTANCE_SAMPLED_HH
#include <tr1/unordered_set>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/random.hpp>
#include <boost/python/object.hpp>
#include <boost/python/list.hpp>
#include <boost/python/extract.hpp>
#include <tr1/random>
#include "histogram.hh"
#include "numpy_bind.hh"
namespace graph_tool
{
using namespace std;
using namespace boost;
typedef boost::mt19937 rng_t;
// retrieves the histogram of sampled vertex-vertex distances
// retrieves the sampled vertex-vertex distance histogram
// Empty tag type passed in place of an edge weight map when the graph is
// treated as unweighted; WeightMap is compared against it (via is_same) to
// choose between the BFS and Dijkstra implementations.
struct no_weightS {};
struct get_sampled_distances
// Selects the value type used for distances given the weight map type:
// for a real property map it is the map's own value_type.
template <class Map>
struct get_val_type
{
typedef typename property_traits<Map>::value_type type;
};
template <class Graph, class IndexMap, class WeightMap, class Hist>
void operator()(const Graph* gp, IndexMap index_map, WeightMap weights,
Hist& hist, size_t samples, size_t seed) const
// Unweighted case (no_weightS tag): distances are stored as size_t.
template <>
struct get_val_type<no_weightS>
{
typedef size_t type;
};
struct get_sampled_distance_histogram
{
template <class Graph, class VertexIndex, class WeightMap, class RNG>
void operator()(const Graph& g, VertexIndex vertex_index, WeightMap weights,
size_t n_samples, const vector<long double>& obins,
python::object& phist, RNG& rng) const
{
const Graph& g = *gp;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
// select get_sum_vertex_dists based on the existence of weights
// select get_vertex_dists based on the existence of weights
typedef typename mpl::if_<is_same<WeightMap, no_weightS>,
get_dists_bfs,
get_dists_bfs,
get_dists_djk>::type get_vertex_dists_t;
get_vertex_dists_t get_vertex_dists;
tr1::unordered_map<size_t,vertex_t> descriptors;
typename graph_traits<Graph>::vertex_iterator v, v_end;
int i = 0, N = 0;
for(tie(v, v_end) = vertices(g); v != v_end; ++v,++i)
{
descriptors[i] = *v;
N++;
}
// distance type
typedef typename get_val_type<WeightMap>::type val_type;
typedef Histogram<val_type, size_t, 1> hist_t;
rng_t rng(static_cast<rng_t::result_type>(seed));
uniform_int<size_t> sampler(0,descriptors.size()-1);
array<vector<val_type>,1> bins;
bins[0].resize(obins.size());
for (size_t i = 0; i < obins.size(); ++i)
bins[0][i] = obins[i];
#pragma omp parallel for default(shared) private(i,v,v_end)
for(i=0; i < int(samples); ++i)
{
typedef HashedDescriptorMap<IndexMap,double> dist_map_t;
dist_map_t dist_map(index_map);
// only used for constant-sized bins
boost::array<pair<val_type, val_type>, 1> data_range;
data_range[0].first = data_range[0].second = 0;
hist_t hist(bins, data_range);
SharedHistogram<hist_t> s_hist(hist);
for(tie(v, v_end) = vertices(g); v != v_end; ++v)
dist_map[*v] = numeric_limits<double>::max();
vertex_t s,t;
vector<vertex_t> sources;
sources.reserve(num_vertices(g));
int i;
for (i = 0; i < num_vertices(g); ++i)
if (vertex(i,g) != graph_traits<Graph>::null_vertex())
sources.push_back(vertex(i,g));
n_samples = min(n_samples, sources.size());
#pragma omp critical
typename hist_t::point_t point;
get_vertex_dists_t get_vertex_dists;
#pragma omp parallel for default(shared) private(i,point) \
firstprivate(s_hist) schedule(dynamic)
for (i = 0; i < int(n_samples); ++i)
{
vertex_t v;
{
s = descriptors[sampler(rng)];
do
{
t = descriptors[sampler(rng)];
}
while (t == s && N != 1);
#pragma omp critical
tr1::uniform_int<size_t> randint(0, sources.size()-1);
size_t i = randint(rng);
v = sources[i];
swap(sources[i], sources.back());
sources.pop_back();
}
dist_map[s] = 0.0;
get_vertex_dists(g, s, index_map, dist_map, weights);
if (dist_map[t] != numeric_limits<double>::max() &&
dist_map[t] != 0.0)
unchecked_vector_property_map<val_type,VertexIndex>
dist_map(vertex_index, num_vertices(g));
// Reset the distance of every valid vertex to "unreachable"
// (numeric_limits<val_type>::max()) before searching from the source.
for (size_t j = 0; j < num_vertices(g); ++j)
{
    // The guard must test the vertex being written (index j); `i` is the
    // sample counter of the enclosing loop, not the vertex being reset.
    if (vertex(j,g) != graph_traits<Graph>::null_vertex())
        dist_map[vertex(j,g)] = numeric_limits<val_type>::max();
}
dist_map[v] = 0;
get_vertex_dists(g, v, vertex_index, dist_map, weights);
typename graph_traits<Graph>::vertex_iterator v2, v_end;
for (tie(v2, v_end) = vertices(g); v2 != v_end; ++v2)
if (*v2 != v &&
dist_map[*v2] != numeric_limits<val_type>::max())
{
point[0] = dist_map[*v2];
s_hist.PutValue(point);
}
}
s_hist.Gather();
python::list ret;
ret.append(wrap_multi_array_owned<size_t,1>(hist.GetArray()));
ret.append(wrap_vector_owned<val_type>(hist.GetBins()[0]));
phist = ret;
}
// weighted version. Use dijkstra_shortest_paths()
struct get_dists_djk
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap,
class WeightMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map,
template <class Graph, class Vertex, class VertexIndex,
class DistanceMap, class WeightMap>
void operator()(const Graph& g, Vertex s, VertexIndex vertex_index,
DistanceMap dist_map, WeightMap weights) const
{
dijkstra_shortest_paths(g, s, vertex_index_map(index_map).
dijkstra_shortest_paths(g, s, vertex_index_map(vertex_index).
weight_map(weights).distance_map(dist_map));
}
};
......@@ -111,19 +152,27 @@ struct get_sampled_distances
// unweighted version. Use BFS.
struct get_dists_bfs
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map,
template <class Graph, class Vertex, class VertexIndex,
class DistanceMap>
void operator()(const Graph& g, Vertex s, VertexIndex vertex_index,
DistanceMap dist_map, no_weightS) const
{
breadth_first_search(g, s,
visitor(make_bfs_visitor
(record_distances(dist_map,
on_tree_edge()))));
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef tr1::unordered_map<vertex_t,default_color_type,
DescriptorHash<VertexIndex> > cmap_t;
cmap_t cmap(0, DescriptorHash<VertexIndex>(vertex_index));
InitializedPropertyMap<cmap_t>
color_map(cmap, color_traits<default_color_type>::white());
breadth_first_visit(g, s,
visitor(make_bfs_visitor
(record_distances(dist_map,
on_tree_edge()))).
color_map(color_map));
}
};
};
} // graph_tool namespace
} // boost namespace
#endif // GRAPH_DISTANCE_SAMPLED_HH
......@@ -76,11 +76,11 @@ public:
void operator()(Graph& g, typename graph_traits<Graph>::vertex_descriptor v,
EdgeProperty& eprop, Hist& hist)
{
typename Hist::point_t p;
typename graph_traits<Graph>::out_edge_iterator e, e_begin, e_end;
tie(e_begin,e_end) = out_edges(v,g);
for(e = e_begin; e != e_end; ++e)
{
typename Hist::point_t p;
p[0] = eprop[*e];
hist.PutValue(p);
}
......@@ -137,11 +137,19 @@ struct get_histogram
// find the data range
pair<value_type,value_type> range;
typename graph_traits<Graph>::vertex_iterator vi,vi_end;
range.first = boost::numeric::bounds<value_type>::highest();
range.second = boost::numeric::bounds<value_type>::lowest();
for (tie(vi, vi_end) = vertices(g); vi != vi_end; ++vi)