Commit 1152b5ed authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

* add GetDistanceHistogram, instead of GetAverageDistance

* add GetSampledDistanceHistogram (in src/graph/graph_distance_sampled.cc)
* fix BFS initialization bug in src/graph/graph_extended_clustering.cc


git-svn-id: https://svn.forked.de/graph-tool/trunk@38 d4600afd-f417-0410-95de-beed9576f240
parent ec124ab3
......@@ -97,8 +97,12 @@ statistics.add_option("--number-of-edges", action="callback", callback=push_opti
statistics.add_option("--vertex-histogram", action="callback", callback=push_option, type="string", metavar="DEGREE|FILE", help="get the vertex degree/property histogram")
statistics.add_option("--edge-histogram", action="callback", callback=push_option, type="string", metavar="PROPERTY|FILE", help="get the edge property histogram")
statistics.add_option("--combined-vertex-histogram", action="callback", callback=push_option, type="string", metavar="DEGREE1|DEGREE2|FILE", help="get the combined (DEGREE1,DEGREE2) histogram. Scalar properties are also accepted as DEGREE1 or DEGREE2")
statistics.add_option("--distance-histogram", action="callback", callback=push_option, type="string", metavar="[WEIGHT|]FILE", help="get the distance histogram")
statistics.add_option("--average-distance", action="callback", callback=push_option, type="string", metavar="[WEIGHT|]FILE", help="get the averarge distance")
statistics.add_option("--average-harmonic-distance", action="callback", callback=push_option, type="string", metavar="[WEIGHT|]FILE", help="get the averarge harmonic distance")
statistics.add_option("--sampled-distance-histogram", action="callback", callback=push_option, type="string", metavar="[WEIGHT|]SAMPLES|SEED|FILE", help="get the sampled distance histogram")
statistics.add_option("--average-sampled-distance", action="callback", callback=push_option, type="string", metavar="[WEIGHT|]SAMPLES|SEED|FILE", help="get the average sampled distance")
statistics.add_option("--average-sampled-harmonic-distance", action="callback", callback=push_option, type="string", metavar="[WEIGHT|]SAMPLES|SEED|FILE", help="get the average sampled harmonic distance")
statistics.add_option("--component-size-histogram", action="callback", callback=push_option, type="string", metavar="FILE", help="get the component size histogram")
statistics.add_option("--average-vertex-property", action="callback", callback=push_option, type="string", metavar="PROPERTY|FILE", help="get the average of the vertex property")
statistics.add_option("--average-edge-property", action="callback", callback=push_option, type="string", metavar="PROPERTY|FILE", help="get the average of the edge property")
......@@ -158,6 +162,16 @@ def degree(name):
deg = Degree.Total
return deg
def get_mean(hist):
avg,dev,count = 0.0,0.0,0.0
for k,v in hist.iteritems():
avg += k*v
dev += k*k*v
count += v
avg /= count
dev = math.sqrt(dev/count - avg*avg)/math.sqrt(count)
return (avg,dev)
class OptionError(Exception):
def __init__(self, option, error):
self.what = "error parsing option %s: %s" % (option,error)
......@@ -302,6 +316,17 @@ def parse_option(opt, just_file=False):
if just_file:
return values[2]
return (graph.GetAverageCombinedVertexCorrelation(degree(values[0]),degree(values[1])), values[2])
elif opt.name == "distance-histogram":
values = parse_values(opt.value)
if len(values) > 2 or len(values) < 1:
raise OptionError(opt.name, "invalid value '%s'" % opt.value)
file_name, weight = values[0],""
if len(values) > 1:
weight = values[0]
file_name = values[1]
if just_file:
return file_name
return (graph.GetDistanceHistogram(weight), file_name)
elif opt.name == "average-distance":
values = parse_values(opt.value)
if len(values) > 2 or len(values) < 1:
......@@ -312,7 +337,7 @@ def parse_option(opt, just_file=False):
file_name = values[1]
if just_file:
return file_name
return (graph.GetAverageDistance(weight), file_name)
return ("%f\t%f" % get_mean(graph.GetDistanceHistogram(weight)), file_name)
elif opt.name == "average-harmonic-distance":
values = parse_values(opt.value)
if len(values) > 2 or len(values) < 1:
......@@ -323,7 +348,44 @@ def parse_option(opt, just_file=False):
file_name = values[1]
if just_file:
return file_name
return (graph.GetAverageHarmonicDistance(weight), file_name)
hist = graph.GetDistanceHistogram(weight)
avg, err = hist_mean(dict((1.0/k,v) for k,v in hist.iteritems()))
return ("%f\t%f" % (1.0/avg,1.0/err) , file_name)
elif opt.name == "sampled-distance-histogram":
values = parse_values(opt.value)
if len(values) > 4 or len(values) < 3:
raise OptionError(opt.name, "invalid value '%s'" % opt.value)
if len(values) == 3:
weight, samples, seed, file_name = "", int(values[0]), int(values[1]), values[2]
else:
weight, samples, seed, file_name = values[0], int(values[1]), int(values[2]), values[3]
if just_file:
return file_name
return (graph.GetSampledDistanceHistogram(weight, samples, seed), file_name)
elif opt.name == "average-sampled-distance":
values = parse_values(opt.value)
if len(values) > 4 or len(values) < 3:
raise OptionError(opt.name, "invalid value '%s'" % opt.value)
if len(values) == 3:
weight, samples, seed, file_name = "", int(values[0]), int(values[1]), values[2]
else:
weight, samples, seed, file_name = values[0], int(values[1]), int(values[2]), values[3]
if just_file:
return file_name
return ("%f\t%f" % get_mean(graph.GetSampledDistanceHistogram(weight, samples, seed)), file_name)
elif opt.name == "average-sampled-harmonic-distance":
values = parse_values(opt.value)
if len(values) > 4 or len(values) < 3:
raise OptionError(opt.name, "invalid value '%s'" % opt.value)
if len(values) == 3:
weight, samples, seed, file_name = "", int(values[0]), int(values[1]), values[2]
else:
weight, samples, seed, file_name = values[0], int(values[1]), int(values[2]), values[3]
if just_file:
return file_name
hist = graph.GetSampledDistanceHistogram(weight, samples, seed)
avg, err = hist_mean(dict((1.0/k,v) for k,v in hist.iteritems()))
return ("%f\t%f" % (1.0/avg,1.0/err) , file_name)
elif opt.name == "component-size-histogram":
if just_file:
return opt.value
......@@ -398,15 +460,8 @@ def parse_option(opt, just_file=False):
deg = degree(values[0])
hist = graph.GetVertexHistogram(deg)
else:
hist = graph.GetEdgeHistogram(values[0])
avg,dev,count = 0.0,0.0,0.0
for k,v in hist.iteritems():
avg += k*v
dev += k*k*v
count += v
avg /= count
dev = math.sqrt(dev/count - avg*avg)/math.sqrt(count)
ret = "%f\t%f" % (avg, dev)
hist = graph.GetEdgeHistogram(values[0])
ret = "%f\t%f" % get_mean(hist)
if just_file:
return values[1]
return (ret,values[1])
......
......@@ -33,6 +33,7 @@ libgraph_tool_la_SOURCES = \
graph_extended_clustering.cc\
graph_generation.cc\
graph_distance.cc\
graph_distance_sampled.cc\
graph_io.cc\
graph_bind.cc\
graphml.hpp\
......
......@@ -90,8 +90,8 @@ public:
// other
hist_t GetComponentSizeHistogram() const;
double GetAverageDistance(std::string weight) const;
double GetAverageHarmonicDistance(std::string weight) const;
hist_t GetDistanceHistogram(std::string weight) const;
hist_t GetSampledDistanceHistogram(std::string weight, size_t samples, size_t seed) const;
// filtering
void SetDirected(bool directed) {_directed = directed;}
......
......@@ -226,8 +226,8 @@ BOOST_PYTHON_MODULE(libgraph_tool)
.def("GetGlobalClustering", &GraphInterfaceWrap::GetGlobalClustering)
.def("SetLocalClusteringToProperty", &GraphInterfaceWrap::SetLocalClusteringToProperty)
.def("SetExtendedClusteringToProperty", &GraphInterfaceWrap::SetExtendedClusteringToProperty)
.def("GetAverageDistance", &GraphInterfaceWrap::GetAverageDistance)
.def("GetAverageHarmonicDistance", &GraphInterfaceWrap::GetAverageHarmonicDistance)
.def("GetDistanceHistogram", &GraphInterfaceWrap::GetDistanceHistogram)
.def("GetSampledDistanceHistogram", &GraphInterfaceWrap::GetSampledDistanceHistogram)
.def("SetDirected", &GraphInterfaceWrap::SetDirected)
.def("GetDirected", &GraphInterfaceWrap::GetDirected)
.def("SetReversed", &GraphInterfaceWrap::SetReversed)
......
......@@ -21,6 +21,7 @@
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/lambda/bind.hpp>
#include <iostream>
#include "graph.hh"
#include "histogram.hh"
......@@ -34,163 +35,72 @@ using namespace boost::lambda;
using namespace graph_tool;
//==============================================================================
// bfs_distance_sum_visitor
// This event visitor will record and sum all the distances during a BFS.
//==============================================================================
struct normal_distance
{
double operator()(double d) const { return d; }
};
struct harmonic_distance
{
double operator()(double d) const { return 1.0/d; }
};
template <class DistanceSelector, class DistanceMap, class Graph>
class bfs_distance_sum_visitor: public default_bfs_visitor
{
public:
bfs_distance_sum_visitor(DistanceMap distance_map, double &sum)
:_distmap(distance_map), _distsum(sum) { }
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
void tree_edge(edge_t e, const Graph & g)
{
double d = _distmap[source(e,g)] + 1.0;
_distmap[target(e,g)] = d; // record distance
_distsum += _distance(d);
}
void initialize_vertex(vertex_t u, const Graph &g)
{
_distmap[u] = 0.0;
}
private:
DistanceMap _distmap;
double &_distsum;
DistanceSelector _distance;
};
//==============================================================================
// GetAverageDistance()
// retrieves the average vertex-vertex distance
// GetDistanceHistogram()
// retrieves the vertex-vertex distance histogram
//==============================================================================
struct no_weightS {};
template <class DistanceSelector>
struct get_average_distance
struct get_distance_histogram
{
template <class Graph, class IndexMap, class WeightMap>
void operator()(const Graph &g, IndexMap index_map, WeightMap weights, double &dist) const
template <class Graph, class IndexMap, class WeightMap, class Hist>
void operator()(const Graph &g, IndexMap index_map, WeightMap weights, Hist& hist) const
{
typedef HashedDescriptorMap<IndexMap,double> dist_map_t;
dist_map_t dist_map(index_map);
double distsum = 0;
size_t n = 0;
// select get_sum_vertex_dists based on the existence of weights
// select get_vertex_dists based on the existence of weights
typedef typename mpl::if_<is_same<WeightMap, no_weightS>,
get_sum_dists_bfs,
get_sum_dists_djk>::type get_sum_vertex_dists;
get_sum_vertex_dists get_sum_dists;
typename graph_traits<Graph>::vertex_iterator v, v_begin, v_end;
tie(v_begin, v_end) = vertices(g);
for(v = v_begin; v != v_end; ++v)
get_dists_bfs,
get_dists_djk>::type get_vertex_dists_t;
get_vertex_dists_t get_vertex_dists;
typename graph_traits<Graph>::vertex_iterator v, v2, v_end;
for (tie(v, v_end) = vertices(g); v != v_end; ++v)
{
distsum += get_sum_dists(g, *v, index_map, dist_map, weights);
n++;
}
DistanceSelector distance;
dist = n<2?0.0:distance(distsum/(n*(n-1)));
typedef tr1::unordered_map<typename graph_traits<Graph>::vertex_descriptor,double,DescriptorHash<IndexMap> > dmap_t;
dmap_t dmap(0, DescriptorHash<IndexMap>(index_map));
InitializedPropertyMap<dmap_t> dist_map(dmap, numeric_limits<double>::max());
dist_map[*v] = 0.0;
get_vertex_dists(g, *v, index_map, dist_map, weights);
for (v2 = vertices(g).first; v2 != v_end; ++v2)
if (*v2 != *v && dist_map[*v2] != numeric_limits<double>::max())
hist[dist_map[*v2]]++;
}
}
// weighted version. Use dijkstra_shortest_paths()
struct get_sum_dists_djk
struct get_dists_djk
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap, class WeightMap>
double operator()(const Graph& g, Vertex s, IndexMap index_map, DistanceMap dist_map, WeightMap weights) const
void operator()(const Graph& g, Vertex s, IndexMap index_map, DistanceMap dist_map, WeightMap weights) const
{
double distsum = 0.0;
dijkstra_shortest_paths(g, s, vertex_index_map(index_map).weight_map(weights).distance_map(dist_map));
DistanceSelector get_dist;
typename graph_traits<Graph>::vertex_iterator v, v_begin, v_end;
tie(v_begin, v_end) = vertices(g);
for(v = v_begin; v != v_end; ++v)
if (dist_map[*v] != std::numeric_limits<double>::max() && *v != s)
distsum += get_dist(dist_map[*v]);
return distsum;
dijkstra_shortest_paths(g, s, vertex_index_map(index_map).weight_map(weights).distance_map(dist_map));
}
};
// unweighted version. Use BFS.
struct get_sum_dists_bfs
struct get_dists_bfs
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap>
double operator()(const Graph& g, Vertex s, IndexMap index_map, DistanceMap dist_map, no_weightS) const
void operator()(const Graph& g, Vertex s, IndexMap index_map, DistanceMap dist_map, no_weightS) const
{
double distsum = 0.0;
bfs_distance_sum_visitor<DistanceSelector,DistanceMap,Graph> bfs_sum_dists(dist_map, distsum);
breadth_first_search(g, s, visitor(bfs_sum_dists));
return distsum;
typedef tr1::unordered_map<typename graph_traits<Graph>::vertex_descriptor,default_color_type,DescriptorHash<IndexMap> > cmap_t;
cmap_t cmap(0, DescriptorHash<IndexMap>(index_map));
InitializedPropertyMap<cmap_t> color_map(cmap, color_traits<default_color_type>::white());
breadth_first_visit(g, s, visitor(make_bfs_visitor(record_distances(dist_map, on_tree_edge()))).color_map(color_map));
}
};
};
double GraphInterface::GetAverageDistance(string weight) const
GraphInterface::hist_t GraphInterface::GetDistanceHistogram(string weight) const
{
double avg_dist = 0;
if (weight == "")
{
check_filter(*this, bind<void>(get_average_distance<normal_distance>(), _1, _vertex_index, no_weightS(), var(avg_dist)),
reverse_check(), directed_check());
}
else
{
try
{
dynamic_property_map& weight_prop = find_property_map(_properties, weight, typeid(graph_traits<multigraph_t>::edge_descriptor));
try
{
vector_property_map<double, edge_index_map_t> weight_map;
weight_map = get_static_property_map<vector_property_map<double, edge_index_map_t> >(weight_prop);
check_filter(*this, bind<void>(get_average_distance<normal_distance>(), _1, _vertex_index, weight_map, var(avg_dist)),
reverse_check(), directed_check());
}
catch (bad_cast)
{
DynamicPropertyMapWrap<double, graph_traits<multigraph_t>::edge_descriptor> weight_map(weight_prop);
check_filter(*this, bind<void>(get_average_distance<normal_distance>(), _1, _vertex_index, weight_map, var(avg_dist)),
reverse_check(), directed_check());
}
}
catch (property_not_found& e)
{
throw GraphException("error getting scalar property: " + string(e.what()));
}
}
return avg_dist;
}
//==============================================================================
// GetAverageHarmonicDistance()
// retrieves the average vertex-vertex harmonic distance
//==============================================================================
hist_t hist;
double GraphInterface::GetAverageHarmonicDistance(string weight) const
{
double avg_dist = 0;
if (weight == "")
{
check_filter(*this, bind<void>(get_average_distance<harmonic_distance>(), _1, _vertex_index, no_weightS(), var(avg_dist)),
check_filter(*this, bind<void>(get_distance_histogram(), _1, _vertex_index, no_weightS(), var(hist)),
reverse_check(), directed_check());
}
else
......@@ -202,13 +112,13 @@ double GraphInterface::GetAverageHarmonicDistance(string weight) const
{
vector_property_map<double, edge_index_map_t> weight_map;
weight_map = get_static_property_map<vector_property_map<double, edge_index_map_t> >(weight_prop);
check_filter(*this, bind<void>(get_average_distance<harmonic_distance>(), _1, _vertex_index, weight_map, var(avg_dist)),
check_filter(*this, bind<void>(get_distance_histogram(), _1, _vertex_index, weight_map, var(hist)),
reverse_check(), directed_check());
}
catch (bad_cast)
{
DynamicPropertyMapWrap<double, graph_traits<multigraph_t>::edge_descriptor> weight_map(weight_prop);
check_filter(*this, bind<void>(get_average_distance<harmonic_distance>(), _1, _vertex_index, weight_map, var(avg_dist)),
check_filter(*this, bind<void>(get_distance_histogram(), _1, _vertex_index, weight_map, var(hist)),
reverse_check(), directed_check());
}
}
......@@ -217,5 +127,5 @@ double GraphInterface::GetAverageHarmonicDistance(string weight) const
throw GraphException("error getting scalar property: " + string(e.what()));
}
}
return avg_dist;
return hist;
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006 Tiago de Paula Peixoto <tiago@forked.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include <algorithm>
#include <tr1/unordered_set>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/random.hpp>
#include "graph.hh"
#include "histogram.hh"
#include "graph_filtering.hh"
#include "graph_selectors.hh"
#include "graph_properties.hh"
using namespace std;
using namespace boost;
using namespace boost::lambda;
using namespace graph_tool;
typedef boost::mt19937 rng_t;
//==============================================================================
// GetSampledDistanceHistogram()
// retrieves the histogram of sampled vertex-vertex distances
//==============================================================================
struct no_weightS {};
struct get_sampled_distances
{
template <class Graph, class IndexMap, class WeightMap, class Hist>
void operator()(const Graph &g, IndexMap index_map, WeightMap weights, Hist& hist, size_t samples, size_t seed) const
{
typedef HashedDescriptorMap<IndexMap,double> dist_map_t;
dist_map_t dist_map(index_map);
// select get_sum_vertex_dists based on the existence of weights
typedef typename mpl::if_<is_same<WeightMap, no_weightS>,
get_dists_bfs,
get_dists_djk>::type get_vertex_dists_t;
get_vertex_dists_t get_vertex_dists;
tr1::unordered_map<size_t, typename graph_traits<Graph>::vertex_descriptor> descriptors;
typename graph_traits<Graph>::vertex_iterator v, v_end;
size_t i = 0;
for(tie(v, v_end) = vertices(g); v != v_end; ++v,++i)
descriptors[i] = *v;
rng_t rng(seed);
uniform_int<size_t> sampler(0,descriptors.size()-1);
for(i=0; i < samples; ++i)
{
for(tie(v, v_end) = vertices(g); v != v_end; ++v)
dist_map[*v] = numeric_limits<double>::max();
typename graph_traits<Graph>::vertex_descriptor s = descriptors[sampler(rng)];
dist_map[s] = 0.0;
get_vertex_dists(g, s, index_map, dist_map, weights);
for(tie(v, v_end) = vertices(g); v != v_end; ++v,++i)
if (*v != s && dist_map[*v] != numeric_limits<double>::max() )
hist[dist_map[*v]]++;
}
}
// weighted version. Use dijkstra_shortest_paths()
struct get_dists_djk
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap, class WeightMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map, DistanceMap dist_map, WeightMap weights) const
{
dijkstra_shortest_paths(g, s, vertex_index_map(index_map).weight_map(weights).distance_map(dist_map));
}
};
// unweighted version. Use BFS.
struct get_dists_bfs
{
template <class Graph, class Vertex, class IndexMap, class DistanceMap>
void operator()(const Graph& g, Vertex s, IndexMap index_map, DistanceMap dist_map, no_weightS) const
{
breadth_first_search(g, s, visitor(make_bfs_visitor(record_distances(dist_map, on_tree_edge()))));
}
};
};
GraphInterface::hist_t
GraphInterface::GetSampledDistanceHistogram(string weight, size_t samples, size_t seed) const
{
hist_t hist;
if (weight == "")
{
check_filter(*this, bind<void>(get_sampled_distances(), _1, _vertex_index, no_weightS(), var(hist), samples, seed),
reverse_check(), directed_check());
}
else
{
try
{
dynamic_property_map& weight_prop = find_property_map(_properties, weight, typeid(graph_traits<multigraph_t>::edge_descriptor));
try
{
vector_property_map<double, edge_index_map_t> weight_map;
weight_map = get_static_property_map<vector_property_map<double, edge_index_map_t> >(weight_prop);
check_filter(*this, bind<void>(get_sampled_distances(), _1, _vertex_index, weight_map, var(hist), samples, seed),
reverse_check(), directed_check());
}
catch (bad_cast)
{
DynamicPropertyMapWrap<double, graph_traits<multigraph_t>::edge_descriptor> weight_map(weight_prop);
check_filter(*this, bind<void>(get_sampled_distances(), _1, _vertex_index, weight_map, var(hist), samples, seed),
reverse_check(), directed_check());
}
}
catch (property_not_found& e)
{
throw GraphException("error getting scalar property: " + string(e.what()));
}
}
return hist;
}
......@@ -77,55 +77,6 @@ struct bfs_max_depth_watcher
};
// this wraps a container as a property map which is automatically initialized
// with a given default value
template <class Container>
class InitializedPropertyMap
{
public:
typedef typename Container::value_type::second_type value_type;
typedef value_type& reference;
typedef typename Container::key_type key_type;
typedef boost::read_write_property_map_tag category;
InitializedPropertyMap(Container& base_map, value_type def)
: _base_map(&base_map), _default(def) {}
InitializedPropertyMap(){}
reference operator[](const key_type& k)
{
typename Container::iterator val;
val = _base_map->find(k);
if (val == _base_map->end())
val = _base_map->insert(make_pair(k, _default)).first;
return val->second;
}
private:
Container* _base_map;
value_type _default;
};
namespace boost
{
template <class Container>
void put(InitializedPropertyMap<Container>& m, const typename InitializedPropertyMap<Container>::key_type& key,
const typename InitializedPropertyMap<Container>::value_type& value)
{
m[key] = value;
}
template <class Container>
typename InitializedPropertyMap<Container>::value_type
get(InitializedPropertyMap<Container>& m, const typename InitializedPropertyMap<Container>::key_type& key)
{
return m[key];
}
}
struct get_extended_clustering
{
template <class Graph, class IndexMap, class ClusteringMap>
......@@ -169,6 +120,7 @@ struct get_extended_clustering
try
{
distance_map[*a] = 0;
bfs_max_depth_watcher<neighbour_set_t,InitializedPropertyMap<dmap_t> > watcher(targets, cmaps.size(), distance_map);
breadth_first_visit(fg, *a, visitor(make_bfs_visitor(make_pair(record_distances(distance_map, boost::on_tree_edge()),watcher))).
color_map(color_map));
......@@ -179,13 +131,13 @@ struct get_extended_clustering
typename graph_traits<Graph>::adjacency_iterator a2;
for(a2 = adjacent_vertices(*v, g).first ; a2 != a_end ; ++a2)
{