Commit cfc67930 authored by Tiago Peixoto's avatar Tiago Peixoto

Simplify and re-implement absolute_trust()

Now use _best weights_ to in-neighbours to determine trust, to avoid path
multiplicity catastrophe.
parent 8f787a13
......@@ -28,21 +28,20 @@ using namespace std;
using namespace boost;
using namespace graph_tool;
void absolute_trust(GraphInterface& g, int64_t source, boost::any c,
boost::any t, size_t n_paths, size_t n_paths_vertex,
double epsilon, bool reversed)
void absolute_trust(GraphInterface& g, int64_t source, int64_t target,
boost::any c, boost::any t)
{
if (!belongs<edge_floating_properties>()(c))
throw ValueException("edge property must be of floating point value type");
if (!belongs<vertex_floating_vector_properties>()(t))
throw ValueException("vertex property must be of floating point vector value type");
if (!belongs<vertex_floating_properties>()(t))
throw ValueException("vertex property must be of floating point value type");
run_action<>()(g,
bind<void>(get_absolute_trust(), _1, g.GetVertexIndex(),
g.GetEdgeIndex(), g.GetMaxEdgeIndex(), source, _2,
_3, make_tuple(n_paths, n_paths_vertex, epsilon),
reversed), edge_floating_properties(),
vertex_floating_vector_properties())(c, t);
g.GetEdgeIndex(), g.GetMaxEdgeIndex(), source,
target, _2, _3),
edge_floating_properties(),
vertex_floating_properties())(c,t);
}
void export_absolute_trust()
......
......@@ -26,7 +26,7 @@
#include <tr1/tuple>
#include <algorithm>
#include "minmax.hh"
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <iostream>
......@@ -34,330 +34,137 @@ namespace graph_tool
{
using namespace std;
using namespace boost;
using std::tr1::get;
using std::tr1::tuple;
template <class Path>
struct path_cmp
// Empty tag type carrying no data: source_counter::examine_vertex throws it
// once every marked source vertex has been examined, and the caller of the
// search catches it to end the search early.
struct stop_search {}; // exception to be thrown to stop search
template <class SourceMap, class WeightMap>
class source_counter:
public boost::dijkstra_visitor<null_visitor>
{
path_cmp(vector<Path>& paths): _paths(paths) {}
vector<Path>& _paths;
public:
source_counter(SourceMap source_map, WeightMap weight_map, size_t n_sources)
: _source_map(source_map), _weight_map(weight_map),
_n_sources(n_sources) {}
template <class Graph>
void examine_vertex(typename graph_traits<Graph>::vertex_descriptor u,
Graph& g)
{
// stop if all sources are found
if (_source_map[u])
{
_n_sources--;
if (_n_sources == 0)
throw stop_search();
}
}
private:
SourceMap _source_map;
WeightMap _weight_map;
size_t _n_sources;
};
// Comparison functor handed to the shortest-path search in place of the
// usual "less than": the first argument wins when it is numerically larger,
// because here a larger accumulated trust means a better path.
struct dist_compare
{
    template <class Lhs, class Rhs>
    bool operator()(const Lhs& lhs, const Rhs& rhs) const
    {
        // trust paths are ranked by _maximum_ "distance"
        const bool lhs_is_better = lhs > rhs;
        return lhs_is_better;
    }
};
typedef size_t first_argument_type;
typedef size_t second_argument_type;
typedef bool result_type;
inline bool operator()(size_t a, size_t b)
struct dist_combine
{
template <class DistType, class WeightType>
DistType operator()(const DistType& d, const WeightType& w) const
{
if (get<0>(_paths[a]).second == get<0>(_paths[b]).second)
return get<1>(_paths[a]).size() > get<1>(_paths[b]).size();
return get<0>(_paths[a]).second < get<0>(_paths[b]).second;
return d*w;
}
};
// Predicate to filter a single vertex from the graph: operator() returns
// false only for the vertex whose index equals _v, and true for all others.
struct filter_vertex_pred
{
    // A default-constructed predicate previously left _v uninitialized, so
    // calling operator() read an indeterminate value. Initialize it to a
    // sentinel (the largest size_t) so that such a predicate keeps every
    // vertex. NOTE(review): assumes no real vertex index equals size_t(-1).
    filter_vertex_pred(): _v(static_cast<size_t>(-1)) {}

    // Build a predicate that hides the vertex with index v.
    filter_vertex_pred(size_t v): _v(v) {}

    // True when v should remain visible, i.e. it is not the excluded vertex.
    template <class Vertex>
    bool operator()(const Vertex& v) const { return v != _v; }

    size_t _v; // index of the excluded vertex
};
struct get_absolute_trust
{
template <class Graph, class VertexIndex, class EdgeIndex, class TrustMap,
class InferredTrustMap>
void operator()(Graph& g, VertexIndex vertex_index, EdgeIndex edge_index,
size_t max_edge_index, int64_t source, TrustMap c,
InferredTrustMap t, boost::tuple<size_t, size_t,
double> path_limits, bool reversed) const
size_t max_edge_index, int64_t source, int64_t target,
TrustMap c, InferredTrustMap t) const
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename graph_traits<Graph>::edge_descriptor edge_t;
typedef typename property_traits<TrustMap>::value_type c_type;
typedef typename property_traits<InferredTrustMap>::value_type::
value_type t_type;
typedef typename property_traits<InferredTrustMap>::value_type t_type;
size_t n_paths = get<0>(path_limits);
size_t n_paths_vertex = get<1>(path_limits);
t_type epsilon = get<2>(path_limits);
vertex_t v = vertex(source, g);
// the path type: the first value is the (trust,weight) pair, the second
// the set of vertices in the path and the third is the list of edges,
// in the path sequence.
typedef tuple<pair<t_type, t_type>, tr1::unordered_set<vertex_t>,
vector<edge_t> > path_t;
int i, N = (source == -1) ? num_vertices(g) : source + 1;
int i, N = (target == -1) ? num_vertices(g) : target + 1;
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
for (i= (source == -1) ? 0 : source; i < N; ++i)
for (i = (target == -1) ? 0 : target; i < N; ++i)
{
vertex_t v = vertex(i, g);
t[v].resize(num_vertices(g));
// path priority queue
vector<path_t> paths(1);
typedef double_priority_queue<size_t, path_cmp<path_t> > queue_t;
queue_t queue = queue_t(path_cmp<path_t>(paths));
get<0>(paths.back()).first = get<0>(paths.back()).second = 1;
get<1>(paths.back()).insert(v);
queue.push(0);
vector<size_t> free_paths; // free path indexes
// this is the actual queue of paths which will be used to compute
// the trust values
queue_t final_queue = queue_t(path_cmp<path_t>(paths));
// store all paths which reach a given vertex
unchecked_vector_property_map<tr1::unordered_set<size_t>,
VertexIndex>
path_map(vertex_index, num_vertices(g));
unchecked_vector_property_map<bool, VertexIndex>
saturated(vertex_index, num_vertices(g));
while (!queue.empty())
{
size_t pi = queue.top();
queue.pop_top();
vertex_t w;
vertex_t ptarget;
if (get<2>(paths[pi]).size() > 0)
ptarget = target(get<2>(paths[pi]).back(),g);
// push queue top into final queue
if (get<2>(paths[pi]).size() > 0 &&
(n_paths_vertex == 0 || !saturated[ptarget]))
{
w = target(get<2>(paths[pi]).back(), g);
final_queue.push(pi);
// augment path map
path_map[ptarget].insert(pi);
if (path_map[ptarget].size() == n_paths_vertex)
saturated[ptarget] = true;
}
else
{
w = v; // the first path
}
// if maximum size is reached, remove the bottom
if ((n_paths > 0) && (final_queue.size() > n_paths))
{
size_t bi = final_queue.bottom();
ptarget = target(get<2>(paths[bi]).back(),g);
if (!saturated[ptarget])
{
final_queue.pop_bottom();
free_paths.push_back(bi);
// remove path from path map
path_map[ptarget].erase(bi);
if (bi == pi)
continue;
}
}
// augment paths and put them in the queue
typename graph_traits<Graph>::out_edge_iterator e, e_end;
for (tie(e, e_end) = out_edges(w, g); e != e_end; ++e)
{
vertex_t a = target(*e, g);
// no loops
if (get<1>(paths[pi]).find(a) == get<1>(paths[pi]).end())
{
// only follow non-zero paths
if (c[*e] > 0 || (reversed &&
get<1>(paths[pi]).size() == 1))
{
size_t npi;
// clone last path
if (free_paths.empty())
{
paths.push_back(paths[pi]);
npi = paths.size()-1;
}
else
{
npi = free_paths.back();
free_paths.pop_back();
paths[npi] = paths[pi];
}
path_t& np = paths[npi]; // new path
if (!reversed)
{
// path weight
get<0>(np).second = get<0>(np).first;
vertex_t w = vertex(i, g);
// path value
get<0>(np).first *= c[*e];
}
else
{
if (get<1>(np).size() > 1)
get<0>(np).second *= c[*e];
get<0>(np).first *= c[*e];
}
get<1>(np).insert(a);
get<2>(np).push_back(*e);
// mark the sources
typedef unchecked_vector_property_map<uint8_t, VertexIndex>
source_map_t;
source_map_t source_map(vertex_index, num_vertices(g));
// keep following paths only if there is a chance
// they will make it into the final queue
if ((n_paths > 0 && final_queue.size() < n_paths) ||
(final_queue.size() == 0 ||
(get<0>(np).second >=
get<0>(paths[final_queue.bottom()]).second)))
{
// drop paths with weight smaller than epsilon
if (get<0>(np).second > epsilon)
queue.push(npi);
}
else
{
if (npi == paths.size() - 1)
paths.pop_back();
else
free_paths.push_back(npi);
}
}
}
}
}
typename in_edge_iteratorS<Graph>::type e, e_end;
for (tie(e, e_end) = in_edge_iteratorS<Graph>::get_edges(w, g);
e != e_end; ++e)
source_map[boost::source(*e,g)] = true;
unchecked_vector_property_map<t_type, VertexIndex>
weight_sum(vertex_index, num_vertices(g));
// filter vertex w out of the graph
filtered_graph<Graph, boost::keep_all, filter_vertex_pred>
fg(g, boost::keep_all(), filter_vertex_pred(w));
// paths which were already calculated and can be skipped
tr1::unordered_set<size_t> skip;
// distance map (target weight map)
typedef unchecked_vector_property_map<t_type, VertexIndex>
dist_map_t;
dist_map_t dist_map(vertex_index, num_vertices(g));
// calculate trust from paths in the final queue
while (!final_queue.empty())
// compute the targets weights
try
{
size_t pi = final_queue.top();
final_queue.pop_top();
path_t& p = paths[pi];
if (skip.find(pi) != skip.end())
continue;
tr1::unordered_set<size_t>& apaths =
path_map[target(get<2>(p).back(), g)]; // all paths with the
// same final target
tr1::unordered_set<size_t> vlist; // all vertices involved
// compute cumulative edge weight
tr1::unordered_map<size_t,
tr1::unordered_map<vertex_t, t_type> >
cum_weight;
for (typeof(apaths.begin()) iter = apaths.begin();
iter != apaths.end(); ++iter)
{
path_t& path = paths[*iter];
size_t path_size = get<2>(path).size();
vertex_t ptarget = target(get<2>(path).back(),g);
t_type w = 1;
for (size_t j = 0; j < path_size; ++j)
{
edge_t e;
if (!reversed)
{
e = get<2>(path)[path_size - 1 - j];
if (j < path_size - 1)
w *= c[e];
}
else
{
e = get<2>(path)[j];
if (j > 0)
w *= c[e];
}
cum_weight[edge_index[e]][ptarget] += w;
vlist.insert(target(e,g));
}
vlist.insert(boost::source(get<2>(paths[*iter]).front(),g));
}
// compute out-weight
tr1::unordered_map<vertex_t,
tr1::unordered_map<vertex_t, t_type> >
out_weight;
for (typeof(vlist.begin()) viter = vlist.begin();
viter != vlist.end(); ++viter)
{
vertex_t u = *viter;
if (!reversed)
{
typename graph_traits<Graph>::out_edge_iterator e,e_end;
for (tie(e, e_end) = out_edges(u, g); e != e_end; ++e)
{
size_t ei = edge_index[*e];
for (typeof(cum_weight[ei].begin()) witer =
cum_weight[ei].begin();
witer != cum_weight[ei].end(); ++witer)
out_weight[u][witer->first] += witer->second;
}
}
else
{
// if reversed, use "in-trust" instead
typename in_edge_iteratorS<Graph>::type e, e_end;
for (tie(e, e_end) =
in_edge_iteratorS<Graph>::get_edges(v, g);
e != e_end; ++e)
{
size_t ei = edge_index[*e];
for (typeof(cum_weight[ei].begin()) witer =
cum_weight[ei].begin();
witer != cum_weight[ei].end(); ++witer)
out_weight[u][witer->first] += witer->second;
}
}
}
for (typeof(apaths.begin()) iter = apaths.begin();
iter != apaths.end(); ++iter)
{
size_t pi = *iter;
path_t& p = paths[pi];
vertex_t ptarget = target(get<2>(paths[*iter]).back(),g);
// calculate the trust value and weight of the path
t_type w = 1, val = 1;
for (size_t i = 0; i < get<2>(p).size(); ++i)
{
edge_t e = get<2>(p)[i];
vertex_t u = (!reversed) ?
boost::source(e,g) : target(e,g);
if (out_weight[u][ptarget] > 0)
{
if ((!reversed && i < get<2>(p).size()-1) ||
(reversed && i > 0))
w *= c[e]*cum_weight[edge_index[e]][ptarget]/
out_weight[u][ptarget];
}
val *= c[e];
}
vertex_t u = target(get<2>(p).back(), g);
weight_sum[u] += w;
t[v][u] += w*val;
skip.insert(pi);
}
size_t k = in_degreeS()(w,g);
source_counter<source_map_t,dist_map_t>
visitor(source_map, dist_map, k);
dijkstra_shortest_paths(fg, v, weight_map(c).
vertex_index_map(vertex_index).
distance_map(dist_map).
distance_compare(dist_compare()).
distance_combine(dist_combine()).
distance_inf(t_type(0)).
distance_zero(t_type(1)).
visitor(visitor));
}
catch (const stop_search&) {}
int j, N = num_vertices(g);
#pragma omp parallel for default(shared) private(j) \
schedule(dynamic)
for (j = 0; j < N; ++j)
// compute the target's trust
t_type sum_w = 0, avg = 0;
for (tie(e, e_end) = in_edge_iteratorS<Graph>::get_edges(w, g);
e != e_end; ++e)
{
vertex_t w = vertex(j, g);
if (w == graph_traits<Graph>::null_vertex())
continue;
if (weight_sum[w] > 0)
t[v][w] /= weight_sum[w];
t_type weight = dist_map[boost::source(*e,g)];
sum_w += weight;
avg += c[*e]*weight*weight;
}
if (sum_w > 0)
t[w] = avg/sum_w;
}
}
};
}
......
......@@ -406,8 +406,7 @@ def eigentrust(g, trust_map, vprop=None, norm=False, epslon=1e-6, max_iter=0,
else:
return vprop
def absolute_trust(g, trust_map, source = None, vprop=None, n_paths=10000,
n_paths_vertex=10, epsilon = 0, reversed=False):
def absolute_trust(g, trust_map, source, target = None, vprop=None):
r"""
Calculate the absolute trust centrality of each vertex in the graph, from a
given source.
......@@ -419,29 +418,23 @@ def absolute_trust(g, trust_map, source = None, vprop=None, n_paths=10000,
trust_map : :class:`~graph_tool.PropertyMap`
Edge property map with the values of trust associated with each
edge. The values must lie in the range [0,1].
source : Vertex (optional, default: None)
A vertex which is used as the source for gathering trust values. If
left unspecified, the trust values for all sources are computed.
source : Vertex
A vertex which is used as the source for gathering trust values.
target : Vertex (optional, default: None)
A vertex which is used as the only target for which the trust value
will be calculated. If left unspecified, the trust values for all
targets are computed.
vprop : :class:`~graph_tool.PropertyMap`, optional (default: None)
Vector vertex property map where the values of trust for each source
A vertex property map where the values of trust for each source
must be stored.
n_paths : int, optional (default: 10000)
Maximum number of paths to consider.
reversed : bool, optional (default: False)
Calculates the "reversed" trust instead: The direction of the edges are
inverted, but the path weighting is preserved in the original direction
(see Notes below).
Returns
-------
absolute_trust : :class:`~graph_tool.PropertyMap`
absolute_trust : :class:`~graph_tool.PropertyMap` or float
A vertex property map containing the absolute trust vector from the
corresponding vertex to the rest of the network. Each element i of the
vector is the trust value of the vertex with index i, from the given
vertex.
If the parameter "source" is specified, the values of the
property map are scalars, instead of vectors.
source vertex to the rest of the network. If `target` is specified, the
result is a single float, with the corresponding trust value for the
target.
See Also
--------
......@@ -455,28 +448,23 @@ def absolute_trust(g, trust_map, source = None, vprop=None, n_paths=10000,
.. math::
t_{ij} = \frac{1}{\sum_{\{i\to j\}}w_{\{i\to j\}}}\sum_{\{i\to j\}}
w_{\{i\to j\}} \prod_{e\in \{i\to j\}}c_e
t_{ij} = \frac{\sum_m A_{m,j} w^2_{G\setminus\{j\}}(i\to m)c_{m,j}}
{\sum_m A_{m,j} w_{G\setminus\{j\}}(i\to m)}
where the sum is taken over all paths from i to j (without loops),
:math:`c_e` is the direct trust value associated with edge e, and
:math:`w_{\{i\to j\}}` is the weight of a given path, which is defined as
where :math:`A_{ij}` is the adjacency matrix, :math:`c_{ij}` is the direct
trust from i to j, and :math:`w_G(i\to j)` is the weight of the path with
maximum weight from i to j, computed as
.. math::
w_{\{i\to j\}} = \prod_{e\in \{i\to j\}}\frac{c_e}{\Gamma^+_{\{i\to j\}}(s(e))}
\{c_e(1-\delta_{t(e),j}) + \delta_{t(e),j}\},
such that the direct trust of the last edge on the path is not
considered. The value :math:`\Gamma^+_{\{i\to j\}}(s(e))` is the sum of
trust values of the selected out-edges of vertex :math:`s(e)`, which also
belong to the set of edge-disjoint paths from i to j.
w_G(i\to j) = \prod_{e\in i\to j} c_e.
The algorithm measures the absolute trust by following all vertex-disjoint
paths, and keeping them on a priority queue. Each iteration the path with
maximum weight is augmented, and the new paths pushed into the queue. The
algorithm stops when all paths are consumed, or when all the ``n_paths``
paths with largest weights are found.
The algorithm measures the absolute trust by finding the paths with maximum
weight, using Dijkstra's algorithm, to all in-neighbours of a given
target. This search needs to be performed repeatedly for every target, since
it needs to be removed from the graph first. The resulting complexity is
therefore :math:`O(N^2\log N)` for all targets, and :math:`O(N\log N)` for a
single target.
If enabled during compilation, this algorithm runs in parallel.
......@@ -486,70 +474,42 @@ def absolute_trust(g, trust_map, source = None, vprop=None, n_paths=10000,
>>> seed(42)
>>> g = gt.random_graph(100, lambda: (poisson(3), poisson(3)))
>>> trust = g.new_edge_property("double")
>>> trust.get_array()[:] = random(g.num_edges())
>>> trust.a = random(g.num_edges())
>>> t = gt.absolute_trust(g, trust, source=g.vertex(0))
>>> print t.a
[ 0.00000000e+00 5.14258135e-02 2.42874582e-04 1.05347472e-06
0.00000000e+00 3.13429149e-04 1.53697222e-04 3.83063399e-05
2.65668937e-06 2.04029901e-05 1.19582153e-05 2.67743821e-06
1.50606560e-04 1.51595650e-05 5.72684475e-05 2.16466381e-06
0.00000000e+00 4.08340061e-05 3.26896572e-06 7.80860267e-05
7.31033290e-05 7.81690832e-05 2.93440658e-04 1.19013202e-05
1.60601849e-06 6.79167712e-05 9.35414301e-05 1.98991248e-05
2.08142130e-05 1.28565785e-04 2.83893891e-03 8.45362053e-05
1.15751883e-05 1.97248846e-05 0.00000000e+00 7.51004486e-06
5.49704676e-07 0.00000000e+00 1.06219388e-04 9.64852468e-04
0.00000000e+00 4.70496027e-05 5.49108602e-05 6.23617670e-06
1.32625806e-06 7.35202433e-05 2.09546902e-06 1.99138155e-03
4.32934771e-06 2.61887887e-05 2.55099939e-05 3.90874553e-06
9.07765143e-05 2.59243068e-06 7.50032403e-06 8.36211398e-05
7.80814352e-04 8.12133072e-06 6.24066931e-04 2.19465770e-06
4.15039190e-05 5.41464668e-05 1.84421073e-03 8.02449156e-06
4.01472852e-06 3.76746767e-01 7.02886863e-05 1.52365123e-04
4.58687938e-06 3.70470973e-02 0.00000000e+00 1.85922960e-06
2.05481272e-05 1.41021895e-04 1.45217040e-06 3.18562543e-06
2.62264044e-01 7.41140347e-06 1.39150089e-05 3.86583428e-06
2.85681164e-06 4.12923146e-06 7.05705402e-07 2.12584322e-05
1.65948868e-04 3.10144404e-05 5.08749580e-06 0.00000000e+00
1.45435603e-03 4.19224443e-03 4.88198531e-05 3.00152848e-04
5.61591759e-05 2.31951396e-04 1.19051653e-05 2.34710286e-05
6.27636571e-04 1.65759606e-02 1.30944429e-05 1.26282526e-05]
[ 0.05927703 0.06133836 0. 0.05630559 0. 0.03317174
0.03488483 0.15920558 0.16940159 0.09716039 0.1485169 0.0120287
0.03787312 0.37284274 0.00646336 0.0084941 0.0379645 0.07997339
0.10733769 0.10053845 0.00283938 0.05224064 0. 0.16523684
0.0393326 0.25853808 0.14682555 0.03254906 0.12124144 0.0118341
0.18110839 0.18513216 0.05031324 0.04484457 0.17197674 0.08569659
0.17523371 0.22435776 0.33916191 0.07980329 0. 0.
0.09750183 0.09811054 0.14574289 0.0085499 0.34593499 0.03151408
0.083739 0.05409947 0.09161205 0.19921201 0.10647812 0.21597253
0.06266044 0.8738786 0.11239455 0.09493216 0.19073287 0.11968616
0.13409125 0.00626821 0.05857625 0.05917779 0.05673643 0.
0.02682173 0.00355514 0.17475858 0.15113517 0.13247358 0.
0.04003866 0.00997401 0.11126411 0.07400706 0.11247583 0.10125886
0.16028191 0.04300862 0.03259707 0.0225482 0.05538721 0.
0.06715919 0.0701153 0.02999368 0.04675702 0.06310919 0.01722603
0.18455906 0.08034113 0.00376382 0.10041304 0.3437539 0.10530238
0.11654855 0.09495419 0.05317485 0.10727767]
"""