Commit 9137c542 authored by Tiago Peixoto

vertex_similarity(): Add support for weighted and multigraphs

This fixes issue #592.
parent 2c084876
Pipeline #492 failed with stage
in 395 minutes and 23 seconds
...@@ -25,107 +25,135 @@ using namespace std; ...@@ -25,107 +25,135 @@ using namespace std;
using namespace boost; using namespace boost;
using namespace graph_tool; using namespace graph_tool;
void get_dice_similarity(GraphInterface& gi, boost::any as, bool self_loop) typedef UnityPropertyMap<uint8_t, GraphInterface::edge_t> ecmap_t;
typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type
weight_props_t;
void get_dice_similarity(GraphInterface& gi, boost::any as, boost::any weight)
{ {
if (weight.empty())
weight = ecmap_t();
gt_dispatch<>() gt_dispatch<>()
([&](auto& g, auto& s) ([&](auto& g, auto& s, auto& w)
{ {
all_pairs_similarity(g, s, all_pairs_similarity(g, s,
[&](auto u, auto v, auto& mask) [&](auto u, auto v, auto& mask, auto& w)
{ {
return dice(u, v, self_loop, mask, g); return dice(u, v, mask, w, g);
}); }, w);
}, },
all_graph_views(), vertex_floating_vector_properties()) all_graph_views(), vertex_floating_vector_properties(),
(gi.get_graph_view(), as); weight_props_t())
(gi.get_graph_view(), as, weight);
} }
void get_dice_similarity_pairs(GraphInterface& gi, python::object opairs, void get_dice_similarity_pairs(GraphInterface& gi, python::object opairs,
python::object osim, bool self_loop) python::object osim, boost::any weight)
{ {
multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs); multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs);
multi_array_ref<double,1> sim = get_array<double,1>(osim); multi_array_ref<double,1> sim = get_array<double,1>(osim);
if (weight.empty())
weight = ecmap_t();
gt_dispatch<>() gt_dispatch<>()
([&](auto& g) ([&](auto& g, auto w)
{ {
some_pairs_similarity(g, pairs, sim, some_pairs_similarity(g, pairs, sim,
[&](auto u, auto v, auto& mask) [&](auto u, auto v, auto& mask, auto& w)
{ {
return dice(u, v, self_loop, mask, g); return dice(u, v, mask, w, g);
}); }, w);
}, },
all_graph_views()) all_graph_views(), weight_props_t())
(gi.get_graph_view()); (gi.get_graph_view(), weight);
} }
void get_jaccard_similarity(GraphInterface& gi, boost::any as, bool self_loop) void get_jaccard_similarity(GraphInterface& gi, boost::any as, boost::any weight)
{ {
if (weight.empty())
weight = ecmap_t();
gt_dispatch<>() gt_dispatch<>()
([&](auto& g, auto& s) ([&](auto& g, auto& s, auto w)
{ {
all_pairs_similarity(g, s, all_pairs_similarity(g, s,
[&](auto u, auto v, auto& mask) [&](auto u, auto v, auto& mask, auto w)
{ {
return jaccard(u, v, self_loop, mask, g); return jaccard(u, v, mask, w, g);
}); }, w);
}, },
all_graph_views(), vertex_floating_vector_properties()) all_graph_views(), vertex_floating_vector_properties(),
(gi.get_graph_view(), as); weight_props_t())
(gi.get_graph_view(), as, weight);
} }
void get_jaccard_similarity_pairs(GraphInterface& gi, python::object opairs, void get_jaccard_similarity_pairs(GraphInterface& gi, python::object opairs,
python::object osim, bool self_loop) python::object osim, boost::any weight)
{ {
multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs); multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs);
multi_array_ref<double,1> sim = get_array<double,1>(osim); multi_array_ref<double,1> sim = get_array<double,1>(osim);
if (weight.empty())
weight = ecmap_t();
gt_dispatch<>() gt_dispatch<>()
([&](auto& g) ([&](auto& g, auto w)
{ {
some_pairs_similarity(g, pairs, sim, some_pairs_similarity(g, pairs, sim,
[&](auto u, auto v, auto& mask) [&](auto u, auto v, auto& mask, auto w)
{ {
return jaccard(u, v, self_loop, mask, g); return jaccard(u, v, mask, w, g);
}); }, w);
}, },
all_graph_views()) all_graph_views(), weight_props_t())
(gi.get_graph_view()); (gi.get_graph_view(), weight);
} }
void get_inv_log_weight_similarity(GraphInterface& gi, boost::any as) void get_inv_log_weight_similarity(GraphInterface& gi, boost::any as,
boost::any weight)
{ {
if (weight.empty())
weight = ecmap_t();
gt_dispatch<>() gt_dispatch<>()
([&](auto& g, auto& s) ([&](auto& g, auto& s, auto w)
{ {
all_pairs_similarity(g, s, all_pairs_similarity(g, s,
[&](auto u, auto v, auto& mask) [&](auto u, auto v, auto& mask, auto w)
{ {
return inv_log_weighted(u, v, mask, g); return inv_log_weighted(u, v, mask, w, g);
}); }, w);
}, },
all_graph_views(), vertex_floating_vector_properties()) all_graph_views(), vertex_floating_vector_properties(),
(gi.get_graph_view(), as); weight_props_t())
(gi.get_graph_view(), as, weight);
} }
void get_inv_log_weight_similarity_pairs(GraphInterface& gi, void get_inv_log_weight_similarity_pairs(GraphInterface& gi,
python::object opairs, python::object opairs,
python::object osim) python::object osim,
boost::any weight)
{ {
multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs); multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs);
multi_array_ref<double,1> sim = get_array<double,1>(osim); multi_array_ref<double,1> sim = get_array<double,1>(osim);
if (weight.empty())
weight = ecmap_t();
gt_dispatch<>() gt_dispatch<>()
([&](auto& g) ([&](auto& g, auto w)
{ {
some_pairs_similarity(g, pairs, sim, some_pairs_similarity(g, pairs, sim,
[&](auto u, auto v, auto& mask) [&](auto u, auto v, auto& mask, auto w)
{ {
return inv_log_weighted(u, v, mask, g); return inv_log_weighted(u, v, mask, w, g);
}); }, w);
}, },
all_graph_views()) all_graph_views(), weight_props_t())
(gi.get_graph_view()); (gi.get_graph_view(), weight);
} }
......
...@@ -25,80 +25,81 @@ namespace graph_tool ...@@ -25,80 +25,81 @@ namespace graph_tool
using namespace std; using namespace std;
using namespace boost; using namespace boost;
template <class Graph, class Vertex, class Mark> template <class Graph, class Vertex, class Mark, class Weight>
double dice(Vertex u, Vertex v, bool self_loop, Mark& mark, Graph& g) double dice(Vertex u, Vertex v, Mark& mark, Weight& weight, Graph& g)
{ {
size_t count = 0; typename property_traits<Weight>::value_type count = 0, ku = 0, kv = 0;
for (auto w : adjacent_vertices_range(u, g)) for (auto e : out_edges_range(u, g))
mark[w] = true;
if (self_loop)
mark[u] = true;
for (auto w : adjacent_vertices_range(v, g))
{ {
if (mark[w]) auto w = weight[e];
count++; mark[target(e, g)] += w;
ku += w;
}
for (auto e : out_edges_range(v, g))
{
auto w = weight[e];
auto dw = std::min(w, mark[target(e, g)]);
mark[target(e, g)] -= dw;
count += dw;
kv += w;
} }
for (auto w : adjacent_vertices_range(u, g)) for (auto w : adjacent_vertices_range(u, g))
mark[w] = false; mark[w] = 0;
if (self_loop) return 2 * count / double(ku + kv);
mark[u] = false;
return 2 * count / double(out_degree(u, g) + out_degree(v, g));
} }
template <class Graph, class Vertex, class Mark> template <class Graph, class Vertex, class Mark, class Weight>
double jaccard(Vertex u, Vertex v, bool self_loop, Mark& mark, Graph& g) double jaccard(Vertex u, Vertex v, Mark& mark, Weight& weight, Graph& g)
{ {
size_t count = 0, total = 0; typename property_traits<Weight>::value_type count = 0, total = 0;
for (auto w : adjacent_vertices_range(u, g)) for (auto e : out_edges_range(u, g))
{ {
mark[w] = true; auto w = weight[e];
total++; mark[target(e, g)] += w;
total += w;
} }
if (self_loop) for (auto e : out_edges_range(v, g))
mark[u] = true;
for (auto w : adjacent_vertices_range(v, g))
{ {
if (mark[w]) auto w = weight[e];
count++; auto dw = std::min(w, mark[target(e, g)]);
else count += dw;
total++; mark[target(e, g)] -= dw;
total += w - dw;
} }
for (auto w : adjacent_vertices_range(u, g)) for (auto w : adjacent_vertices_range(u, g))
mark[w] = false; mark[w] = 0;
if (self_loop)
mark[u] = false;
return count / double(total); return count / double(total);
} }
template <class Graph, class Vertex, class Mark> template <class Graph, class Vertex, class Mark, class Weight>
double inv_log_weighted(Vertex u, Vertex v, Mark& mark, Graph& g) double inv_log_weighted(Vertex u, Vertex v, Mark& mark, Weight& weight, Graph& g)
{ {
double count = 0; double count = 0;
for (auto w : adjacent_vertices_range(u, g)) for (auto e : out_edges_range(u, g))
mark[w] = true; mark[target(e, g)] += weight[e];
for (auto w : adjacent_vertices_range(v, g)) for (auto w : adjacent_vertices_range(v, g))
{ {
if (mark[w]) if (mark[w] > 0)
{ {
if (graph_tool::is_directed(g)) if (graph_tool::is_directed(g))
count += 1. / log(in_degreeS()(w, g)); count += mark[w] / log(in_degreeS()(w, g, weight));
else else
count += 1. / log(out_degree(w, g)); count += mark[w] / log(out_degreeS()(w, g, weight));
} }
} }
for (auto w : adjacent_vertices_range(u, g)) for (auto w : adjacent_vertices_range(u, g))
mark[w] = false; mark[w] = 0;
return count; return count;
} }
template <class Graph, class VMap, class Sim> template <class Graph, class VMap, class Sim, class Weight>
void all_pairs_similarity(Graph& g, VMap s, Sim&& f) void all_pairs_similarity(Graph& g, VMap s, Sim&& f, Weight& weight)
{ {
vector<bool> mask(num_vertices(g), false); vector<typename property_traits<Weight>::value_type>
mask(num_vertices(g));
#pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH) \ #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH) \
firstprivate(mask) firstprivate(mask)
parallel_vertex_loop_no_spawn parallel_vertex_loop_no_spawn
...@@ -107,23 +108,25 @@ void all_pairs_similarity(Graph& g, VMap s, Sim&& f) ...@@ -107,23 +108,25 @@ void all_pairs_similarity(Graph& g, VMap s, Sim&& f)
{ {
s[v].resize(num_vertices(g)); s[v].resize(num_vertices(g));
for (auto w : vertices_range(g)) for (auto w : vertices_range(g))
s[v][w] = f(v, w, mask); s[v][w] = f(v, w, mask, weight);
}); });
} }
template <class Graph, class Vlist, class Slist, class Sim> template <class Graph, class Vlist, class Slist, class Sim, class Weight>
void some_pairs_similarity(Graph& g, Vlist& vlist, Slist& slist, Sim&& f) void some_pairs_similarity(Graph& g, Vlist& vlist, Slist& slist, Sim&& f,
Weight& weight)
{ {
vector<bool> mask(num_vertices(g), false); vector<typename property_traits<Weight>::value_type>
mark(num_vertices(g));
#pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH) \ #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH) \
firstprivate(mask) firstprivate(mark)
parallel_loop_no_spawn parallel_loop_no_spawn
(vlist, (vlist,
[&](size_t i, const auto& val) [&](size_t i, const auto& val)
{ {
size_t u = val[0]; size_t u = val[0];
size_t v = val[1]; size_t v = val[1];
slist[i] = f(u, v, mask); slist[i] = f(u, v, mark, weight);
}); });
} }
......
...@@ -264,7 +264,7 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -264,7 +264,7 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
return s return s
@_limit_args({"sim_type": ["dice", "jaccard", "inv-log-weight"]}) @_limit_args({"sim_type": ["dice", "jaccard", "inv-log-weight"]})
def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True, def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, eweight=None,
sim_map=None): sim_map=None):
r"""Return the similarity between pairs of vertices. r"""Return the similarity between pairs of vertices.
...@@ -278,9 +278,8 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True, ...@@ -278,9 +278,8 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True,
vertex_pairs : iterable of pairs of integers (optional, default: ``None``) vertex_pairs : iterable of pairs of integers (optional, default: ``None``)
Pairs of vertices to compute the similarity. If omitted, all pairs will Pairs of vertices to compute the similarity. If omitted, all pairs will
be considered. be considered.
self_loops : bool (optional, default: ``True``) eweight : :class:`~graph_tool.EdgePropertyMap` (optional, default: ``None``)
If ``True``, vertices will be considered adjacent to themselves for the Edge weights.
purpose of the similarity computation.
sim_map : :class:`~graph_tool.VertexPropertyMap` (optional, default: ``None``) sim_map : :class:`~graph_tool.VertexPropertyMap` (optional, default: ``None``)
If provided, and ``vertex_pairs is None``, the vertex similarities will If provided, and ``vertex_pairs is None``, the vertex similarities will
be stored in this vector-valued property. Otherwise, a new one will be be stored in this vector-valued property. Otherwise, a new one will be
...@@ -296,27 +295,41 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True, ...@@ -296,27 +295,41 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True,
Notes Notes
----- -----
According to ``sim_type``, this function computes the following similarities: According to ``sim_type``, this function computes one of the following
similarities:
``sim_type == "dice"`` ``sim_type == "dice"``
The Sørensen–Dice similarity [sorensen-dice]_ is twice the number of The Sørensen–Dice similarity [sorensen-dice]_ of vertices :math:`u` and
common neighbors between two vertices divided by the sum of their :math:`v` is defined as
degrees.
.. math::
\frac{2|\Gamma(u)\cap\Gamma(v)|}{|\Gamma(u)|+|\Gamma(v)|},
where :math:`\Gamma(u)` is the set of neighbors of vertex :math:`u`.
``sim_type == "jaccard"`` ``sim_type == "jaccard"``
The Jaccard similarity [jaccard]_ is the number of common neighbors The Jaccard similarity [jaccard]_ of vertices :math:`u` and
between two vertices divided by the size of the set of all neighbors to :math:`v` is defined as
both vertices.
.. math::
\frac{|\Gamma(u)\cap\Gamma(v)|}{|\Gamma(u)\cup\Gamma(v)|},
where :math:`\Gamma(u)` is the set of neighbors of vertex :math:`u`.
``sim_type == "inv-log-weight"`` ``sim_type == "inv-log-weight"``
The inverse log weighted similarity [adamic-friends-2003]_ is the sum of The inverse log weighted similarity [adamic-friends-2003]_ of vertices
the weights of common neighbors between two vertices, where the weights :math:`u` and :math:`v` is defined as
are computed as :math:`1/\log(k)`, with :math:`k` being the degree of the
vertex. .. math::
\sum_{w \in \Gamma(u)\cap\Gamma(v)}\frac{1}{\log |\Gamma(w)|},
where :math:`\Gamma(u)` is the set of neighbors of vertex :math:`u`.
For directed graphs, only out-neighbors are considered in the above For directed graphs, only out-neighbors are considered in the above
algorithms (for "inv-log-weight", the in-degrees are used to compute the algorithms (for "inv-log-weight", the in-degrees are used to compute the
...@@ -324,6 +337,17 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True, ...@@ -324,6 +337,17 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True,
should be used to reverse the graph, e.g. ``vertex_similarity(GraphView(g, should be used to reverse the graph, e.g. ``vertex_similarity(GraphView(g,
reversed=True))``. reversed=True))``.
For weighted graphs or multigraphs, the set sizes in the above equations are
defined as follows:
.. math::
|\Gamma(u)\cap\Gamma(v)| &= \sum_w \operatorname{min}(A_{wv}, A_{wu}),\\
|\Gamma(u)\cup\Gamma(v)| &= \sum_w \operatorname{max}(A_{wv}, A_{wu}),\\
|\Gamma(u)| &= \sum_w A_{wu},
where :math:`A_{wu}` is the corresponding entry of the weighted adjacency matrix.
The algorithm runs with complexity :math:`O(\left<k\right>N^2)` if The algorithm runs with complexity :math:`O(\left<k\right>N^2)` if
``vertex_pairs is None``, otherwise with :math:`O(\left<k\right>P)` where ``vertex_pairs is None``, otherwise with :math:`O(\left<k\right>P)` where
:math:`P` is the length of ``vertex_pairs``. :math:`P` is the length of ``vertex_pairs``.
...@@ -371,8 +395,14 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True, ...@@ -371,8 +395,14 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True,
"The link-prediction problem for social networks", Journal of the "The link-prediction problem for social networks", Journal of the
American Society for Information Science and Technology, Volume 58, Issue American Society for Information Science and Technology, Volume 58, Issue
7, pages 1019–1031 (2007), :doi:`10.1002/asi.20591` 7, pages 1019–1031 (2007), :doi:`10.1002/asi.20591`
""" """
if eweight is None:
eweight = libcore.any()
else:
eweight = _prop("e", g, eweight)
if vertex_pairs is None: if vertex_pairs is None:
if sim_map is None: if sim_map is None:
s = g.new_vp("vector<double>") s = g.new_vp("vector<double>")
...@@ -381,29 +411,30 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True, ...@@ -381,29 +411,30 @@ def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True,
if sim_type == "dice": if sim_type == "dice":
libgraph_tool_topology.dice_similarity(g._Graph__graph, libgraph_tool_topology.dice_similarity(g._Graph__graph,
_prop("v", g, s), _prop("v", g, s),
self_loops) eweight)
elif sim_type == "jaccard": elif sim_type == "jaccard":
libgraph_tool_topology.jaccard_similarity(g._Graph__graph, libgraph_tool_topology.jaccard_similarity(g._Graph__graph,
_prop("v", g, s), _prop("v", g, s),
self_loops) eweight)
elif sim_type == "inv-log-weight": elif sim_type == "inv-log-weight":
libgraph_tool_topology.inv_log_weight_similarity(g._Graph__graph, libgraph_tool_topology.inv_log_weight_similarity(g._Graph__graph,
_prop("v", g, s)) _prop("v", g, s),
eweight)
else: else:
vertex_pairs = numpy.asarray(vertex_pairs, dtype="int64") vertex_pairs = numpy.asarray(vertex_pairs, dtype="int64")
s = numpy.zeros(vertex_pairs.shape[0], dtype="double") s = numpy.zeros(vertex_pairs.shape[0], dtype="double")
if sim_type == "dice": if sim_type == "dice":
libgraph_tool_topology.dice_similarity_pairs(g._Graph__graph, libgraph_tool_topology.dice_similarity_pairs(g._Graph__graph,
vertex_pairs, vertex_pairs,
s, self_loops) s, eweight)
elif sim_type == "jaccard": elif sim_type == "jaccard":
libgraph_tool_topology.jaccard_similarity_pairs(g._Graph__graph, libgraph_tool_topology.jaccard_similarity_pairs(g._Graph__graph,
vertex_pairs, vertex_pairs,
s, self_loops) s, eweight)
elif sim_type == "inv-log-weight": elif sim_type == "inv-log-weight":
libgraph_tool_topology.\ libgraph_tool_topology.\
inv_log_weight_similarity_pairs(g._Graph__graph, vertex_pairs, inv_log_weight_similarity_pairs(g._Graph__graph, vertex_pairs,
s) s, eweight)
return s return s
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment