Commit e173b204 authored by Tiago Peixoto

Implement topology.vertex_similarity()

This fixes issue #287
parent 058b172a
......@@ -36,10 +36,12 @@ libgraph_tool_topology_la_SOURCES = \
graph_topological_sort.cc \
graph_topology.cc \
graph_tsp.cc \
graph_transitive_closure.cc
graph_transitive_closure.cc \
graph_vertex_similarity.cc
libgraph_tool_topology_la_include_HEADERS = \
graph_components.hh \
graph_kcore.hh \
graph_similarity.hh
\ No newline at end of file
graph_similarity.hh \
graph_vertex_similarity.hh
......@@ -58,6 +58,7 @@ void export_all_dists();
void export_diam();
void export_random_matching();
void export_maximal_vertex_set();
void export_vertex_similarity();
BOOST_PYTHON_MODULE(libgraph_tool_topology)
......@@ -84,4 +85,5 @@ BOOST_PYTHON_MODULE(libgraph_tool_topology)
export_diam();
export_random_matching();
export_maximal_vertex_set();
export_vertex_similarity();
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <boost/python.hpp>
#include "graph_tool.hh"
#include "graph_vertex_similarity.hh"
#include "numpy_bind.hh"
using namespace std;
using namespace boost;
using namespace graph_tool;
// Python-facing entry point: fills a vertex property map with the Dice
// similarity of every vertex to every other vertex.
//
//   gi        - graph interface whose current view is dispatched on
//   as        - type-erased property map; resolved against
//               vertex_floating_vector_properties()
//   self_loop - forwarded unchanged to dice()
void get_dice_similarity(GraphInterface& gi, boost::any as, bool self_loop)
{
    gt_dispatch<>()
        ([&](auto& graph, auto& sim_map)
         {
             // Pairwise metric handed to the all-pairs driver.
             auto metric = [&](auto a, auto b, auto& mark)
                 {
                     return dice(a, b, self_loop, mark, graph);
                 };
             all_pairs_similarity(graph, sim_map, metric);
         },
         all_graph_views(), vertex_floating_vector_properties())
        (gi.get_graph_view(), as);
}
// Python-facing entry point: computes the Dice similarity for an explicit
// list of vertex pairs.
//
//   gi        - graph interface whose current view is dispatched on
//   opairs    - Python object converted with get_array<int64_t,2>(); row i
//               is read as the pair (opairs[i][0], opairs[i][1])
//   osim      - Python object converted with get_array<double,1>(); entry i
//               receives the similarity of pair i
//   self_loop - forwarded unchanged to dice()
void get_dice_similarity_pairs(GraphInterface& gi, python::object opairs,
                               python::object osim, bool self_loop)
{
    multi_array_ref<double,1> sim = get_array<double,1>(osim);
    multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs);

    gt_dispatch<>()
        ([&](auto& graph)
         {
             // Pairwise metric handed to the pair-list driver.
             auto metric = [&](auto a, auto b, auto& mark)
                 {
                     return dice(a, b, self_loop, mark, graph);
                 };
             some_pairs_similarity(graph, pairs, sim, metric);
         },
         all_graph_views())
        (gi.get_graph_view());
}
// Python-facing entry point: fills a vertex property map with the Jaccard
// similarity of every vertex to every other vertex.
//
//   gi        - graph interface whose current view is dispatched on
//   as        - type-erased property map; resolved against
//               vertex_floating_vector_properties()
//   self_loop - forwarded unchanged to jaccard()
void get_jaccard_similarity(GraphInterface& gi, boost::any as, bool self_loop)
{
    gt_dispatch<>()
        ([&](auto& graph, auto& sim_map)
         {
             // Pairwise metric handed to the all-pairs driver.
             auto metric = [&](auto a, auto b, auto& mark)
                 {
                     return jaccard(a, b, self_loop, mark, graph);
                 };
             all_pairs_similarity(graph, sim_map, metric);
         },
         all_graph_views(), vertex_floating_vector_properties())
        (gi.get_graph_view(), as);
}
// Python-facing entry point: computes the Jaccard similarity for an explicit
// list of vertex pairs.
//
//   gi        - graph interface whose current view is dispatched on
//   opairs    - Python object converted with get_array<int64_t,2>(); row i
//               is read as the pair (opairs[i][0], opairs[i][1])
//   osim      - Python object converted with get_array<double,1>(); entry i
//               receives the similarity of pair i
//   self_loop - forwarded unchanged to jaccard()
void get_jaccard_similarity_pairs(GraphInterface& gi, python::object opairs,
                                  python::object osim, bool self_loop)
{
    multi_array_ref<double,1> sim = get_array<double,1>(osim);
    multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs);

    gt_dispatch<>()
        ([&](auto& graph)
         {
             // Pairwise metric handed to the pair-list driver.
             auto metric = [&](auto a, auto b, auto& mark)
                 {
                     return jaccard(a, b, self_loop, mark, graph);
                 };
             some_pairs_similarity(graph, pairs, sim, metric);
         },
         all_graph_views())
        (gi.get_graph_view());
}
// Python-facing entry point: fills a vertex property map with the inverse
// log-weighted similarity of every vertex to every other vertex.
//
//   gi - graph interface whose current view is dispatched on
//   as - type-erased property map; resolved against
//        vertex_floating_vector_properties()
void get_inv_log_weight_similarity(GraphInterface& gi, boost::any as)
{
    gt_dispatch<>()
        ([&](auto& graph, auto& sim_map)
         {
             // Pairwise metric handed to the all-pairs driver.
             auto metric = [&](auto a, auto b, auto& mark)
                 {
                     return inv_log_weighted(a, b, mark, graph);
                 };
             all_pairs_similarity(graph, sim_map, metric);
         },
         all_graph_views(), vertex_floating_vector_properties())
        (gi.get_graph_view(), as);
}
// Python-facing entry point: computes the inverse log-weighted similarity
// for an explicit list of vertex pairs.
//
//   gi     - graph interface whose current view is dispatched on
//   opairs - Python object converted with get_array<int64_t,2>(); row i is
//            read as the pair (opairs[i][0], opairs[i][1])
//   osim   - Python object converted with get_array<double,1>(); entry i
//            receives the similarity of pair i
void get_inv_log_weight_similarity_pairs(GraphInterface& gi,
                                         python::object opairs,
                                         python::object osim)
{
    multi_array_ref<double,1> sim = get_array<double,1>(osim);
    multi_array_ref<int64_t,2> pairs = get_array<int64_t,2>(opairs);

    gt_dispatch<>()
        ([&](auto& graph)
         {
             // Pairwise metric handed to the pair-list driver.
             auto metric = [&](auto a, auto b, auto& mark)
                 {
                     return inv_log_weighted(a, b, mark, graph);
                 };
             some_pairs_similarity(graph, pairs, sim, metric);
         },
         all_graph_views())
        (gi.get_graph_view());
}
// Registers the vertex-similarity entry points with Boost.Python. Each
// similarity type is exposed twice: once for all pairs (results written to
// a property map) and once for an explicit pair list ("*_pairs", results
// written to a numpy array).
void export_vertex_similarity()
{
    python::def("dice_similarity", &get_dice_similarity);
    python::def("dice_similarity_pairs", &get_dice_similarity_pairs);
    python::def("jaccard_similarity", &get_jaccard_similarity);
    python::def("jaccard_similarity_pairs", &get_jaccard_similarity_pairs);
    python::def("inv_log_weight_similarity", &get_inv_log_weight_similarity);
    python::def("inv_log_weight_similarity_pairs",
                &get_inv_log_weight_similarity_pairs);
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef GRAPH_VERTEX_SIMILARITY_HH
#define GRAPH_VERTEX_SIMILARITY_HH
#include "graph_util.hh"
namespace graph_tool
{
using namespace std;
using namespace boost;
// Dice similarity between u and v: twice the number of neighbours they have
// in common, divided by the sum of their neighbourhood sizes.
//
//   u, v      - the two vertices to compare
//   self_loop - if true, BOTH u and v are treated as adjacent to themselves
//               (closed neighbourhoods); a vertex that already lists itself
//               as a neighbour is not counted a second time
//   mark      - scratch container indexable by vertex, with every entry
//               false on entry; it is restored to all-false before returning
//   g         - the graph; only out-neighbours are visited
//
// Returns a value in [0, 1] for graphs without parallel edges. If both
// neighbourhoods are empty the result is defined as 0 rather than 0/0.
template <class Graph, class Vertex, class Mark>
double dice(Vertex u, Vertex v, bool self_loop, Mark& mark, Graph& g)
{
    size_t count = 0, ku = 0, kv = 0;

    // Flag the neighbourhood of u and record its size.
    for (auto w : adjacent_vertices_range(u, g))
    {
        mark[w] = true;
        ++ku;
    }
    // The implicit self-loop enlarges the neighbourhood of u, so it must
    // also be reflected in the denominator.
    if (self_loop && !mark[u])
    {
        mark[u] = true;
        ++ku;
    }

    // Walk the neighbourhood of v, counting hits against the flags.
    bool v_lists_itself = false;
    for (auto w : adjacent_vertices_range(v, g))
    {
        if (w == v)
            v_lists_itself = true;
        if (mark[w])
            ++count;
        ++kv;
    }
    // Treat v symmetrically to u: add its implicit self-loop exactly once.
    if (self_loop && !v_lists_itself)
    {
        ++kv;
        if (mark[v])
            ++count;
    }

    // Restore the scratch flags for the next call.
    for (auto w : adjacent_vertices_range(u, g))
        mark[w] = false;
    if (self_loop)
        mark[u] = false;

    const size_t total = ku + kv;
    if (total == 0)
        return 0.;
    return 2 * count / double(total);
}
// Jaccard similarity between u and v: the number of neighbours they have in
// common divided by the size of the union of their neighbourhoods.
//
//   u, v      - the two vertices to compare
//   self_loop - if true, BOTH u and v are treated as adjacent to themselves
//               (closed neighbourhoods); a vertex that already lists itself
//               as a neighbour is not counted a second time
//   mark      - scratch container indexable by vertex, with every entry
//               false on entry; it is restored to all-false before returning
//   g         - the graph; only out-neighbours are visited
//
// Returns a value in [0, 1] for graphs without parallel edges. If the union
// is empty the result is defined as 0 rather than 0/0.
template <class Graph, class Vertex, class Mark>
double jaccard(Vertex u, Vertex v, bool self_loop, Mark& mark, Graph& g)
{
    size_t count = 0, total = 0;

    // Flag the neighbourhood of u; every entry contributes to the union.
    for (auto w : adjacent_vertices_range(u, g))
    {
        mark[w] = true;
        ++total;
    }
    // The implicit self-loop makes u a member of its own neighbourhood, so
    // it has to be counted in the union as well.
    if (self_loop && !mark[u])
    {
        mark[u] = true;
        ++total;
    }

    // Neighbours of v either hit the intersection or extend the union.
    bool v_lists_itself = false;
    for (auto w : adjacent_vertices_range(v, g))
    {
        if (w == v)
            v_lists_itself = true;
        if (mark[w])
            ++count;
        else
            ++total;
    }
    // Treat v symmetrically to u: add its implicit self-loop exactly once.
    if (self_loop && !v_lists_itself)
    {
        if (mark[v])
            ++count;
        else
            ++total;
    }

    // Restore the scratch flags for the next call.
    for (auto w : adjacent_vertices_range(u, g))
        mark[w] = false;
    if (self_loop)
        mark[u] = false;

    if (total == 0)
        return 0.;
    return count / double(total);
}
// Inverse log-weighted similarity between u and v: the sum of 1/log(k) over
// every out-neighbour of v that is also an out-neighbour of u, where k is
// the in-degree of that neighbour on directed graphs and its out-degree
// otherwise.
//
//   u, v - the two vertices to compare
//   mark - scratch container indexable by vertex, with every entry false on
//          entry; it is restored to all-false before returning
//   g    - the graph
//
// NOTE(review): a shared neighbour with k == 1 yields 1/log(1), i.e. a
// division by zero; confirm whether callers expect an infinite result.
template <class Graph, class Vertex, class Mark>
double inv_log_weighted(Vertex u, Vertex v, Mark& mark, Graph& g)
{
    // Flag the neighbourhood of u.
    for (auto nbr : adjacent_vertices_range(u, g))
        mark[nbr] = true;

    double score = 0;
    for (auto nbr : adjacent_vertices_range(v, g))
    {
        // Only neighbours shared with u contribute.
        if (!mark[nbr])
            continue;

        if (is_directed::apply<Graph>::type::value)
            score += 1. / log(in_degreeS()(nbr, g));
        else
            score += 1. / log(out_degree(nbr, g));
    }

    // Restore the scratch flags for the next call.
    for (auto nbr : adjacent_vertices_range(u, g))
        mark[nbr] = false;

    return score;
}
// Evaluates the similarity functor for every ordered pair of vertices.
//
//   g - the graph
//   s - vertex property map holding one resizable sequence per vertex;
//       s[v] is resized to num_vertices(g) and s[v][w] receives f(v, w, ...)
//   f - callable invoked as f(v, w, mask), where mask is a vector<bool> of
//       size num_vertices(g) initialised to false
//
// The outer loop is an OpenMP parallel-for when N exceeds
// OPENMP_MIN_THRESH; `firstprivate(mask)` gives each thread its own copy of
// the scratch mask.
template <class Graph, class VMap, class Sim>
void all_pairs_similarity(Graph& g, VMap s, Sim&& f)
{
    size_t i, N = num_vertices(g);
    std::vector<bool> mask(N, false);
    #pragma omp parallel for default(shared) private(i) schedule(runtime) \
        firstprivate(mask) if (N > OPENMP_MIN_THRESH)
    for (i = 0; i < N; ++i)
    {
        auto row = vertex(i, g);
        // Indices that do not map to a valid vertex are skipped.
        if (is_valid_vertex(row, g))
        {
            s[row].resize(num_vertices(g));
            for (auto col : vertices_range(g))
                s[row][col] = f(row, col, mask);
        }
    }
}
// Evaluates the similarity functor for an explicit list of vertex pairs.
//
//   g     - the graph
//   vlist - two-dimensional array; row i holds the pair
//           (vlist[i][0], vlist[i][1]), read as vertex indices
//   slist - one-dimensional output; slist[i] receives the result for row i
//   f     - callable invoked as f(u, v, mask), where mask is a vector<bool>
//           of size num_vertices(g) initialised to false
//
// No bounds checking is done on the indices found in vlist. The loop is an
// OpenMP parallel-for when the number of pairs exceeds OPENMP_MIN_THRESH;
// `firstprivate(mask)` gives each thread its own copy of the scratch mask.
template <class Graph, class Vlist, class Slist, class Sim>
void some_pairs_similarity(Graph& g, Vlist& vlist, Slist& slist, Sim&& f)
{
    size_t i, N = vlist.shape()[0];
    std::vector<bool> mask(num_vertices(g), false);
    #pragma omp parallel for default(shared) private(i) schedule(runtime) \
        firstprivate(mask) if (N > OPENMP_MIN_THRESH)
    for (i = 0; i < N; ++i)
    {
        const size_t first = vlist[i][0];
        const size_t second = vlist[i][1];
        slist[i] = f(first, second, mask);
    }
}
} // graph_tool namespace
#endif // GRAPH_VERTEX_SIMILARITY_HH
......@@ -35,6 +35,7 @@ Summary
all_paths
pseudo_diameter
similarity
vertex_similarity
isomorphism
subgraph_isomorphism
mark_subgraph
......@@ -70,7 +71,7 @@ dl_import("from . import libgraph_tool_topology")
from .. import _prop, Vector_int32_t, _check_prop_writable, \
_check_prop_scalar, _check_prop_vector, Graph, PropertyMap, GraphView,\
libcore, _get_rng, _degree, perfect_prop_hash
libcore, _get_rng, _degree, perfect_prop_hash, _limit_args
from .. stats import label_self_loops
import random, sys, numpy, collections
......@@ -83,7 +84,8 @@ __all__ = ["isomorphism", "subgraph_isomorphism", "mark_subgraph",
"label_out_component", "kcore_decomposition", "shortest_distance",
"shortest_path", "all_shortest_paths", "all_predecessors",
"all_paths", "pseudo_diameter", "is_bipartite", "is_DAG",
"is_planar", "make_maximal_planar", "similarity", "edge_reciprocity"]
"is_planar", "make_maximal_planar", "similarity", "vertex_similarity",
"edge_reciprocity"]
def similarity(g1, g2, label1=None, label2=None, norm=True):
r"""Return the adjacency similarity between the two graphs.
......@@ -160,6 +162,147 @@ def similarity(g1, g2, label1=None, label2=None, norm=True):
s /= float(max(g1.num_edges(), g2.num_edges()))
return s
@_limit_args({"sim_type": ["dice", "jaccard", "inv-log-weight"]})
def vertex_similarity(g, sim_type="jaccard", vertex_pairs=None, self_loops=True,
                      sim_map=None):
    r"""Return the similarity between pairs of vertices.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        The graph to be used.
    sim_type : ``str`` (optional, default: ``"jaccard"``)
        Type of similarity to use. This must be one of ``"dice"``, ``"jaccard"``
        or ``"inv-log-weight"``.
    vertex_pairs : iterable of pairs of integers (optional, default: ``None``)
        Pairs of vertices to compute the similarity. If omitted, all pairs will
        be considered.
    self_loops : bool (optional, default: ``True``)
        If ``True``, vertices will be considered adjacent to themselves for the
        purpose of the similarity computation. This option has no effect if
        ``sim_type == "inv-log-weight"``.
    sim_map : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        If provided, and ``vertex_pairs == None``, the vertex similarities will
        be stored in this vector-valued property. Otherwise, a new one will be
        created.

    Returns
    -------
    similarities : :class:`numpy.ndarray` or :class:`~graph_tool.PropertyMap`
        If ``vertex_pairs`` was supplied, this will be a :class:`numpy.ndarray`
        with the corresponding similarities, otherwise it will be a
        vector-valued vertex :class:`~graph_tool.PropertyMap`, with the
        similarities to all other vertices.

    Raises
    ------
    ValueError
        If ``vertex_pairs`` is supplied but cannot be interpreted as an array
        of shape ``(P, 2)``, or if it contains a vertex index that is negative
        or not smaller than the number of vertices in the graph.

    Notes
    -----
    According to ``sim_type``, this function computes the following similarities:

    ``sim_type == "dice"``
        The Sørensen–Dice similarity [sorensen-dice]_ is twice the number of
        common neighbours between two nodes divided by the sum of their degrees.

    ``sim_type == "jaccard"``
        The Jaccard similarity [jaccard]_ is the number of common neighbours
        between two nodes divided by the size of the set of all neighbours to
        both vertices.

    ``sim_type == "inv-log-weight"``
        The inverse log weighted similarity [adamic-friends-2003]_ is the sum of
        the weights of common neighbours between two vertices, where the weights
        are computed as :math:`1/\log(k)`, with :math:`k` being the degree of the
        node.

    For directed graphs, only out-neighbours are considered in the above
    algorithms (for "inv-log-weight", the in-degrees are used to compute the
    weights). To use the in-neighbours instead, a :class:`~graph_tool.GraphView`
    should be used to reverse the graph, e.g. ``vertex_similarity(GraphView(g,
    reversed=True))``.

    The algorithm runs with complexity :math:`O(\left<k\right>N^2)` if
    ``vertex_pairs == None``, otherwise with :math:`O(\left<k\right>P)` where
    :math:`P` is the length of ``vertex_pairs``.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    .. testcode::
       :hide:

       import matplotlib

    >>> g = gt.collection.data["polbooks"]
    >>> s = gt.vertex_similarity(g, "jaccard")
    >>> color = g.new_vp("double")
    >>> color.a = s[0].a
    >>> gt.graph_draw(g, pos=g.vp.pos, vertex_text=g.vertex_index,
    ...               vertex_color=color, vertex_fill_color=color,
    ...               vcmap=matplotlib.cm.inferno,
    ...               output="polbooks-jaccard.pdf")
    <...>

    .. testcode::
       :hide:

       gt.graph_draw(g, pos=g.vp.pos, vertex_text=g.vertex_index,
                     vertex_color=color, vertex_fill_color=color,
                     vcmap=matplotlib.cm.inferno,
                     output="polbooks-jaccard.png")

    .. figure:: polbooks-jaccard.*

       Jaccard similarities to vertex ``0`` in a political books network.

    References
    ----------
    .. [sorensen-dice] https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    .. [jaccard] https://en.wikipedia.org/wiki/Jaccard_index
    .. [adamic-friends-2003] Lada A. Adamic and Eytan Adar, "Friends and neighbors
       on the Web", Social Networks Volume 25, Issue 3, Pages 211–230 (2003)
       :doi:`10.1016/S0378-8733(03)00009-1`
    .. [liben-nowell-link-prediction-2007] David Liben-Nowell and Jon Kleinberg,
       "The link-prediction problem for social networks", Journal of the
       American Society for Information Science and Technology, Volume 58, Issue
       7, pages 1019–1031 (2007), :doi:`10.1002/asi.20591`
    """

    if vertex_pairs is None:
        if sim_map is None:
            s = g.new_vp("vector<double>")
        else:
            s = sim_map
        if sim_type == "dice":
            libgraph_tool_topology.dice_similarity(g._Graph__graph,
                                                   _prop("v", g, s),
                                                   self_loops)
        elif sim_type == "jaccard":
            libgraph_tool_topology.jaccard_similarity(g._Graph__graph,
                                                      _prop("v", g, s),
                                                      self_loops)
        elif sim_type == "inv-log-weight":
            libgraph_tool_topology.inv_log_weight_similarity(g._Graph__graph,
                                                             _prop("v", g, s))
    else:
        # The parameter is documented as a generic iterable; materialise
        # anything that is not already an array so that generators work too.
        if not isinstance(vertex_pairs, numpy.ndarray):
            vertex_pairs = list(vertex_pairs)
        vertex_pairs = numpy.asarray(vertex_pairs, dtype="int64")

        # An empty list converts to shape (0,); normalise it to "zero pairs".
        if vertex_pairs.size == 0:
            vertex_pairs = vertex_pairs.reshape((0, 2))

        # The C++ side indexes its scratch mask with these values without any
        # checks, so reject malformed input here instead of corrupting memory.
        if vertex_pairs.ndim != 2 or vertex_pairs.shape[1] != 2:
            raise ValueError("vertex_pairs must have shape (P, 2), got: " +
                             str(vertex_pairs.shape))
        if vertex_pairs.shape[0] > 0:
            # Vertex filters are ignored on purpose: the bound that matters is
            # the size of the underlying (unfiltered) vertex index range.
            N = g.num_vertices(True)
            if vertex_pairs.min() < 0 or vertex_pairs.max() >= N:
                raise ValueError("vertex_pairs contains vertex indices " +
                                 "outside of the range [0, %d)" % N)

        s = numpy.zeros(vertex_pairs.shape[0], dtype="double")
        if sim_type == "dice":
            libgraph_tool_topology.dice_similarity_pairs(g._Graph__graph,
                                                         vertex_pairs,
                                                         s, self_loops)
        elif sim_type == "jaccard":
            libgraph_tool_topology.jaccard_similarity_pairs(g._Graph__graph,
                                                            vertex_pairs,
                                                            s, self_loops)
        elif sim_type == "inv-log-weight":
            libgraph_tool_topology.\
                inv_log_weight_similarity_pairs(g._Graph__graph, vertex_pairs,
                                                s)
    return s
def isomorphism(g1, g2, vertex_inv1=None, vertex_inv2=None, isomap=False):
r"""Check whether two graphs are isomorphic.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment