Commit c8d01f45 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

topology.similarity(): Add support for edge weights

parent 9668242a
......@@ -33,6 +33,7 @@ libgraph_tool_topology_la_SOURCES = \
graph_reciprocity.cc \
graph_sequential_color.cc \
graph_similarity.cc \
graph_similarity_imp.cc \
graph_subgraph_isomorphism.cc \
graph_topological_sort.cc \
graph_topology.cc \
......
......@@ -29,35 +29,51 @@ using namespace std;
using namespace boost;
using namespace graph_tool;
size_t similarity(GraphInterface& gi1, GraphInterface& gi2, boost::any label1,
boost::any label2)
template <class Type, class Index>
auto uncheck(boost::unchecked_vector_property_map<Type,Index>, boost::any p)
{
size_t s = 0;
gt_dispatch<>()
(std::bind(get_similarity(), std::placeholders::_1, std::placeholders::_2,
std::placeholders::_3, label2, std::ref(s)),
all_graph_views(),
all_graph_views(),
writable_vertex_properties())
(gi1.get_graph_view(), gi2.get_graph_view(), label1);
return s;
return boost::any_cast<boost::checked_vector_property_map<Type,Index>>(p).get_unchecked();
}
// Generic fallback of uncheck(): extracts from `p` a property map of the same
// type as the first argument, which is used only as a type tag.
//
// The result is returned by value (`auto`, not `auto&&`). With an lvalue tag,
// T is deduced as a reference type and any_cast<T> yields a reference into the
// by-value parameter `p`; with an rvalue tag it yields a temporary. Returning
// either as a reference would hand the caller a reference to an object whose
// lifetime ends with this call, so a copy is made here instead.
//
// boost::any_cast throws boost::bad_any_cast if `p` does not hold that type.
template <class T>
auto uncheck(T&&, boost::any p)
{
    return boost::any_cast<T>(p);
}
size_t similarity_fast(GraphInterface& gi1, GraphInterface& gi2, boost::any label1,
boost::any label2)
// Edge "weight" map substituted when the caller supplies no weights
// (presumably evaluates to 1 for every edge -- confirm against
// UnityPropertyMap).
using ecmap_t = UnityPropertyMap<size_t, GraphInterface::edge_t>;

// Type list dispatched over for the weight argument: every scalar edge
// property map type, plus the unit-weight map above.
using weight_props_t =
    boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type;
python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2)
{
size_t s = 0;
gt_dispatch<boost::mpl::true_>()
(std::bind(get_similarity_fast(), std::placeholders::_1,
std::placeholders::_2, std::placeholders::_3,
label2, std::ref(s)),
if (weight1.empty())
weight1 = ecmap_t();
if (weight2.empty())
weight2 = ecmap_t();
python::object s;
gt_dispatch<>()
([&](const auto& g1, const auto& g2, auto ew1, auto l1)
{
auto l2 = uncheck(l1, label2);
auto ew2 = uncheck(ew1, weight2);
auto ret = get_similarity(g1, g2, ew1, ew2, l1, l2);
s = python::object(ret);
},
all_graph_views(),
all_graph_views(),
vertex_integer_properties())
(gi1.get_graph_view(), gi2.get_graph_view(), label1);
weight_props_t(),
writable_vertex_properties())
(gi1.get_graph_view(), gi2.get_graph_view(), weight1, label1);
return s;
}
// Forward declaration of the integer-label variant of similarity(). It takes
// the same arguments as similarity(): the two graphs, their edge weight maps
// and their vertex label maps, each wrapped in a boost::any.
// NOTE(review): the definition is presumably in graph_similarity_imp.cc,
// which this commit adds to the build -- confirm.
python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2);
void export_similarity()
{
python::def("similarity", &similarity);
......
......@@ -25,132 +25,161 @@ namespace graph_tool
using namespace std;
using namespace boost;
template <class Keys, class Set1, class Set2>
size_t intersection_size(Keys& ks, Set1& s1, Set2& s2)
auto set_difference(Keys& ks, Set1& s1, Set2& s2)
{
size_t s = 0;
for (auto k : ks)
typename Set1::value_type::second_type s = 0;
for (auto& k : ks)
{
int c1 = s1.count(k);
int c2 = s2.count(k);
s += max(c1, c2) - abs(c1 - c2);
auto x1 = s1[k];
auto x2 = s2[k];
s += std::max(x1, x2) - std::min(x1, x2);
}
return s;
}
struct get_similarity
template <class Vertex, class WeightMap, class LabelMap, class Graph1, class Graph2>
auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
LabelMap& l1, LabelMap& l2, const Graph1& g1,
const Graph2& g2)
{
template <class Graph1, class Graph2, class LabelMap>
void operator()(const Graph1& g1, const Graph2& g2, LabelMap l1,
boost::any al2, size_t& s) const
{
LabelMap l2 = boost::any_cast<typename LabelMap::checked_t>(al2).get_unchecked(num_vertices(g2));
typedef typename property_traits<LabelMap>::value_type label_t;
typedef typename property_traits<WeightMap>::value_type val_t;
typedef typename property_traits<LabelMap>::value_type label_t;
std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1;
std::unordered_map<label_t, val_t> adj2;
std::unordered_map<label_t, typename graph_traits<Graph1>::vertex_descriptor>
lmap1;
std::unordered_map<label_t, typename graph_traits<Graph2>::vertex_descriptor>
lmap2;
for (auto v : vertices_range(g1))
lmap1[get(l1, v)] = v;
for (auto v : vertices_range(g2))
lmap2[get(l2, v)] = v;
if (v1 != graph_traits<Graph1>::null_vertex())
{
for (auto e : out_edges_range(v1, g1))
{
auto w = ew1[e];
auto k = get(l1, target(e, g1));
adj1[k] += w;
keys.insert(k);
}
}
s = 0;
for (auto& lv1 : lmap1)
if (v2 != graph_traits<Graph2>::null_vertex())
{
for (auto e : out_edges_range(v2, g2))
{
auto v1 = lv1.second;
auto li2 = lmap2.find(lv1.first);
if (li2 == lmap2.end())
continue;
auto v2 = li2->second;
std::unordered_set<label_t> keys;
std::unordered_multiset<label_t> adj1;
std::unordered_multiset<label_t> adj2;
for (auto a1 : adjacent_vertices_range(v1, g1))
{
adj1.insert(get(l1, a1));
keys.insert(get(l1, a1));
}
for (auto a2 : adjacent_vertices_range(v2, g2))
{
adj2.insert(get(l2, a2));
keys.insert(get(l2, a2));
}
s += intersection_size(keys, adj1, adj2);
auto w = ew2[e];
auto k = get(l2, target(e, g2));
adj2[k] += w;
keys.insert(k);
}
}
};
struct get_similarity_fast
return set_difference(keys, adj1, adj2);
}
template <class Graph1, class Graph2, class WeightMap, class LabelMap>
auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
WeightMap ew2, LabelMap l1, LabelMap l2)
{
template <class Graph1, class Graph2, class LabelMap>
void operator()(const Graph1& g1, const Graph2& g2, LabelMap l1,
boost::any al2, size_t& s) const
typedef typename property_traits<LabelMap>::value_type label_t;
typedef typename property_traits<WeightMap>::value_type val_t;
typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t;
std::unordered_map<label_t, vertex_t> lmap1;
std::unordered_map<label_t, vertex_t> lmap2;
for (auto v : vertices_range(g1))
lmap1[get(l1, v)] = v;
for (auto v : vertices_range(g2))
lmap2[get(l2, v)] = v;
val_t s = 0;
for (auto& lv1 : lmap1)
{
LabelMap l2 = boost::any_cast<LabelMap>(al2);
vertex_t v1 = lv1.second;
vertex_t v2;
typedef typename property_traits<LabelMap>::value_type label_t;
auto li2 = lmap2.find(lv1.first);
if (li2 == lmap2.end())
v2 = graph_traits<Graph2>::null_vertex();
else
v2 = li2->second;
vector<typename graph_traits<Graph1>::vertex_descriptor> lmap1;
vector<typename graph_traits<Graph1>::vertex_descriptor> lmap2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2);
}
for (auto v : vertices_range(g1))
{
size_t i = get(l1, v);
if (lmap1.size() <= i)
lmap1.resize(i + 1);
lmap1[i] = v;
}
for (auto& lv2 : lmap2)
{
vertex_t v2 = lv2.second;
vertex_t v1;
for (auto v : vertices_range(g2))
{
size_t i = get(l2, v);
if (lmap2.size() <= i)
lmap2.resize(i + 1);
lmap2[i] = v;
}
auto li1 = lmap1.find(lv2.first);
if (li1 == lmap1.end())
v1 = graph_traits<Graph2>::null_vertex();
else
continue;
size_t ss = 0;
#pragma omp parallel if (num_vertices(g1) > OPENMP_MIN_THRESH) \
reduction(+:ss)
parallel_loop_no_spawn
(lmap1,
[&](size_t i, auto v1)
{
auto v2 = lmap2[i];
std::unordered_set<label_t> keys;
std::unordered_multiset<label_t> adj1;
std::unordered_multiset<label_t> adj2;
for (auto a1 : adjacent_vertices_range(v1, g1))
{
adj1.insert(get(l1, a1));
keys.insert(get(l1, a1));
}
for (auto a2 : adjacent_vertices_range(v2, g2))
{
adj2.insert(get(l2, a2));
keys.insert(get(l2, a2));
}
ss += intersection_size(keys, adj1, adj2);
});
s = ss;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2);
}
};
return s;
}
// Computes the summed per-vertex adjacency difference between g1 and g2 for
// integer vertex labels, using the labels directly as indices into dense
// lookup tables instead of hashing them.
//
//   g1, g2    graphs to compare
//   ew1, ew2  edge weight maps of g1 and g2 (same map type)
//   l1, l2    vertex label maps of g1 and g2 (same map type); labels are
//             converted to size_t and used as table indices
//
// Returns the sum, over every label carried by a vertex in at least one of
// the graphs, of vertex_difference() for the pair of vertices with that label
// (a missing vertex is passed as null_vertex()). The accumulator has the
// weight map's value type.
//
// NOTE(review): a negative label converts to a huge size_t and would trigger
// an enormous resize; callers are presumably expected to pass non-negative
// labels -- confirm.
template <class Graph1, class Graph2, class WeightMap, class LabelMap>
auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
                         WeightMap ew2, LabelMap l1, LabelMap l2)
{
    typedef typename property_traits<WeightMap>::value_type val_t;
    // vertex_difference() takes both vertices through a single template
    // parameter, so one descriptor type is used for the tables of both graphs.
    typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t;

    // lmapK[label] is the vertex of gK carrying that label, or null_vertex().
    // If several vertices share a label, the last one visited wins.
    vector<vertex_t> lmap1, lmap2;

    for (auto v : vertices_range(g1))
    {
        size_t i = get(l1, v);
        if (lmap1.size() <= i)
            lmap1.resize(i + 1, graph_traits<Graph1>::null_vertex());
        lmap1[i] = v;
    }

    for (auto v : vertices_range(g2))
    {
        size_t i = get(l2, v);
        if (lmap2.size() <= i)
            lmap2.resize(i + 1, graph_traits<Graph2>::null_vertex());
        lmap2[i] = v;
    }

    // Pad both tables to a common length so they can be indexed in lockstep.
    size_t N = std::max(lmap1.size(), lmap2.size());
    lmap1.resize(N, graph_traits<Graph1>::null_vertex());
    lmap2.resize(N, graph_traits<Graph2>::null_vertex());

    val_t s = 0;

    // A single pass over the common index range visits every label exactly
    // once. The previous two-pass form skipped an index in the first pass only
    // when *both* vertices were null, so labels present only in g2 were
    // processed there and then again in the second pass, counting their
    // contribution twice -- unlike get_similarity(), which counts them once.
    // (parallel_loop_no_spawn presumably invokes the callback once per
    // (index, element) of the container -- confirm.)
    #pragma omp parallel if (N > OPENMP_MIN_THRESH) \
        reduction(+:s)
    parallel_loop_no_spawn
        (lmap1,
         [&](size_t i, auto v1)
         {
             auto v2 = lmap2[i];
             // Label unused in both graphs: nothing to compare.
             if (v1 == graph_traits<Graph1>::null_vertex() &&
                 v2 == graph_traits<Graph2>::null_vertex())
                 return;
             s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2);
         });

    return s;
}
} // graph_tool namespace
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_python_interface.hh"
#include "graph.hh"
#include "graph_filtering.hh"
#include "graph_properties.hh"
#include "graph_selectors.hh"
#include "graph_similarity.hh"
using namespace std;
using namespace boost;
using namespace graph_tool;
// Overload of uncheck() for unchecked vector property maps. The first
// argument is used only as a type tag; `p` is expected to hold the *checked*
// counterpart of that map type, which is extracted and converted to its
// unchecked form via get_unchecked().
template <class Type, class Index>
auto uncheck(boost::unchecked_vector_property_map<Type,Index>, boost::any p)
{
    typedef boost::checked_vector_property_map<Type,Index> checked_map_t;
    auto checked = boost::any_cast<checked_map_t>(p);
    return checked.get_unchecked();
}
// Generic fallback of uncheck(): extracts from `p` a property map of the same
// type as the first argument, which is used only as a type tag.
//
// The result is returned by value (`auto`, not `auto&&`). With an lvalue tag,
// T is deduced as a reference type and any_cast<T> yields a reference into the
// by-value parameter `p`; with an rvalue tag it yields a temporary. Returning
// either as a reference would hand the caller a reference to an object whose
// lifetime ends with this call, so a copy is made here instead.
//
// boost::any_cast throws boost::bad_any_cast if `p` does not hold that type.
template <class T>
auto uncheck(T&&, boost::any p)
{
    return boost::any_cast<T>(p);
}
// Edge "weight" map substituted when the caller supplies no weights
// (presumably evaluates to 1 for every edge -- confirm against
// UnityPropertyMap).
using ecmap_t = UnityPropertyMap<size_t, GraphInterface::edge_t>;

// Type list dispatched over for the weight argument: every scalar edge
// property map type, plus the unit-weight map above.
using weight_props_t =
    boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type;
// Python-facing entry point for the integer-label similarity computation.
//
//   gi1, gi2          the two graphs to compare
//   weight1, weight2  edge weight maps for gi1 / gi2; an empty boost::any
//                     is replaced by an ecmap_t instance
//   label1, label2    vertex label maps for gi1 / gi2
//
// The concrete graph views and the types of weight1 and label1 are resolved
// by gt_dispatch; weight2 and label2 are then extracted with uncheck() as the
// same types, so both maps of each pair must hold matching types. The value
// computed by get_similarity_fast() is returned wrapped in a python::object.
python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
                               boost::any weight1, boost::any weight2,
                               boost::any label1, boost::any label2)
{
    // No weights supplied: fall back to the default edge map type.
    if (weight1.empty())
        weight1 = ecmap_t();
    if (weight2.empty())
        weight2 = ecmap_t();

    python::object result;

    gt_dispatch<>()
        ([&](const auto& graph1, const auto& graph2, auto eweight1,
             auto vlabel1)
         {
             // The second graph's maps are recovered with the types the
             // dispatcher selected for the first graph's maps.
             auto vlabel2 = uncheck(vlabel1, label2);
             auto eweight2 = uncheck(eweight1, weight2);
             result = python::object(get_similarity_fast(graph1, graph2,
                                                         eweight1, eweight2,
                                                         vlabel1, vlabel2));
         },
         all_graph_views(),
         all_graph_views(),
         weight_props_t(),
         vertex_integer_properties())
        (gi1.get_graph_view(), gi2.get_graph_view(), weight1, label1);

    return result;
}
......@@ -88,7 +88,8 @@ __all__ = ["isomorphism", "subgraph_isomorphism", "mark_subgraph",
"is_planar", "make_maximal_planar", "similarity", "vertex_similarity",
"edge_reciprocity"]
def similarity(g1, g2, label1=None, label2=None, norm=True):
def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
norm=True, distance=False):
r"""Return the adjacency similarity between the two graphs.
Parameters
......@@ -97,6 +98,10 @@ def similarity(g1, g2, label1=None, label2=None, norm=True):
First graph to be compared.
g2 : :class:`~graph_tool.Graph`
Second graph to be compared.
eweight1 : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
Edge weights for the first graph to be used in comparison.
eweight2 : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
Edge weights for the second graph to be used in comparison.
label1 : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
Vertex labels for the first graph to be used in comparison. If not
supplied, the vertex indexes are used.
......@@ -106,6 +111,9 @@ def similarity(g1, g2, label1=None, label2=None, norm=True):
norm : bool (optional, default: ``True``)
If ``True``, the returned value is normalized by the total number of
edges.
distance : bool (optional, default: ``False``)
If ``True``, the complementary value is returned, i.e. the distance
between the two graphs.
Returns
-------
......@@ -116,11 +124,34 @@ def similarity(g1, g2, label1=None, label2=None, norm=True):
-----
The adjacency similarity is the sum of equal entries in the adjacency
matrix, given a vertex ordering determined by the vertex labels. In other
words it counts the number of edges which have the same source and target
words, it counts the number of edges which have the same source and target
labels in both graphs.
More specifically, it is defined as:
.. math::
S(\boldsymbol A_1, \boldsymbol A_2) = E - d(\boldsymbol A_1, \boldsymbol A_2)
where
.. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} |A_{ij}^{(1)} - A_{ij}^{(2)}|
is the distance between graphs, and :math:`E=\sum_{i<j}\left(|A_{ij}^{(1)}| +
|A_{ij}^{(2)}|\right)`. This definition holds for undirected graphs, otherwise the
sums go over all directed pairs. If weights are provided, the weighted
adjacency matrix is used.
If ``norm == True`` the value returned is
:math:`S(\boldsymbol A_1, \boldsymbol A_2) / E`.
The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`.
If enabled during compilation, and the vertex labels are integers, this
algorithm runs in parallel.
Examples
--------
.. testcode::
......@@ -138,29 +169,63 @@ def similarity(g1, g2, label1=None, label2=None, norm=True):
24
>>> gt.similarity(u, g)
0.04666666666666667
"""
if label1 is None:
label1 = g1.vertex_index
if label2 is None:
label2 = g2.vertex_index
_check_prop_scalar(label1, name="label1")
_check_prop_scalar(label2, name="label2")
if label1.value_type() != label2.value_type():
try:
label2 = label2.copy(label1.value_type())
except ValueError:
label1 = label1.copy(label2.value_type())
if eweight1 is None and eweight1 is None:
ew1 = ew2 = libcore.any()
else:
if eweight1 is None:
eweight1 = g1.new_ep(eweight2.value_type(), 1)
if eweight2 is None:
eweight2 = g2.new_ep(eweight1.value_type(), 1)
_check_prop_scalar(eweight1, name="eweight1")
_check_prop_scalar(eweight2, name="eweight2")
if eweight1.value_type() != eweight2.value_type():
try:
eweight2 = eweight2.copy(eweight1.value_type())
except ValueError:
eweight1 = eweight1.copy(eweight2.value_type())
ew1 = _prop("e", g1, eweight1)
ew2 = _prop("e", g2, eweight2)
if label1.is_writable() or label2.is_writable():
s = libgraph_tool_topology.\
similarity(g1._Graph__graph, g2._Graph__graph,
_prop("v", g1, label1), _prop("v", g2, label2))
ew1, ew2, _prop("v", g1, label1),
_prop("v", g2, label2))
else:
s = libgraph_tool_topology.\
similarity_fast(g1._Graph__graph, g2._Graph__graph,
_prop("v", g1, label1), _prop("v", g2, label2))
ew1, ew2, _prop("v", g1, label1),
_prop("v", g2, label2))
if not g1.is_directed() or not g2.is_directed():
s /= 2
if eweight1 is None and eweight1 is None:
E = g1.num_edges() + g2.num_edges()
else:
E = float(abs(eweight1.fa).sum() + abs(eweight2.fa).sum())
if not distance:
s = E - s
if norm:
s /= float(max(g1.num_edges(), g2.num_edges()))
return s / E
return s
@_limit_args({"sim_type": ["dice", "jaccard", "inv-log-weight"]})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment