Commit b7793fcc authored by Tiago Peixoto
Browse files

Add generalized L^p norm to similarity()

parent 9bd68ef9
Pipeline #408 failed with stage
in 486 minutes and 50 seconds
...@@ -47,7 +47,8 @@ typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type ...@@ -47,7 +47,8 @@ typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type
python::object similarity(GraphInterface& gi1, GraphInterface& gi2, python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2, boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2, bool asym) boost::any label1, boost::any label2, double norm,
bool asym)
{ {
if (weight1.empty()) if (weight1.empty())
weight1 = ecmap_t(); weight1 = ecmap_t();
...@@ -59,7 +60,7 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2, ...@@ -59,7 +60,7 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
{ {
auto l2 = uncheck(l1, label2); auto l2 = uncheck(l1, label2);
auto ew2 = uncheck(ew1, weight2); auto ew2 = uncheck(ew1, weight2);
auto ret = get_similarity(g1, g2, ew1, ew2, l1, l2, asym); auto ret = get_similarity(g1, g2, ew1, ew2, l1, l2, asym, norm);
s = python::object(ret); s = python::object(ret);
}, },
all_graph_views(), all_graph_views(),
...@@ -72,7 +73,8 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2, ...@@ -72,7 +73,8 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2, python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2, boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2, bool asym); boost::any label1, boost::any label2,
double norm, bool asym);
void export_similarity() void export_similarity()
{ {
......
...@@ -26,28 +26,27 @@ namespace graph_tool ...@@ -26,28 +26,27 @@ namespace graph_tool
using namespace std; using namespace std;
using namespace boost; using namespace boost;
template <class Map, class K> template <bool normed, class Keys, class Set1, class Set2>
typename Map::value_type::second_type get_map(Map& m, K&& k) auto set_difference(Keys& ks, Set1& s1, Set2& s2, double norm, bool asym)
{
auto iter = m.find(k);
if (iter == m.end())
return typename Map::value_type::second_type(0);
return iter->second;
}
template <class Keys, class Set1, class Set2>
auto set_difference(Keys& ks, Set1& s1, Set2& s2, bool asym)
{ {
typename Set1::value_type::second_type s = 0; typename Set1::value_type::second_type s = 0;
auto ndispatch = [&](auto x){ return normed ? std::pow(x, norm) : x; };
auto get_map =
[&](auto& m, auto&& k)
{
auto iter = m.find(k);
if (iter == m.end())
return decltype(iter->second)(0);
return iter->second;
};
for (auto& k : ks) for (auto& k : ks)
{ {
auto x1 = get_map(s1, k); auto x1 = get_map(s1, k);
auto x2 = get_map(s2, k); auto x2 = get_map(s2, k);
if (x1 > x2) if (x1 > x2)
s += x1 - x2; s += ndispatch(x1 - x2);
else if (!asym) else if (!asym)
s += x2 - x1; s += ndispatch(x2 - x1);
} }
return s; return s;
} }
...@@ -57,7 +56,7 @@ template <class Vertex, class WeightMap, class LabelMap, ...@@ -57,7 +56,7 @@ template <class Vertex, class WeightMap, class LabelMap,
auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2, auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
LabelMap& l1, LabelMap& l2, const Graph1& g1, LabelMap& l1, LabelMap& l2, const Graph1& g1,
const Graph2& g2, bool asym, Keys& keys, Adj& adj1, const Graph2& g2, bool asym, Keys& keys, Adj& adj1,
Adj& adj2) Adj& adj2, double norm)
{ {
if (v1 != graph_traits<Graph1>::null_vertex()) if (v1 != graph_traits<Graph1>::null_vertex())
{ {
...@@ -81,12 +80,16 @@ auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2, ...@@ -81,12 +80,16 @@ auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
} }
} }
return set_difference(keys, adj1, adj2, asym); if (norm == 1)
return set_difference<false>(keys, adj1, adj2, 1, asym);
else
return set_difference<true>(keys, adj1, adj2, norm, asym);
} }
template <class Graph1, class Graph2, class WeightMap, class LabelMap> template <class Graph1, class Graph2, class WeightMap, class LabelMap>
auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1, auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
WeightMap ew2, LabelMap l1, LabelMap l2, bool asym) WeightMap ew2, LabelMap l1, LabelMap l2, double norm,
bool asym)
{ {
typedef typename property_traits<LabelMap>::value_type label_t; typedef typename property_traits<LabelMap>::value_type label_t;
typedef typename property_traits<WeightMap>::value_type val_t; typedef typename property_traits<WeightMap>::value_type val_t;
...@@ -116,7 +119,7 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -116,7 +119,7 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
std::unordered_map<label_t, val_t> adj1, adj2; std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym, keys, s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym, keys,
adj1, adj2); adj1, adj2, norm);
} }
if (!asym) if (!asym)
...@@ -136,7 +139,7 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -136,7 +139,7 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
std::unordered_map<label_t, val_t> adj1, adj2; std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false, s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2); keys, adj1, adj2, norm);
} }
} }
return s; return s;
...@@ -144,7 +147,8 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -144,7 +147,8 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
template <class Graph1, class Graph2, class WeightMap, class LabelMap> template <class Graph1, class Graph2, class WeightMap, class LabelMap>
auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
WeightMap ew2, LabelMap l1, LabelMap l2, bool asym) WeightMap ew2, LabelMap l1, LabelMap l2, double norm,
bool asym)
{ {
typedef typename property_traits<WeightMap>::value_type val_t; typedef typename property_traits<WeightMap>::value_type val_t;
typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t; typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t;
...@@ -190,7 +194,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -190,7 +194,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
adj1.clear(); adj1.clear();
adj2.clear(); adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym, s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym,
keys, adj1, adj2); keys, adj1, adj2, norm);
}); });
if (!asym) if (!asym)
...@@ -209,7 +213,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -209,7 +213,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
adj1.clear(); adj1.clear();
adj2.clear(); adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false, s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2); keys, adj1, adj2, norm);
}); });
} }
......
...@@ -47,7 +47,8 @@ typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type ...@@ -47,7 +47,8 @@ typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type
python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2, python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2, boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2, bool asym) boost::any label1, boost::any label2,
double norm, bool asym)
{ {
if (weight1.empty()) if (weight1.empty())
weight1 = ecmap_t(); weight1 = ecmap_t();
...@@ -59,7 +60,7 @@ python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2, ...@@ -59,7 +60,7 @@ python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
{ {
auto l2 = uncheck(l1, label2); auto l2 = uncheck(l1, label2);
auto ew2 = uncheck(ew1, weight2); auto ew2 = uncheck(ew1, weight2);
auto ret = get_similarity_fast(g1, g2, ew1, ew2, l1, l2, asym); auto ret = get_similarity_fast(g1, g2, ew1, ew2, l1, l2, norm, asym);
s = python::object(ret); s = python::object(ret);
}, },
all_graph_views(), all_graph_views(),
......
...@@ -92,7 +92,7 @@ __all__ = ["isomorphism", "subgraph_isomorphism", "mark_subgraph", ...@@ -92,7 +92,7 @@ __all__ = ["isomorphism", "subgraph_isomorphism", "mark_subgraph",
"edge_reciprocity"] "edge_reciprocity"]
def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
norm=True, distance=False, asymmetric=False): norm=True, p=1., distance=False, asymmetric=False):
r"""Return the adjacency similarity between the two graphs. r"""Return the adjacency similarity between the two graphs.
Parameters Parameters
...@@ -114,6 +114,8 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -114,6 +114,8 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
norm : bool (optional, default: ``True``) norm : bool (optional, default: ``True``)
If ``True``, the returned value is normalized by the total number of If ``True``, the returned value is normalized by the total number of
edges. edges.
p : float (optional, default: ``1.``)
Exponent of the :math:`L^p` distance function.
distance : bool (optional, default: ``False``) distance : bool (optional, default: ``False``)
If ``True``, the complementary value is returned, i.e. the distance If ``True``, the complementary value is returned, i.e. the distance
between the two graphs. between the two graphs.
...@@ -128,10 +130,12 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -128,10 +130,12 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
Notes Notes
----- -----
The adjacency similarity is the sum of equal non-zero entries in the In its default parametrization, the adjacency similarity is the sum of equal
adjacency matrix, given a vertex ordering determined by the vertex non-zero entries in the adjacency matrix, given a vertex ordering determined
labels. In other words, it counts the number of edges which have the same by the vertex labels. In other words, it counts the number of edges which
source and target labels in both graphs. have the same source and target labels in both graphs. This function also
allows for generalized similarities according to an :math:`L^p` norm, for
arbitrary :math:`p`.
More specifically, it is defined as: More specifically, it is defined as:
...@@ -143,25 +147,26 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -143,25 +147,26 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
.. math:: .. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} |A_{ij}^{(1)} - A_{ij}^{(2)}| d(\boldsymbol A_1, \boldsymbol A_2) = \left(\sum_{i\le j} \left|A_{ij}^{(1)} - A_{ij}^{(2)}\right|^p\right)^{1/p}
is the distance between graphs, and :math:`E=\sum_{i<j}|A_{ij}^{(1)}| + is the distance between graphs, and :math:`E=(\sum_{i\le j}|A_{ij}^{(1)}|^p +
|A_{ij}^{(2)}|`. This definition holds for undirected graphs, otherwise the |A_{ij}^{(2)}|^p)^{1/p}`. Unless otherwise stated via the parameter ``p``,
sums go over all directed pairs. If weights are provided, the weighted the exponent used is :math:`p=1`. This definition holds for undirected
adjacency matrix is used. graphs, otherwise the sums go over all directed pairs. If weights are
provided, the weighted adjacency matrix is used.
If ``norm == True`` the value returned is If ``norm == True`` the value returned is :math:`S(\boldsymbol A_1,
:math:`S(\boldsymbol A_1, \boldsymbol A_2) / E`. \boldsymbol A_2) / E`, which lies in the interval :math:`[0,1]`.
If ``asymmetric == True``, the above is changed so that the comparison is If ``asymmetric == True``, the above is changed so that the comparison is
made only for entries in :math:`\boldsymbol A_1` that are larger than in :math:`\boldsymbol A_2`, i.e. made only for entries in :math:`\boldsymbol A_1` that are larger than in :math:`\boldsymbol A_2`, i.e.
.. math:: .. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} (A_{ij}^{(1)} - A_{ij}^{(2)}) H(A_{ij}^{(1)} - A_{ij}^{(2)}), d(\boldsymbol A_1, \boldsymbol A_2) = \left(\sum_{i\le j} \left(A_{ij}^{(1)} - A_{ij}^{(2)}\right)^p H(A_{ij}^{(1)} - A_{ij}^{(2)})\right)^{1/p},
where :math:`H(x)` is the unit step function, and the total sum is changed where :math:`H(x)` is the unit step function, and the total sum is changed
accordingly to :math:`E=\sum_{i<j}|A_{ij}^{(1)}|`. accordingly to :math:`E=\left(\sum_{i\le j}|A_{ij}^{(1)}|^p\right)^{1/p}`.
The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`. The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`.
...@@ -227,14 +232,18 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -227,14 +232,18 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
s = libgraph_tool_topology.\ s = libgraph_tool_topology.\
similarity(g1._Graph__graph, g2._Graph__graph, similarity(g1._Graph__graph, g2._Graph__graph,
ew1, ew2, _prop("v", g1, label1), ew1, ew2, _prop("v", g1, label1),
_prop("v", g2, label2), asymmetric) _prop("v", g2, label2), p, asymmetric)
else: else:
s = libgraph_tool_topology.\ s = libgraph_tool_topology.\
similarity_fast(g1._Graph__graph, g2._Graph__graph, similarity_fast(g1._Graph__graph, g2._Graph__graph,
ew1, ew2, _prop("v", g1, label1), ew1, ew2, _prop("v", g1, label1),
_prop("v", g2, label2), asymmetric) _prop("v", g2, label2), p, asymmetric)
if not g1.is_directed() or not g2.is_directed(): if not g1.is_directed() or not g2.is_directed():
s //= 2 s //= 2
s **= 1./p
if eweight1 is None and eweight1 is None: if eweight1 is None and eweight1 is None:
if asymmetric: if asymmetric:
E = g1.num_edges() E = g1.num_edges()
...@@ -242,9 +251,10 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -242,9 +251,10 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
E = g1.num_edges() + g2.num_edges() E = g1.num_edges() + g2.num_edges()
else: else:
if asymmetric: if asymmetric:
E = float(abs(eweight1.fa).sum()) E = float((abs(eweight1.fa)**p).sum()) ** (1./p)
else: else:
E = float(abs(eweight1.fa).sum() + abs(eweight2.fa).sum()) E = float((abs(eweight1.fa)**p).sum() +
(abs(eweight2.fa)**p).sum()) ** (1./p)
if not distance: if not distance:
s = E - s s = E - s
if norm: if norm:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment