Commit b7793fcc authored by Tiago Peixoto

Add generalized L^p norm to similarity()

parent 9bd68ef9
Pipeline #408 failed with stage
in 486 minutes and 50 seconds
......@@ -47,7 +47,8 @@ typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type
python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2, bool asym)
boost::any label1, boost::any label2, double norm,
bool asym)
{
if (weight1.empty())
weight1 = ecmap_t();
......@@ -59,7 +60,7 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
{
auto l2 = uncheck(l1, label2);
auto ew2 = uncheck(ew1, weight2);
auto ret = get_similarity(g1, g2, ew1, ew2, l1, l2, asym);
auto ret = get_similarity(g1, g2, ew1, ew2, l1, l2, asym, norm);
s = python::object(ret);
},
all_graph_views(),
......@@ -72,7 +73,8 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2, bool asym);
boost::any label1, boost::any label2,
double norm, bool asym);
void export_similarity()
{
......
......@@ -26,28 +26,27 @@ namespace graph_tool
using namespace std;
using namespace boost;
// Look up `k` in the associative container `m` and return a copy of the
// mapped value; a key that is not present yields the mapped type
// constructed from 0. The container is never modified.
template <class Map, class K>
typename Map::value_type::second_type get_map(Map& m, K&& k)
{
    using mapped_t = typename Map::value_type::second_type;

    const auto pos = m.find(k);
    return (pos != m.end()) ? pos->second : mapped_t(0);
}
// Accumulates, over every key in `ks`, the difference between the value
// stored under that key in `s1` and in `s2` (a key missing from a map counts
// as 0). When `asym` is true only keys whose `s1` value exceeds the `s2`
// value contribute; otherwise the difference is added in whichever direction
// is positive. Returns the accumulated sum.
//
// NOTE(review): this span shows two alternative headers and paired `s +=`
// statements back to back; it looks like the removed and added lines of a
// diff rendered together, so it is not compilable as shown -- confirm
// against the actual file before editing.
template <class Keys, class Set1, class Set2>
auto set_difference(Keys& ks, Set1& s1, Set2& s2, bool asym)
// Variant with the extra `normed` template flag and `norm` exponent.
template <bool normed, class Keys, class Set1, class Set2>
auto set_difference(Keys& ks, Set1& s1, Set2& s2, double norm, bool asym)
{
// Accumulator uses the mapped type of Set1.
// NOTE(review): std::pow with a double exponent yields a floating-point
// value; if the mapped type is integral the `s +=` below converts back and
// would drop any fractional part -- confirm this is intended.
typename Set1::value_type::second_type s = 0;
// Raises a difference to the power `norm` when `normed` is set, otherwise
// passes it through.
auto ndispatch = [&](auto x){ return normed ? std::pow(x, norm) : x; };
// Local lookup helper: mapped value for `k`, or 0 of the mapped type when
// the key is absent.
auto get_map =
[&](auto& m, auto&& k)
{
auto iter = m.find(k);
if (iter == m.end())
return decltype(iter->second)(0);
return iter->second;
};
for (auto& k : ks)
{
auto x1 = get_map(s1, k);
auto x2 = get_map(s2, k);
// Only the positive direction of the difference is ever added.
if (x1 > x2)
s += x1 - x2;
s += ndispatch(x1 - x2);
// The reverse direction is counted only for the symmetric comparison.
else if (!asym)
s += x2 - x1;
s += ndispatch(x2 - x1);
}
return s;
}
......@@ -57,7 +56,7 @@ template <class Vertex, class WeightMap, class LabelMap,
auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
LabelMap& l1, LabelMap& l2, const Graph1& g1,
const Graph2& g2, bool asym, Keys& keys, Adj& adj1,
Adj& adj2)
Adj& adj2, double norm)
{
if (v1 != graph_traits<Graph1>::null_vertex())
{
......@@ -81,12 +80,16 @@ auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
}
}
return set_difference(keys, adj1, adj2, asym);
if (norm == 1)
return set_difference<false>(keys, adj1, adj2, 1, asym);
else
return set_difference<true>(keys, adj1, adj2, norm, asym);
}
template <class Graph1, class Graph2, class WeightMap, class LabelMap>
auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
WeightMap ew2, LabelMap l1, LabelMap l2, bool asym)
WeightMap ew2, LabelMap l1, LabelMap l2, double norm,
bool asym)
{
typedef typename property_traits<LabelMap>::value_type label_t;
typedef typename property_traits<WeightMap>::value_type val_t;
......@@ -116,7 +119,7 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym, keys,
adj1, adj2);
adj1, adj2, norm);
}
if (!asym)
......@@ -136,7 +139,7 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2);
keys, adj1, adj2, norm);
}
}
return s;
......@@ -144,7 +147,8 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
template <class Graph1, class Graph2, class WeightMap, class LabelMap>
auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
WeightMap ew2, LabelMap l1, LabelMap l2, bool asym)
WeightMap ew2, LabelMap l1, LabelMap l2, double norm,
bool asym)
{
typedef typename property_traits<WeightMap>::value_type val_t;
typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t;
......@@ -190,7 +194,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
adj1.clear();
adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym,
keys, adj1, adj2);
keys, adj1, adj2, norm);
});
if (!asym)
......@@ -209,7 +213,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
adj1.clear();
adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2);
keys, adj1, adj2, norm);
});
}
......
......@@ -47,7 +47,8 @@ typedef boost::mpl::push_back<edge_scalar_properties, ecmap_t>::type
python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
boost::any weight1, boost::any weight2,
boost::any label1, boost::any label2, bool asym)
boost::any label1, boost::any label2,
double norm, bool asym)
{
if (weight1.empty())
weight1 = ecmap_t();
......@@ -59,7 +60,7 @@ python::object similarity_fast(GraphInterface& gi1, GraphInterface& gi2,
{
auto l2 = uncheck(l1, label2);
auto ew2 = uncheck(ew1, weight2);
auto ret = get_similarity_fast(g1, g2, ew1, ew2, l1, l2, asym);
auto ret = get_similarity_fast(g1, g2, ew1, ew2, l1, l2, norm, asym);
s = python::object(ret);
},
all_graph_views(),
......
......@@ -92,7 +92,7 @@ __all__ = ["isomorphism", "subgraph_isomorphism", "mark_subgraph",
"edge_reciprocity"]
def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
norm=True, distance=False, asymmetric=False):
norm=True, p=1., distance=False, asymmetric=False):
r"""Return the adjacency similarity between the two graphs.
Parameters
......@@ -114,6 +114,8 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
norm : bool (optional, default: ``True``)
If ``True``, the returned value is normalized by the total number of
edges.
p : float (optional, default: ``1.``)
Exponent of the :math:`L^p` distance function.
distance : bool (optional, default: ``False``)
If ``True``, the complementary value is returned, i.e. the distance
between the two graphs.
......@@ -128,10 +130,12 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
Notes
-----
The adjacency similarity is the sum of equal non-zero entries in the
adjacency matrix, given a vertex ordering determined by the vertex
labels. In other words, it counts the number of edges which have the same
source and target labels in both graphs.
In its default parametrization, the adjacency similarity is the sum of equal
non-zero entries in the adjacency matrix, given a vertex ordering determined
by the vertex labels. In other words, it counts the number of edges which
have the same source and target labels in both graphs. This function also
allows for generalized similarities according to an :math:`L^p` norm, for
arbitrary :math:`p`.
More specifically, it is defined as:
......@@ -143,25 +147,26 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
.. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} |A_{ij}^{(1)} - A_{ij}^{(2)}|
d(\boldsymbol A_1, \boldsymbol A_2) = \left(\sum_{i\le j} \left|A_{ij}^{(1)} - A_{ij}^{(2)}\right|^p\right)^{1/p}
is the distance between graphs, and :math:`E=\sum_{i<j}|A_{ij}^{(1)}| +
|A_{ij}^{(2)}|`. This definition holds for undirected graphs, otherwise the
sums go over all directed pairs. If weights are provided, the weighted
adjacency matrix is used.
is the distance between graphs, and :math:`E=(\sum_{i\le j}|A_{ij}^{(1)}|^p +
|A_{ij}^{(2)}|^p)^{1/p}`. Unless otherwise stated via the parameter ``p``,
the exponent used is :math:`p=1`. This definition holds for undirected
graphs, otherwise the sums go over all directed pairs. If weights are
provided, the weighted adjacency matrix is used.
If ``norm == True`` the value returned is
:math:`S(\boldsymbol A_1, \boldsymbol A_2) / E`.
If ``norm == True`` the value returned is :math:`S(\boldsymbol A_1,
\boldsymbol A_2) / E`, which lies in the interval :math:`[0,1]`.
If ``asymmetric == True``, the above is changed so that the comparison is
made only for entries in :math:`\boldsymbol A_1` that are larger than in :math:`\boldsymbol A_2`, i.e.
.. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} (A_{ij}^{(1)} - A_{ij}^{(2)}) H(A_{ij}^{(1)} - A_{ij}^{(2)}),
d(\boldsymbol A_1, \boldsymbol A_2) = \left(\sum_{i\le j} \left(A_{ij}^{(1)} - A_{ij}^{(2)}\right)^p H(A_{ij}^{(1)} - A_{ij}^{(2)})\right)^{1/p},
where :math:`H(x)` is the unit step function, and the total sum is changed
accordingly to :math:`E=\sum_{i<j}|A_{ij}^{(1)}|`.
accordingly to :math:`E=\left(\sum_{i\le j}|A_{ij}^{(1)}|^p\right)^{1/p}`.
The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`.
......@@ -227,14 +232,18 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
s = libgraph_tool_topology.\
similarity(g1._Graph__graph, g2._Graph__graph,
ew1, ew2, _prop("v", g1, label1),
_prop("v", g2, label2), asymmetric)
_prop("v", g2, label2), p, asymmetric)
else:
s = libgraph_tool_topology.\
similarity_fast(g1._Graph__graph, g2._Graph__graph,
ew1, ew2, _prop("v", g1, label1),
_prop("v", g2, label2), asymmetric)
_prop("v", g2, label2), p, asymmetric)
if not g1.is_directed() or not g2.is_directed():
s //= 2
s **= 1./p
if eweight1 is None and eweight1 is None:
if asymmetric:
E = g1.num_edges()
......@@ -242,9 +251,10 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
E = g1.num_edges() + g2.num_edges()
else:
if asymmetric:
E = float(abs(eweight1.fa).sum())
E = float((abs(eweight1.fa)**p).sum()) ** (1./p)
else:
E = float(abs(eweight1.fa).sum() + abs(eweight2.fa).sum())
E = float((abs(eweight1.fa)**p).sum() +
(abs(eweight2.fa)**p).sum()) ** (1./p)
if not distance:
s = E - s
if norm:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment