Commit f27799a9 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

similarity(): Improve speed for bounded integer labels

parent 4f9e4785
...@@ -67,6 +67,7 @@ libgraph_tool_core_la_include_HEADERS = \ ...@@ -67,6 +67,7 @@ libgraph_tool_core_la_include_HEADERS = \
graph_util.hh \ graph_util.hh \
hash_map_wrap.hh \ hash_map_wrap.hh \
histogram.hh \ histogram.hh \
idx_map.hh \
mpl_nested_loop.hh \ mpl_nested_loop.hh \
numpy_bind.hh \ numpy_bind.hh \
openmp_lock.hh \ openmp_lock.hh \
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2018 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef IDX_MAP_HH
#define IDX_MAP_HH
#include <vector>
#include <utility>
#include <limits>
// Map from integer-like keys to values, backed by two vectors:
//
//   _items : the stored (key, value) pairs, densely packed in insertion order
//            (erase() moves the last pair into the vacated slot);
//   _pos   : indexed by key, holds the position of that key inside _items,
//            or _null when the key is absent.
//
// Keys are converted with size_t(key) and used directly as indices into
// _pos, so _pos grows to (largest key inserted + 1) entries. Iterators are
// plain vector iterators over _items.
template <class Key, class T>
class idx_map
{
public:
    typedef Key key_type;
    typedef T mapped_type;
    typedef std::pair<const Key, T> value_type;
    typedef typename std::vector<std::pair<Key,T>>::iterator iterator;
    typedef typename std::vector<std::pair<Key,T>>::const_iterator const_iterator;

    // Inserts the pair `value`. If the key is already present its mapped
    // value is overwritten with value.second (unlike std::map::insert).
    // Returns an iterator to the stored pair and `true` if a new entry was
    // created, `false` if an existing one was overwritten.
    template <class P>
    std::pair<iterator,bool> insert(P&& value)
    {
        // Read the key before `value` is possibly moved from below.
        const size_t k = size_t(value.first);
        if (_pos.size() <= k)
            _pos.resize(k + 1, _null);
        size_t& idx = _pos[k];
        if (idx == _null)
        {
            idx = _items.size();
            _items.push_back(std::forward<P>(value));
            return std::make_pair(begin() + idx, true);
        }
        _items[idx].second = value.second;
        return std::make_pair(begin() + idx, false);
    }

    // Removes the entry with key `k`, if any. Returns the number of entries
    // removed (0 or 1). The last stored pair is moved into the freed slot,
    // so iterators to the last element are invalidated.
    size_t erase(const Key& k)
    {
        // A key beyond the index table was never inserted.
        if (size_t(k) >= _pos.size())
            return 0;
        size_t& idx = _pos[k];
        if (idx == _null)
            return 0;
        // Redirect the last pair's index entry to the slot being freed,
        // then swap it in and drop the (now last) erased pair.
        _pos[_items.back().first] = idx;
        std::swap(_items[idx], _items.back());
        _items.pop_back();
        idx = _null;
        return 1;
    }

    // Removes the entry referenced by `pos`. Returns an iterator to the same
    // position, which now holds the pair that used to be last (or end()).
    iterator erase(const_iterator pos)
    {
        size_t idx = pos - begin();
        // Copy the key: `pos` refers into _items, which erase() rearranges.
        const Key k = pos->first;
        erase(k);
        return begin() + idx;
    }

    // Returns a reference to the value mapped to `key`, inserting a
    // value-initialized entry only if the key is absent. An existing value
    // is left untouched, so `m[k] += x` accumulates as expected.
    T& operator[](const Key& key)
    {
        iterator iter = find(key);
        if (iter == end())
            iter = insert(std::make_pair(key, T())).first;
        return iter->second;
    }

    // Returns an iterator to the entry with the given key, or end().
    iterator find(const Key& key)
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Const overload of find(); returns a const_iterator.
    const_iterator find(const Key& key) const
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Removes all entries. Only the index slots of stored keys are reset,
    // and _pos keeps its size.
    void clear()
    {
        for (const auto& item : _items)
            _pos[item.first] = _null;
        _items.clear();
    }

    // Releases unused capacity; the index table is dropped entirely when the
    // map is empty.
    void shrink_to_fit()
    {
        _items.shrink_to_fit();
        if (_items.empty())
            _pos.clear();
        _pos.shrink_to_fit();
    }

    iterator begin() { return _items.begin(); }
    iterator end() { return _items.end(); }
    const_iterator begin() const { return _items.begin(); }
    const_iterator end() const { return _items.end(); }

private:
    std::vector<std::pair<Key,T>> _items; // densely packed (key, value) pairs
    std::vector<size_t> _pos;             // key -> position in _items, or _null
    static constexpr size_t _null = std::numeric_limits<size_t>::max();
};

// Out-of-class definition of the static member (needed before C++17 when
// _null is odr-used; redundant but harmless afterwards).
template <class Key, class T>
constexpr size_t idx_map<Key, T>::_null;
// Set of integer-like keys, backed by two vectors:
//
//   _items : the stored keys, densely packed in insertion order (erase()
//            moves the last key into the vacated slot);
//   _pos   : indexed by key, holds the position of that key inside _items,
//            or _null when the key is absent.
//
// Keys are converted with size_t(key) and used directly as indices into
// _pos, so _pos grows to (largest key inserted + 1) entries. Iterators are
// plain vector iterators over _items.
template <class Key>
class idx_set
{
public:
    typedef Key key_type;
    typedef Key value_type;
    typedef typename std::vector<Key>::iterator iterator;
    typedef typename std::vector<Key>::const_iterator const_iterator;

    // Inserts key `k` if absent. Returns an iterator to the stored key and
    // `true` if it was newly inserted, `false` if it was already present.
    std::pair<iterator,bool> insert(const Key& k)
    {
        const size_t slot = size_t(k);
        if (_pos.size() <= slot)
            _pos.resize(slot + 1, _null);
        size_t& idx = _pos[slot];
        if (idx != _null)
            return std::make_pair(begin() + idx, false);
        idx = _items.size();
        _items.push_back(k);
        return std::make_pair(begin() + idx, true);
    }

    // Removes key `k`, if present. Returns the number of keys removed
    // (0 or 1). The last stored key is moved into the freed slot, so
    // iterators to the last element are invalidated.
    size_t erase(const Key& k)
    {
        // A key beyond the index table was never inserted.
        if (size_t(k) >= _pos.size())
            return 0;
        size_t& idx = _pos[k];
        if (idx == _null)
            return 0;
        // Redirect the last key's index entry to the slot being freed,
        // then swap it in and drop the (now last) erased key.
        _pos[_items.back()] = idx;
        std::swap(_items[idx], _items.back());
        _items.pop_back();
        idx = _null;
        return 1;
    }

    // Removes the key referenced by `pos`. Returns an iterator to the same
    // position, which now holds the key that used to be last (or end()).
    iterator erase(const_iterator pos)
    {
        size_t idx = pos - begin();
        // Elements are plain keys (not pairs). Copy the key, since `pos`
        // refers into _items, which erase() rearranges.
        const Key k = *pos;
        erase(k);
        return begin() + idx;
    }

    // Returns an iterator to the given key, or end() if it is absent.
    iterator find(const Key& key)
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Const overload of find(); returns a const_iterator.
    const_iterator find(const Key& key) const
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Removes all keys. Only the index slots of stored keys are reset, and
    // _pos keeps its size.
    void clear()
    {
        for (const auto& k : _items)
            _pos[k] = _null;
        _items.clear();
    }

    // Releases unused capacity; the index table is dropped entirely when the
    // set is empty.
    void shrink_to_fit()
    {
        _items.shrink_to_fit();
        if (_items.empty())
            _pos.clear();
        _pos.shrink_to_fit();
    }

    iterator begin() { return _items.begin(); }
    iterator end() { return _items.end(); }
    const_iterator begin() const { return _items.begin(); }
    const_iterator end() const { return _items.end(); }

private:
    std::vector<Key> _items;   // densely packed keys
    std::vector<size_t> _pos;  // key -> position in _items, or _null
    static constexpr size_t _null = std::numeric_limits<size_t>::max();
};

// Out-of-class definition of the static member (needed before C++17 when
// _null is odr-used; redundant but harmless afterwards).
template <class Key>
constexpr size_t idx_set<Key>::_null;
#endif // IDX_MAP_HH
...@@ -65,7 +65,7 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2, ...@@ -65,7 +65,7 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
all_graph_views(), all_graph_views(),
all_graph_views(), all_graph_views(),
weight_props_t(), weight_props_t(),
writable_vertex_properties()) vertex_scalar_properties())
(gi1.get_graph_view(), gi2.get_graph_view(), weight1, label1); (gi1.get_graph_view(), gi2.get_graph_view(), weight1, label1);
return s; return s;
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define GRAPH_SIMILARITY_HH #define GRAPH_SIMILARITY_HH
#include "hash_map_wrap.hh" #include "hash_map_wrap.hh"
#include "idx_map.hh"
namespace graph_tool namespace graph_tool
{ {
...@@ -51,18 +52,13 @@ auto set_difference(Keys& ks, Set1& s1, Set2& s2, bool asym) ...@@ -51,18 +52,13 @@ auto set_difference(Keys& ks, Set1& s1, Set2& s2, bool asym)
return s; return s;
} }
template <class Vertex, class WeightMap, class LabelMap, class Graph1, class Graph2> template <class Vertex, class WeightMap, class LabelMap,
class Graph1, class Graph2, class Keys, class Adj>
auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2, auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
LabelMap& l1, LabelMap& l2, const Graph1& g1, LabelMap& l1, LabelMap& l2, const Graph1& g1,
const Graph2& g2, bool asym) const Graph2& g2, bool asym, Keys& keys, Adj& adj1,
Adj& adj2)
{ {
typedef typename property_traits<LabelMap>::value_type label_t;
typedef typename property_traits<WeightMap>::value_type val_t;
std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1;
std::unordered_map<label_t, val_t> adj2;
if (v1 != graph_traits<Graph1>::null_vertex()) if (v1 != graph_traits<Graph1>::null_vertex())
{ {
for (auto e : out_edges_range(v1, g1)) for (auto e : out_edges_range(v1, g1))
...@@ -116,7 +112,11 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -116,7 +112,11 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
else else
v2 = li2->second; v2 = li2->second;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym); std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym, keys,
adj1, adj2);
} }
if (!asym) if (!asym)
...@@ -132,7 +132,11 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -132,7 +132,11 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
else else
continue; continue;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false); std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2);
} }
} }
return s; return s;
...@@ -144,6 +148,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -144,6 +148,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
{ {
typedef typename property_traits<WeightMap>::value_type val_t; typedef typename property_traits<WeightMap>::value_type val_t;
typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t; typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t;
typedef typename property_traits<LabelMap>::value_type label_t;
vector<vertex_t> lmap1, lmap2; vector<vertex_t> lmap1, lmap2;
...@@ -167,9 +172,12 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -167,9 +172,12 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
lmap1.resize(N, graph_traits<Graph1>::null_vertex()); lmap1.resize(N, graph_traits<Graph1>::null_vertex());
lmap2.resize(N, graph_traits<Graph2>::null_vertex()); lmap2.resize(N, graph_traits<Graph2>::null_vertex());
idx_set<label_t> keys;
idx_map<label_t, val_t> adj1, adj2;
val_t s = 0; val_t s = 0;
#pragma omp parallel if (num_vertices(g1) > OPENMP_MIN_THRESH) \ #pragma omp parallel if (num_vertices(g1) > OPENMP_MIN_THRESH) \
reduction(+:s) reduction(+:s) firstprivate(keys, adj1, adj2)
parallel_loop_no_spawn parallel_loop_no_spawn
(lmap1, (lmap1,
[&](size_t i, auto v1) [&](size_t i, auto v1)
...@@ -178,13 +186,17 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -178,13 +186,17 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
if (v1 == graph_traits<Graph1>::null_vertex() && if (v1 == graph_traits<Graph1>::null_vertex() &&
v2 == graph_traits<Graph2>::null_vertex()) v2 == graph_traits<Graph2>::null_vertex())
return; return;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym); keys.clear();
adj1.clear();
adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym,
keys, adj1, adj2);
}); });
if (!asym) if (!asym)
{ {
#pragma omp parallel if (num_vertices(g2) > OPENMP_MIN_THRESH) \ #pragma omp parallel if (num_vertices(g2) > OPENMP_MIN_THRESH) \
reduction(+:s) reduction(+:s) firstprivate(keys, adj1, adj2)
parallel_loop_no_spawn parallel_loop_no_spawn
(lmap2, (lmap2,
[&](size_t i, auto v2) [&](size_t i, auto v2)
...@@ -193,7 +205,11 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1, ...@@ -193,7 +205,11 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
if (v1 != graph_traits<Graph1>::null_vertex() || if (v1 != graph_traits<Graph1>::null_vertex() ||
v2 == graph_traits<Graph2>::null_vertex()) v2 == graph_traits<Graph2>::null_vertex())
return; return;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false); keys.clear();
adj1.clear();
adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2);
}); });
} }
......
...@@ -158,15 +158,15 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -158,15 +158,15 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
.. math:: .. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} (A_{ij}^{(1)} - A_{ij}^{(2)}) H((A_{ij}^{(1)} - A_{ij}^{(2)})), d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} (A_{ij}^{(1)} - A_{ij}^{(2)}) H(A_{ij}^{(1)} - A_{ij}^{(2)}),
where :math:`H(x)` is the unit step function, and the total sum is changed where :math:`H(x)` is the unit step function, and the total sum is changed
accordingly to :math:`E=\sum_{i<j}|A_{ij}^{(1)}|`. accordingly to :math:`E=\sum_{i<j}|A_{ij}^{(1)}|`.
The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`. The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`.
If enabled during compilation, and the vertex labels are integers, this If enabled during compilation, and the vertex labels are integers bounded by
algorithm runs in parallel. the sizes of the graphs, this algorithm runs in parallel.
Examples Examples
-------- --------
...@@ -222,7 +222,8 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None, ...@@ -222,7 +222,8 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
ew1 = _prop("e", g1, eweight1) ew1 = _prop("e", g1, eweight1)
ew2 = _prop("e", g2, eweight2) ew2 = _prop("e", g2, eweight2)
if label1.is_writable() or label2.is_writable(): if ((label1.is_writable() and label1.fa.max() > g1.num_vertices()) or
(label2.is_writable() and label2.fa.max() > g2.num_vertices())):
s = libgraph_tool_topology.\ s = libgraph_tool_topology.\
similarity(g1._Graph__graph, g2._Graph__graph, similarity(g1._Graph__graph, g2._Graph__graph,
ew1, ew2, _prop("v", g1, label1), ew1, ew2, _prop("v", g1, label1),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment