Commit f27799a9 authored by Tiago Peixoto

similarity(): Improve speed for bounded integer labels

parent 4f9e4785
......@@ -67,6 +67,7 @@ libgraph_tool_core_la_include_HEADERS = \
graph_util.hh \
hash_map_wrap.hh \
histogram.hh \
idx_map.hh \
mpl_nested_loop.hh \
numpy_bind.hh \
openmp_lock.hh \
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2018 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef IDX_MAP_HH
#define IDX_MAP_HH
#include <vector>
#include <utility>
#include <limits>
// Associative container for keys that convert to small non-negative integers.
//
// Entries are stored contiguously in `_items` (as key/value pairs), while
// `_pos[key]` holds the index of that key's entry in `_items`, or `_null` if
// the key is absent. Lookup is therefore a single vector index, and `_pos`
// grows to `largest inserted key + 1` slots.
//
// Iteration follows insertion order, except that erase() moves the last entry
// into the erased slot.
//
// Keys are used directly as vector indices: they must be non-negative.
template <class Key, class T>
class idx_map
{
public:
    typedef Key key_type;
    typedef T mapped_type;
    typedef std::pair<const Key, T> value_type;
    typedef typename std::vector<std::pair<Key,T>>::iterator iterator;
    typedef typename std::vector<std::pair<Key,T>>::const_iterator const_iterator;

    // Insert the pair-like `value` (anything exposing .first / .second).
    //
    // Unlike std::map::insert(), an already-present key has its mapped value
    // OVERWRITTEN with value.second. Returns an iterator to the entry and
    // `true` if a new entry was created, `false` if an existing one was
    // updated.
    template <class P>
    std::pair<iterator,bool> insert(P&& value)
    {
        const size_t k = size_t(value.first);
        if (_pos.size() <= k)
            _pos.resize(k + 1, _null);
        size_t& idx = _pos[k];
        if (idx == _null)
        {
            idx = _items.size();
            _items.emplace_back(std::forward<P>(value));
            return std::make_pair(begin() + idx, true);
        }
        _items[idx].second = value.second;
        return std::make_pair(begin() + idx, false);
    }

    // Remove the entry with key `k`, if any. Returns the number of entries
    // removed (0 or 1). Keys that were never inserted -- including keys larger
    // than any key seen so far -- are simply reported as absent.
    size_t erase(const Key& k)
    {
        if (size_t(k) >= _pos.size())
            return 0;
        size_t& idx = _pos[k];
        if (idx == _null)
            return 0;
        // Move the last entry into the vacated slot so that _items stays
        // contiguous; its position record must be updated first.
        _pos[_items.back().first] = idx;
        std::swap(_items[idx], _items.back());
        _items.pop_back();
        idx = _null;
        return 1;
    }

    // Remove the entry at `pos`. Returns an iterator to the entry that now
    // occupies the same slot (the former last entry), or end() if the erased
    // entry was the last one.
    iterator erase(const_iterator pos)
    {
        size_t idx = pos - begin();
        // Copy the key: pos->first lives inside _items, which erase() mutates.
        const Key k = pos->first;
        erase(k);
        return begin() + idx;
    }

    // Access the value mapped to `key`, inserting a value-initialized T only
    // if the key is absent. An existing value is left untouched, so that
    // expressions such as `m[k] += w` accumulate as expected.
    T& operator[](const Key& key)
    {
        iterator iter = find(key);
        if (iter == end())
            iter = insert(std::make_pair(key, T())).first;
        return iter->second;
    }

    // Return an iterator to the entry with the given key, or end() if absent.
    iterator find(const Key& key)
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Const overload of find(); returns end() if the key is absent.
    const_iterator find(const Key& key) const
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Remove all entries. Only the slots of keys actually present are reset,
    // so the cost is proportional to the number of entries and not to the
    // size of the key range; the allocated storage is kept.
    void clear()
    {
        for (const auto& kv : _items)
            _pos[kv.first] = _null;
        _items.clear();
    }

    // Release unused capacity. The position table is dropped entirely only
    // when the map is empty, since otherwise it is still needed for lookups.
    void shrink_to_fit()
    {
        _items.shrink_to_fit();
        if (_items.empty())
            _pos.clear();
        _pos.shrink_to_fit();
    }

    // Number of entries currently stored.
    size_t size() const { return _items.size(); }

    // Whether the map holds no entries.
    bool empty() const { return _items.empty(); }

    iterator begin() { return _items.begin(); }
    iterator end() { return _items.end(); }
    const_iterator begin() const { return _items.begin(); }
    const_iterator end() const { return _items.end(); }

private:
    std::vector<std::pair<Key,T>> _items; // densely packed entries
    std::vector<size_t> _pos;             // key -> index into _items, or _null
    static constexpr size_t _null = std::numeric_limits<size_t>::max();
};

// Out-of-class definition of the static member: required before C++17 because
// _null is odr-used (it is passed by reference to std::vector::resize()).
template <class Key, class T>
constexpr size_t idx_map<Key, T>::_null;
// Set container for keys that convert to small non-negative integers.
//
// Members are stored contiguously in `_items`, while `_pos[key]` holds the
// index of that key in `_items`, or `_null` if the key is absent. Membership
// tests are therefore a single vector index, and `_pos` grows to
// `largest inserted key + 1` slots.
//
// Iteration follows insertion order, except that erase() moves the last
// member into the erased slot.
//
// Keys are used directly as vector indices: they must be non-negative.
template <class Key>
class idx_set
{
public:
    typedef Key key_type;
    typedef Key value_type;
    typedef typename std::vector<Key>::iterator iterator;
    typedef typename std::vector<Key>::const_iterator const_iterator;

    // Insert `k` if it is not yet a member. Returns an iterator to the member
    // and `true` if it was newly inserted, `false` if it was already present.
    std::pair<iterator,bool> insert(const Key& k)
    {
        const size_t slot = size_t(k);
        if (_pos.size() <= slot)
            _pos.resize(slot + 1, _null);
        size_t& idx = _pos[slot];
        if (idx == _null)
        {
            idx = _items.size();
            _items.push_back(k);
            return std::make_pair(begin() + idx, true);
        }
        return std::make_pair(begin() + idx, false);
    }

    // Remove `k`, if present. Returns the number of members removed (0 or 1).
    // Keys that were never inserted -- including keys larger than any key
    // seen so far -- are simply reported as absent.
    size_t erase(const Key& k)
    {
        if (size_t(k) >= _pos.size())
            return 0;
        size_t& idx = _pos[k];
        if (idx == _null)
            return 0;
        // Move the last member into the vacated slot so that _items stays
        // contiguous; its position record must be updated first.
        _pos[_items.back()] = idx;
        std::swap(_items[idx], _items.back());
        _items.pop_back();
        idx = _null;
        return 1;
    }

    // Remove the member at `pos`. Returns an iterator to the member that now
    // occupies the same slot (the former last member), or end() if the erased
    // member was the last one.
    iterator erase(const_iterator pos)
    {
        size_t idx = pos - begin();
        // Copy the key: *pos lives inside _items, which erase() mutates.
        const Key k = *pos;
        erase(k);
        return begin() + idx;
    }

    // Return an iterator to `key`, or end() if it is not a member.
    iterator find(const Key& key)
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Const overload of find(); returns end() if `key` is not a member.
    const_iterator find(const Key& key) const
    {
        if (size_t(key) >= _pos.size())
            return end();
        size_t idx = _pos[key];
        if (idx == _null)
            return end();
        return begin() + idx;
    }

    // Remove all members. Only the slots of keys actually present are reset,
    // so the cost is proportional to the number of members and not to the
    // size of the key range; the allocated storage is kept.
    void clear()
    {
        for (const auto& k : _items)
            _pos[k] = _null;
        _items.clear();
    }

    // Release unused capacity. The position table is dropped entirely only
    // when the set is empty, since otherwise it is still needed for lookups.
    void shrink_to_fit()
    {
        _items.shrink_to_fit();
        if (_items.empty())
            _pos.clear();
        _pos.shrink_to_fit();
    }

    // Number of members currently stored.
    size_t size() const { return _items.size(); }

    // Whether the set holds no members.
    bool empty() const { return _items.empty(); }

    iterator begin() { return _items.begin(); }
    iterator end() { return _items.end(); }
    const_iterator begin() const { return _items.begin(); }
    const_iterator end() const { return _items.end(); }

private:
    std::vector<Key> _items;   // densely packed members
    std::vector<size_t> _pos;  // key -> index into _items, or _null
    static constexpr size_t _null = std::numeric_limits<size_t>::max();
};

// Out-of-class definition of the static member: required before C++17 because
// _null is odr-used (it is passed by reference to std::vector::resize()).
template <class Key>
constexpr size_t idx_set<Key>::_null;
#endif // IDX_MAP_HH
......@@ -65,7 +65,7 @@ python::object similarity(GraphInterface& gi1, GraphInterface& gi2,
all_graph_views(),
all_graph_views(),
weight_props_t(),
writable_vertex_properties())
vertex_scalar_properties())
(gi1.get_graph_view(), gi2.get_graph_view(), weight1, label1);
return s;
}
......
......@@ -19,6 +19,7 @@
#define GRAPH_SIMILARITY_HH
#include "hash_map_wrap.hh"
#include "idx_map.hh"
namespace graph_tool
{
......@@ -51,18 +52,13 @@ auto set_difference(Keys& ks, Set1& s1, Set2& s2, bool asym)
return s;
}
template <class Vertex, class WeightMap, class LabelMap, class Graph1, class Graph2>
template <class Vertex, class WeightMap, class LabelMap,
class Graph1, class Graph2, class Keys, class Adj>
auto vertex_difference(Vertex v1, Vertex v2, WeightMap& ew1, WeightMap& ew2,
LabelMap& l1, LabelMap& l2, const Graph1& g1,
const Graph2& g2, bool asym)
const Graph2& g2, bool asym, Keys& keys, Adj& adj1,
Adj& adj2)
{
typedef typename property_traits<LabelMap>::value_type label_t;
typedef typename property_traits<WeightMap>::value_type val_t;
std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1;
std::unordered_map<label_t, val_t> adj2;
if (v1 != graph_traits<Graph1>::null_vertex())
{
for (auto e : out_edges_range(v1, g1))
......@@ -116,7 +112,11 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
else
v2 = li2->second;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym);
std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym, keys,
adj1, adj2);
}
if (!asym)
......@@ -132,7 +132,11 @@ auto get_similarity(const Graph1& g1, const Graph2& g2, WeightMap ew1,
else
continue;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false);
std::unordered_set<label_t> keys;
std::unordered_map<label_t, val_t> adj1, adj2;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2);
}
}
return s;
......@@ -144,6 +148,7 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
{
typedef typename property_traits<WeightMap>::value_type val_t;
typedef typename graph_traits<Graph1>::vertex_descriptor vertex_t;
typedef typename property_traits<LabelMap>::value_type label_t;
vector<vertex_t> lmap1, lmap2;
......@@ -167,9 +172,12 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
lmap1.resize(N, graph_traits<Graph1>::null_vertex());
lmap2.resize(N, graph_traits<Graph2>::null_vertex());
idx_set<label_t> keys;
idx_map<label_t, val_t> adj1, adj2;
val_t s = 0;
#pragma omp parallel if (num_vertices(g1) > OPENMP_MIN_THRESH) \
reduction(+:s)
reduction(+:s) firstprivate(keys, adj1, adj2)
parallel_loop_no_spawn
(lmap1,
[&](size_t i, auto v1)
......@@ -178,13 +186,17 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
if (v1 == graph_traits<Graph1>::null_vertex() &&
v2 == graph_traits<Graph2>::null_vertex())
return;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym);
keys.clear();
adj1.clear();
adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, asym,
keys, adj1, adj2);
});
if (!asym)
{
#pragma omp parallel if (num_vertices(g2) > OPENMP_MIN_THRESH) \
reduction(+:s)
reduction(+:s) firstprivate(keys, adj1, adj2)
parallel_loop_no_spawn
(lmap2,
[&](size_t i, auto v2)
......@@ -193,7 +205,11 @@ auto get_similarity_fast(const Graph1& g1, const Graph2& g2, WeightMap ew1,
if (v1 != graph_traits<Graph1>::null_vertex() ||
v2 == graph_traits<Graph2>::null_vertex())
return;
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false);
keys.clear();
adj1.clear();
adj2.clear();
s += vertex_difference(v1, v2, ew1, ew2, l1, l2, g1, g2, false,
keys, adj1, adj2);
});
}
......
......@@ -158,15 +158,15 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
.. math::
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} (A_{ij}^{(1)} - A_{ij}^{(2)}) H((A_{ij}^{(1)} - A_{ij}^{(2)})),
d(\boldsymbol A_1, \boldsymbol A_2) = \sum_{i<j} (A_{ij}^{(1)} - A_{ij}^{(2)}) H(A_{ij}^{(1)} - A_{ij}^{(2)}),
where :math:`H(x)` is the unit step function, and the total sum is changed
accordingly to :math:`E=\sum_{i<j}|A_{ij}^{(1)}|`.
The algorithm runs with complexity :math:`O(E_1 + V_1 + E_2 + V_2)`.
If enabled during compilation, and the vertex labels are integers, this
algorithm runs in parallel.
If enabled during compilation, and the vertex labels are integers bounded by
the sizes of the graphs, this algorithm runs in parallel.
Examples
--------
......@@ -222,7 +222,8 @@ def similarity(g1, g2, eweight1=None, eweight2=None, label1=None, label2=None,
ew1 = _prop("e", g1, eweight1)
ew2 = _prop("e", g2, eweight2)
if label1.is_writable() or label2.is_writable():
if ((label1.is_writable() and label1.fa.max() > g1.num_vertices()) or
(label2.is_writable() and label2.fa.max() > g2.num_vertices())):
s = libgraph_tool_topology.\
similarity(g1._Graph__graph, g2._Graph__graph,
ew1, ew2, _prop("v", g1, label1),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment