Commit 8472a7c4 authored by Tiago Peixoto's avatar Tiago Peixoto

generation: implement generate_knn()

parent e1f7a120
...@@ -23,6 +23,7 @@ libgraph_tool_generation_la_SOURCES = \ ...@@ -23,6 +23,7 @@ libgraph_tool_generation_la_SOURCES = \
graph_complete.cc \ graph_complete.cc \
graph_generation.cc \ graph_generation.cc \
graph_geometric.cc \ graph_geometric.cc \
graph_knn.cc \
graph_lattice.cc \ graph_lattice.cc \
graph_line_graph.cc \ graph_line_graph.cc \
graph_maxent_sbm.cc \ graph_maxent_sbm.cc \
...@@ -42,6 +43,7 @@ libgraph_tool_generation_la_include_HEADERS = \ ...@@ -42,6 +43,7 @@ libgraph_tool_generation_la_include_HEADERS = \
graph_complete.hh \ graph_complete.hh \
graph_generation.hh \ graph_generation.hh \
graph_geometric.hh \ graph_geometric.hh \
graph_knn.hh \
graph_lattice.hh \ graph_lattice.hh \
graph_maxent_sbm.hh \ graph_maxent_sbm.hh \
graph_predecessor.hh \ graph_predecessor.hh \
......
...@@ -79,6 +79,13 @@ void generate_sbm(GraphInterface& gi, boost::any ab, boost::python::object ors, ...@@ -79,6 +79,13 @@ void generate_sbm(GraphInterface& gi, boost::any ab, boost::python::object ors,
boost::any ain_deg, boost::any aout_deg, bool micro_ers, boost::any ain_deg, boost::any aout_deg, bool micro_ers,
bool micro_degs, rng_t& rng); bool micro_degs, rng_t& rng);
void generate_knn(GraphInterface& gi, boost::python::object om, size_t k,
double r, double epsilon, bool cache, boost::any aw,
rng_t& rng);
void generate_knn_exact(GraphInterface& gi, boost::python::object om, size_t k,
boost::any aw);
size_t random_rewire(GraphInterface& gi, string strat, size_t niter, size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges, bool no_sweep, bool self_loops, bool parallel_edges,
bool configuration, bool traditional, bool micro, bool configuration, bool traditional, bool micro,
...@@ -135,6 +142,8 @@ BOOST_PYTHON_MODULE(libgraph_tool_generation) ...@@ -135,6 +142,8 @@ BOOST_PYTHON_MODULE(libgraph_tool_generation)
docstring_options dopt(true, false); docstring_options dopt(true, false);
def("gen_graph", &generate_graph); def("gen_graph", &generate_graph);
def("gen_sbm", &generate_sbm); def("gen_sbm", &generate_sbm);
def("gen_knn", &generate_knn);
def("gen_knn_exact", &generate_knn_exact);
def("random_rewire", &random_rewire); def("random_rewire", &random_rewire);
def("predecessor_graph", &predecessor_graph); def("predecessor_graph", &predecessor_graph);
def("line_graph", &line_graph); def("line_graph", &line_graph);
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2020 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option) any
// later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_knn.hh"
#include "numpy_bind.hh"
using namespace std;
using namespace boost;
using namespace graph_tool;
// Memoising wrapper around a distance functor.
//
// Holds a reference to the wrapped functor `d` (which must outlive this
// object) and one hash map per vertex of the graph, keyed by the second
// vertex argument. A distance d(v, u) is computed at most once per ordered
// pair (v, u); later calls return the stored value.
template <class D>
class CachedDist
{
public:
    // Allocates one (initially empty) cache per vertex of the graph behind
    // `gi`.
    CachedDist(GraphInterface& gi, D& d)
        : _d(d)
    {
        run_action<>()
            (gi,
             [&](auto& g)
             {
                 _dist_cache.resize(num_vertices(g));
             })();
    }

    // Returns the distance from `v` to `u`, evaluating the wrapped functor
    // only on a cache miss. Only the cache row of `v` is read or written.
    double operator()(size_t v, size_t u)
    {
        auto& row = _dist_cache[v];
        auto pos = row.find(u);
        if (pos != row.end())
            return pos->second;

        double dist = _d(v, u);
        row[u] = dist;
        return dist;
    }

private:
    std::vector<gt_hash_map<size_t, double>> _dist_cache;
    D& _d;
};
template <class D>
auto make_cached_dist(GraphInterface& gi, D& d)
{
return CachedDist<D>(gi, d);
}
// Python-facing entry point for the approximate k-nearest-neighbour
// generator.
//
// `om` is first interpreted as a 2-D array of doubles (one row per vertex),
// in which case the Euclidean distance between rows is used. If that
// conversion raises InvalidNumpyConversion, `om` is instead called as
// om(v, u) and the result extracted as a double.
//
// `aw` must hold a double-valued edge property map; it receives the
// distances of the generated edges. `cache` selects whether distances are
// memoised through CachedDist. `k`, `r`, `epsilon` and `rng` are forwarded
// unchanged to gen_knn().
void generate_knn(GraphInterface& gi, boost::python::object om, size_t k,
                  double r, double epsilon, bool cache, boost::any aw,
                  rng_t& rng)
{
    using weight_map_t = eprop_map_t<double>::type;
    auto w = any_cast<weight_map_t>(aw);

    try
    {
        auto m = get_array<double, 2>(om);

        // Euclidean (L2) distance between rows u and v of the point matrix.
        auto euclid =
            [&](auto u, auto v)
            {
                double sum = 0;
                auto x = m[u];
                auto y = m[v];
                for (size_t i = 0; i < m.shape()[1]; ++i)
                    sum += pow(x[i] - y[i], 2);
                return sqrt(sum);
            };

        if (cache)
        {
            auto cached = make_cached_dist(gi, euclid);
            run_action<>()
                (gi,
                 [&](auto& g)
                 {
                     gen_knn<true>(g, cached, k, r, epsilon, w, rng);
                 })();
        }
        else
        {
            run_action<>()
                (gi,
                 [&](auto& g)
                 {
                     gen_knn<true>(g, euclid, k, r, epsilon, w, rng);
                 })();
        }
    }
    catch (InvalidNumpyConversion&)
    {
        if (cache)
        {
            // The parallel variant is requested here, so calls into the
            // Python object are serialised with an OpenMP critical section.
            auto py_dist =
                [&](auto v, auto u)
                {
                    double d;
                    #pragma omp critical
                    d = python::extract<double>(om(v, u));
                    return d;
                };
            auto cached = make_cached_dist(gi, py_dist);
            run_action<>()
                (gi,
                 [&](auto& g)
                 {
                     gen_knn<true>(g, cached, k, r, epsilon, w, rng);
                 })();
        }
        else
        {
            // Without the cache the non-parallel variant is requested and
            // the Python object is called directly.
            auto py_dist =
                [&](auto v, auto u)
                {
                    double d = python::extract<double>(om(v, u));
                    return d;
                };
            run_action<>()
                (gi,
                 [&](auto& g)
                 {
                     gen_knn<false>(g, py_dist, k, r, epsilon, w, rng);
                 })();
        }
    }
}
// Python-facing entry point for the exact (all-pairs) k-nearest-neighbour
// generator.
//
// `om` is first interpreted as a 2-D array of doubles (one row per vertex)
// and the Euclidean distance between rows is used. If that conversion
// raises InvalidNumpyConversion, `om` is called as om(u, v) and the result
// extracted as a double; in that case the non-parallel variant of
// gen_knn_exact() is requested.
//
// `aw` must hold a double-valued edge property map; it receives the
// distances of the generated edges.
void generate_knn_exact(GraphInterface& gi, boost::python::object om, size_t k,
                        boost::any aw)
{
    using weight_map_t = eprop_map_t<double>::type;
    auto w = any_cast<weight_map_t>(aw);

    try
    {
        auto m = get_array<double, 2>(om);

        // Euclidean (L2) distance between rows u and v of the point matrix.
        auto euclid =
            [&](auto u, auto v)
            {
                auto x = m[u];
                auto y = m[v];
                double sum = 0;
                for (size_t i = 0; i < m.shape()[1]; ++i)
                    sum += pow(x[i] - y[i], 2);
                return sqrt(sum);
            };

        run_action<>()
            (gi,
             [&](auto& g)
             {
                 gen_knn_exact<true>(g, euclid, k, w);
             })();
    }
    catch (InvalidNumpyConversion&)
    {
        // Distance supplied by the Python object itself.
        auto py_dist =
            [&](auto u, auto v)
            {
                double d;
                d = python::extract<double>(om(u, v));
                return d;
            };

        run_action<>()
            (gi,
             [&](auto& g)
             {
                 gen_knn_exact<false>(g, py_dist, k, w);
             })();
    }
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2020 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option) any
// later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef GRAPH_KNN_HH
#define GRAPH_KNN_HH
#include <tuple>
#include <iostream>
#include <random>
#include <boost/functional/hash.hpp>
#include "graph.hh"
#include "graph_filtering.hh"
#include "graph_util.hh"
#include "parallel_rng.hh"
#include "random.hh"
#include "hash_map_wrap.hh"
namespace graph_tool
{
using namespace std;
using namespace boost;
// Approximate k-nearest-neighbour graph construction by iterative
// refinement of per-vertex candidate lists.
//
// Every vertex starts with (up to) k randomly chosen other vertices as
// candidates. In each round the current candidate lists are written to the
// graph as edges, and every vertex v then examines the neighbours of its
// neighbours: a vertex w replaces the farthest candidate of v whenever it
// sorts before some existing candidate and is not already in the list.
//
// Parameters:
//   g        graph to fill; all edges incident to its vertices are removed
//            and replaced by the generated ones.
//   d        distance functor, called as d(v, u) with vertex descriptors.
//   k        number of candidates kept per vertex.
//   r        probability with which each neighbour, and each neighbour of a
//            neighbour, is considered in a refinement round.
//   epsilon  the refinement stops once the number of replacements in a
//            round, divided by (number of vertices * k), is no longer
//            greater than this value.
//   eweight  edge property map that receives d(v, u) for every final edge.
//   rng_     random number generator; used to initialise the per-thread
//            generators.
//
// The `parallel` template flag enables the OpenMP regions (subject to the
// graph being larger than OPENMP_MIN_THRESH).
template <bool parallel, class Graph, class Dist, class Weight, class RNG>
void gen_knn(Graph& g, Dist&& d, size_t k, double r, double epsilon,
             Weight eweight, RNG& rng_)
{
    parallel_rng<rng_t>::init(rng_);

    // Candidates are ordered by distance, with the vertex index as a
    // tie-breaker. Ordering by distance alone would make two different
    // vertices at the same distance "equivalent" for std::set: the second
    // one would be silently rejected on insertion, and in the refinement
    // step below the farthest candidate would still be erased, shrinking
    // the list.
    auto cmp =
        [] (auto& x, auto& y)
        {
            if (get<1>(x) != get<1>(y))
                return get<1>(x) < get<1>(y);
            return get<0>(x) < get<0>(y);
        };

    typedef std::set<std::tuple<size_t, double>, decltype(cmp)> set_t;
    std::vector<set_t> B(num_vertices(g), set_t(cmp));

    std::vector<size_t> vs;
    for (auto v : vertices_range(g))
        vs.push_back(v);

    // Random initialisation. `vs` is firstprivate so that every thread
    // permutes its own copy.
    #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH && parallel) \
        firstprivate(vs)
    parallel_vertex_loop_no_spawn
        (g,
         [&](auto v)
         {
             // Nothing to collect; without this guard the size check below
             // would never trigger and every vertex would be inserted.
             if (k == 0)
                 return;

             auto& rng = parallel_rng<rng_t>::get(rng_);
             for (auto u : random_permutation_range(vs, rng))
             {
                 if (u == v)
                     continue;
                 double l = d(v, u);
                 B[v].insert({u, l});
                 if (B[v].size() == k)
                     break;
             }
         });

    std::bernoulli_distribution rsample(r);

    // Start above the threshold so that at least one round is performed.
    double delta = epsilon + 1;
    while (delta > epsilon)
    {
        // Rebuild the graph from the current candidate lists.
        for (auto v : vertices_range(g))
            clear_vertex(v, g);
        for (auto v : vertices_range(g))
        {
            for (auto& u : B[v])
                add_edge(v, get<0>(u), g);
        }

        // Number of candidate replacements made in this round.
        size_t c = 0;
        #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH && parallel) \
            reduction(+:c)
        parallel_vertex_loop_no_spawn
            (g,
             [&](auto v)
             {
                 auto& rng = parallel_rng<rng_t>::get(rng_);
                 auto& Bv = B[v];
                 for (auto u : all_neighbors_range(v, g))
                 {
                     if (!rsample(rng))
                         continue;
                     for (auto w : all_neighbors_range(u, g))
                     {
                         if (w == u || w == v || !rsample(rng))
                             continue;
                         double l = d(v, w);
                         // First candidate that does not sort before
                         // (l, w). If that candidate is w itself, w is
                         // already present; if there is none, w is farther
                         // than every current candidate.
                         auto iter = Bv.lower_bound({w, l});
                         if (iter != Bv.end() && get<0>(*iter) != w)
                         {
                             Bv.insert(iter, {w, l});
                             // Drop the farthest candidate to keep the
                             // list size unchanged.
                             iter = Bv.end();
                             --iter;
                             Bv.erase(iter);
                             ++c;
                         }
                     }
                 }
             });

        // Fraction of candidate slots updated in this round; treated as
        // zero when there are no slots at all (no vertices or k == 0).
        size_t slots = vs.size() * k;
        delta = (slots > 0) ? c / double(slots) : 0.;
    }

    // Write the final candidate lists to the graph, recording distances.
    for (auto v : vertices_range(g))
        clear_vertex(v, g);
    for (auto v : vertices_range(g))
    {
        for (auto& u : B[v])
        {
            auto e = add_edge(v, get<0>(u), g);
            eweight[e.first] = get<1>(u);
        }
    }
}
// Exact k-nearest-neighbour graph construction by exhaustive search.
//
// For every vertex v the distance d(v, u) to every other vertex u is
// evaluated, the k closest are selected, and an edge v -> u carrying that
// distance in `eweight` is added to `g` for each of them. If fewer than k
// other vertices exist, all of them are used.
//
// The `parallel` template flag enables the OpenMP region for the distance
// evaluation (subject to the graph being larger than OPENMP_MIN_THRESH);
// edges are added serially afterwards.
template <bool parallel, class Graph, class Dist, class Weight>
void gen_knn_exact(Graph& g, Dist&& d, size_t k, Weight eweight)
{
    // vs[v] holds the (neighbour, distance) pairs selected for vertex v.
    std::vector<std::vector<std::tuple<size_t, double>>> vs(num_vertices(g));

    #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH && parallel)
    parallel_vertex_loop_no_spawn
        (g,
         [&](auto v)
         {
             auto& ns = vs[v];
             for (auto u : vertices_range(g))
             {
                 if (u == v)
                     continue;
                 ns.emplace_back(u, d(v, u));
             }

             // Only select and truncate when there are more than k
             // candidates. Otherwise `ns.begin() + k` would lie past the
             // end of the vector, and resize(k) would pad the list with
             // default-constructed (0, 0.0) entries that would later turn
             // into spurious edges.
             if (k < ns.size())
             {
                 nth_element(ns.begin(),
                             ns.begin() + k,
                             ns.end(),
                             [] (auto& x, auto& y)
                             {
                                 return get<1>(x) < get<1>(y);
                             });
                 ns.resize(k);
             }
             ns.shrink_to_fit();
         });

    for (auto v : vertices_range(g))
    {
        for (auto& u : vs[v])
        {
            auto e = add_edge(v, get<0>(u), g);
            eweight[e.first] = get<1>(u);
        }
    }
}
} // graph_tool namespace
#endif // GRAPH_KNN_HH
...@@ -53,6 +53,7 @@ dl_import("from . import libgraph_tool_generation") ...@@ -53,6 +53,7 @@ dl_import("from . import libgraph_tool_generation")
from .. import Graph, GraphView, _check_prop_scalar, _prop, _limit_args, \ from .. import Graph, GraphView, _check_prop_scalar, _prop, _limit_args, \
_gt_type, _get_rng, Vector_double _gt_type, _get_rng, Vector_double
from .. stats import remove_parallel_edges
import inspect import inspect
import types import types
import numpy import numpy
...@@ -61,12 +62,11 @@ import scipy.optimize ...@@ -61,12 +62,11 @@ import scipy.optimize
import scipy.sparse import scipy.sparse
__all__ = ["random_graph", "random_rewire", "generate_sbm", __all__ = ["random_graph", "random_rewire", "generate_sbm",
"solve_sbm_fugacities", "generate_maxent_sbm", "predecessor_tree", "solve_sbm_fugacities", "generate_maxent_sbm", "generate_knn",
"line_graph", "graph_union", "triangulation", "lattice", "predecessor_tree", "line_graph", "graph_union", "triangulation",
"geometric_graph", "price_network", "complete_graph", "lattice", "geometric_graph", "price_network", "complete_graph",
"circular_graph", "condensation_graph"] "circular_graph", "condensation_graph"]
def random_graph(N, deg_sampler, directed=True, def random_graph(N, deg_sampler, directed=True,
parallel_edges=False, self_loops=False, block_membership=None, parallel_edges=False, self_loops=False, block_membership=None,
block_type="int", degree_block=False, block_type="int", degree_block=False,
...@@ -1378,7 +1378,95 @@ def generate_maxent_sbm(b, mrs, out_theta, in_theta=None, directed=False, ...@@ -1378,7 +1378,95 @@ def generate_maxent_sbm(b, mrs, out_theta, in_theta=None, directed=False,
multigraph, self_loops, _get_rng()) multigraph, self_loops, _get_rng())
return g return g
def generate_knn(points, k, dist=None, exact=False, r=.5, epsilon=.001,
                 directed=False, cache_dist=True):
    r"""Generate a graph of k-nearest neighbors from a set of multidimensional points.

    Parameters
    ----------
    points : iterable of lists (or :class:`numpy.ndarray`) of dimension :math:`N\times D` or ``int``
        Points of dimension :math:`D` to be considered. If the parameter
        ``dist`` is passed, this should be just an ``int`` containing the
        number of points.
    k : ``int``
        Number of nearest neighbors.
    dist : function (optional, default: ``None``)
        If given, this should be a function that returns the distance between
        two points. The arguments of this function should just be two integers,
        corresponding to the vertex indices. In this case the value of
        ``points`` should just be the total number of points. If ``dist is
        None``, then the L2-norm (Euclidean distance) is used.
    exact : ``bool`` (optional, default: ``False``)
        If ``False``, a fast approximation will be used, otherwise an exact but
        slow algorithm will be used.
    r : ``float`` (optional, default: ``.5``)
        If ``exact is False``, this specifies the fraction of randomly chosen
        neighbors that are used for the search.
    epsilon : ``float`` (optional, default: ``.001``)
        If ``exact is False``, this determines the convergence criterion used by
        the algorithm. When the fraction of updated neighbors drops below this
        value, the algorithm stops.
    directed : ``bool`` (optional, default: ``False``)
        If ``True`` a directed version of the graph will be returned, otherwise
        the graph is undirected.
    cache_dist : ``bool`` (optional, default: ``True``)
        If ``True``, an internal cache of the distance values is kept,
        implemented as a hash table.

    Returns
    -------
    g : :class:`~graph_tool.Graph`
        The k-nearest neighbors graph.
    w : :class:`~graph_tool.EdgePropertyMap`
        Edge property map with the computed distances.

    Notes
    -----
    The approximate version of this algorithm is based on
    [dong-efficient-2020]_, and has an (empirical) run-time of
    :math:`O(N^{1.14})`. The exact version has a complexity of :math:`O(N^2)`.

    If enabled during compilation, this algorithm runs in parallel.

    References
    ----------
    .. [dong-efficient-2020] Wei Dong, Charikar Moses, and Kai Li, "Efficient
       k-nearest neighbor graph construction for generic similarity measures",
       In Proceedings of the 20th international conference on World wide web
       (WWW '11). Association for Computing Machinery, New York, NY, USA,
       577–586, (2011) :doi:`https://doi.org/10.1145/1963405.1963487`

    Examples
    --------
    >>> points = np.random.random((1000, 10))
    >>> g, w = gt.generate_knn(points, k=5)

    """

    if dist is None:
        # Coordinates given explicitly: one row per point.
        points = numpy.asarray(points, dtype="float")
        N = points.shape[0]
    else:
        # ``points`` is the number of vertices; the callable is what gets
        # handed to the C++ layer in place of the coordinate array.
        N = points
        points = dist

    g = Graph()
    g.add_vertex(N)
    w = g.new_ep("double")

    gi = g._Graph__graph
    ew = _prop("e", g, w)

    if exact:
        libgraph_tool_generation.gen_knn_exact(gi, points, k, ew)
    else:
        libgraph_tool_generation.gen_knn(gi, points, k, r, epsilon,
                                         cache_dist, ew, _get_rng())

    if not directed:
        # Reciprocal directed edges become parallel once the graph is made
        # undirected, so they are collapsed.
        g.set_directed(False)
        remove_parallel_edges(g)

    return g, w
def predecessor_tree(g, pred_map): def predecessor_tree(g, pred_map):
"""Return a graph from a list of predecessors given by the ``pred_map`` vertex property.""" """Return a graph from a list of predecessors given by the ``pred_map`` vertex property."""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment