Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Malte R
graph-tool
Commits
8472a7c4
Commit
8472a7c4
authored
Aug 18, 2020
by
Tiago Peixoto
Browse files
generation: implement generate_knn()
parent
e1f7a120
Changes
5
Hide whitespace changes
Inline
Side-by-side
src/graph/generation/Makefile.am
View file @
8472a7c4
...
...
@@ -23,6 +23,7 @@ libgraph_tool_generation_la_SOURCES = \
graph_complete.cc
\
graph_generation.cc
\
graph_geometric.cc
\
graph_knn.cc
\
graph_lattice.cc
\
graph_line_graph.cc
\
graph_maxent_sbm.cc
\
...
...
@@ -42,6 +43,7 @@ libgraph_tool_generation_la_include_HEADERS = \
graph_complete.hh
\
graph_generation.hh
\
graph_geometric.hh
\
graph_knn.hh
\
graph_lattice.hh
\
graph_maxent_sbm.hh
\
graph_predecessor.hh
\
...
...
src/graph/generation/graph_generation.cc
View file @
8472a7c4
...
...
@@ -79,6 +79,13 @@ void generate_sbm(GraphInterface& gi, boost::any ab, boost::python::object ors,
boost
::
any
ain_deg
,
boost
::
any
aout_deg
,
bool
micro_ers
,
bool
micro_degs
,
rng_t
&
rng
);
// Forward declaration of the approximate k-nearest-neighbour graph generator.
// The definition lives in graph_knn.cc; it is exported to Python as "gen_knn"
// in the module definition further down in this file.
//
//   gi      - graph to be filled with the neighbour edges
//   om      - Python object: either a 2D array of doubles with the point
//             coordinates, or a callable returning the distance between two
//             vertex indices
//   k       - number of neighbours per vertex
//   r       - sampling probability used during the neighbour search
//   epsilon - convergence threshold of the search
//   cache   - whether computed distances are memoised
//   aw      - boost::any holding a double-valued edge property map which
//             receives the distances
//   rng     - random number generator
void
generate_knn
(
GraphInterface
&
gi
,
boost
::
python
::
object
om
,
size_t
k
,
double
r
,
double
epsilon
,
bool
cache
,
boost
::
any
aw
,
rng_t
&
rng
);
// Forward declaration of the exact (all-pairs) k-nearest-neighbour graph
// generator. The definition lives in graph_knn.cc; it is exported to Python as
// "gen_knn_exact" in the module definition further down in this file.
//
//   gi - graph to be filled with the neighbour edges
//   om - Python object: either a 2D array of doubles with the point
//        coordinates, or a callable returning the distance between two vertex
//        indices
//   k  - number of neighbours per vertex
//   aw - boost::any holding a double-valued edge property map which receives
//        the distances
void
generate_knn_exact
(
GraphInterface
&
gi
,
boost
::
python
::
object
om
,
size_t
k
,
boost
::
any
aw
);
size_t
random_rewire
(
GraphInterface
&
gi
,
string
strat
,
size_t
niter
,
bool
no_sweep
,
bool
self_loops
,
bool
parallel_edges
,
bool
configuration
,
bool
traditional
,
bool
micro
,
...
...
@@ -135,6 +142,8 @@ BOOST_PYTHON_MODULE(libgraph_tool_generation)
docstring_options
dopt
(
true
,
false
);
def
(
"gen_graph"
,
&
generate_graph
);
def
(
"gen_sbm"
,
&
generate_sbm
);
def
(
"gen_knn"
,
&
generate_knn
);
def
(
"gen_knn_exact"
,
&
generate_knn_exact
);
def
(
"random_rewire"
,
&
random_rewire
);
def
(
"predecessor_graph"
,
&
predecessor_graph
);
def
(
"line_graph"
,
&
line_graph
);
...
...
src/graph/generation/graph_knn.cc
0 → 100644
View file @
8472a7c4
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2020 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option) any
// later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_knn.hh"
#include "numpy_bind.hh"
using
namespace
std
;
using
namespace
boost
;
using
namespace
graph_tool
;
// Memoising wrapper around a pairwise distance functor.
//
// One hash map is kept per vertex: the result of d(v, u) is stored in the map
// belonging to v, keyed by u. Only the underlying functor is referenced, not
// copied, so it must outlive the wrapper.
template <class D>
class CachedDist
{
public:
    // Sizes the per-vertex cache table according to the number of vertices of
    // the graph view currently held by gi.
    CachedDist(GraphInterface& gi, D& d)
        : _d(d)
    {
        run_action<>()
            (gi,
             [&](auto& g)
             {
                 _dist_cache.resize(num_vertices(g));
             })();
    }

    // Returns the distance between v and u, evaluating the wrapped functor
    // only the first time a given (v, u) pair is requested.
    double operator()(size_t v, size_t u)
    {
        auto& row = _dist_cache[v];
        auto pos = row.find(u);
        if (pos != row.end())
            return pos->second;

        double dist = _d(v, u);
        row[u] = dist;
        return dist;
    }

private:
    std::vector<gt_hash_map<size_t, double>> _dist_cache;
    D& _d;
};
// Convenience factory deducing the functor type for CachedDist, so that
// lambdas can be wrapped without spelling out their type.
template <class D>
auto make_cached_dist(GraphInterface& gi, D& d)
{
    CachedDist<D> cached(gi, d);
    return cached;
}
void
generate_knn
(
GraphInterface
&
gi
,
boost
::
python
::
object
om
,
size_t
k
,
double
r
,
double
epsilon
,
bool
cache
,
boost
::
any
aw
,
rng_t
&
rng
)
{
typedef
eprop_map_t
<
double
>::
type
emap_t
;
auto
w
=
any_cast
<
emap_t
>
(
aw
);
try
{
auto
m
=
get_array
<
double
,
2
>
(
om
);
auto
d_e
=
[
&
](
auto
u
,
auto
v
)
{
double
d
=
0
;
auto
x
=
m
[
u
];
auto
y
=
m
[
v
];
for
(
size_t
i
=
0
;
i
<
m
.
shape
()[
1
];
++
i
)
d
+=
pow
(
x
[
i
]
-
y
[
i
],
2
);
return
sqrt
(
d
);
};
if
(
!
cache
)
{
run_action
<>
()
(
gi
,
[
&
](
auto
&
g
)
{
gen_knn
<
true
>
(
g
,
d_e
,
k
,
r
,
epsilon
,
w
,
rng
);
})();
}
else
{
auto
d
=
make_cached_dist
(
gi
,
d_e
);
run_action
<>
()
(
gi
,
[
&
](
auto
&
g
)
{
gen_knn
<
true
>
(
g
,
d
,
k
,
r
,
epsilon
,
w
,
rng
);
})();
}
}
catch
(
InvalidNumpyConversion
&
)
{
if
(
!
cache
)
{
auto
d_e
=
[
&
](
auto
v
,
auto
u
)
{
double
d
=
python
::
extract
<
double
>
(
om
(
v
,
u
));
return
d
;
};
run_action
<>
()
(
gi
,
[
&
](
auto
&
g
)
{
gen_knn
<
false
>
(
g
,
d_e
,
k
,
r
,
epsilon
,
w
,
rng
);
})();
}
else
{
auto
d_e
=
[
&
](
auto
v
,
auto
u
)
{
double
d
;
#pragma omp critical
d
=
python
::
extract
<
double
>
(
om
(
v
,
u
));
return
d
;
};
auto
d
=
make_cached_dist
(
gi
,
d_e
);
run_action
<>
()
(
gi
,
[
&
](
auto
&
g
)
{
gen_knn
<
true
>
(
g
,
d
,
k
,
r
,
epsilon
,
w
,
rng
);
})();
}
}
}
// Python-facing entry point of the exact k-nearest-neighbour generator.
//
// `om` is first interpreted as a 2D array of doubles (Euclidean distance
// between rows, parallel code path). If that conversion fails, `om` is called
// as a Python function of two vertex indices returning the distance, and the
// serial code path is used. The edge property map stored in `aw` receives the
// distances of the generated edges.
void generate_knn_exact(GraphInterface& gi, boost::python::object om, size_t k,
                        boost::any aw)
{
    typedef eprop_map_t<double>::type emap_t;
    auto w = any_cast<emap_t>(aw);

    try
    {
        auto m = get_array<double, 2>(om);

        // L2 distance between rows u and v of the coordinate array
        auto euclidean =
            [&](auto u, auto v)
            {
                double sum = 0;
                for (size_t i = 0; i < m.shape()[1]; ++i)
                    sum += pow(m[u][i] - m[v][i], 2);
                return sqrt(sum);
            };

        run_action<>()
            (gi,
             [&](auto& g)
             {
                 gen_knn_exact<true>(g, euclidean, k, w);
             })();
    }
    catch (InvalidNumpyConversion&)
    {
        // not an array: treat `om` as a Python callable
        auto py_dist =
            [&](auto u, auto v)
            {
                double d = python::extract<double>(om(u, v));
                return d;
            };

        run_action<>()
            (gi,
             [&](auto& g)
             {
                 gen_knn_exact<false>(g, py_dist, k, w);
             })();
    }
}
src/graph/generation/graph_knn.hh
0 → 100644
View file @
8472a7c4
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2020 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option) any
// later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef GRAPH_KNN_HH
#define GRAPH_KNN_HH
#include <tuple>
#include <iostream>
#include <random>
#include <boost/functional/hash.hpp>
#include "graph.hh"
#include "graph_filtering.hh"
#include "graph_util.hh"
#include "parallel_rng.hh"
#include "random.hh"
#include "hash_map_wrap.hh"
namespace
graph_tool
{
using
namespace
std
;
using
namespace
boost
;
// Approximate k-nearest-neighbour graph construction.
//
// Every vertex v keeps a candidate set B[v] of (neighbour, distance) tuples,
// ordered by distance. The sets are seeded with randomly chosen vertices and
// then refined iteratively: the current candidates are written to the graph
// as edges, and each vertex examines the neighbours of its neighbours (each
// step sampled with probability `r`), replacing its farthest candidate
// whenever a closer vertex is found. Iteration stops once the number of
// replacements in a sweep, divided by (number of vertices * k), is no larger
// than `epsilon`.
//
// On return, all previous edges of `g` have been removed, `g` contains one
// edge v -> u for every u in B[v], and `eweight` holds the corresponding
// distances.
//
// Template parameters / arguments:
//   parallel - whether the OpenMP parallel regions may be activated
//   g        - graph to be rewritten
//   d        - distance functor, called as d(v, u)
//   k        - number of neighbours per vertex
//   r        - sampling probability used during refinement
//   epsilon  - convergence threshold
//   eweight  - edge property map receiving the distances
//   rng_     - random number generator (per-thread generators are derived
//              from it via parallel_rng)
template <bool parallel, class Graph, class Dist, class Weight, class RNG>
void gen_knn(Graph& g, Dist&& d, size_t k, double r, double epsilon,
             Weight eweight, RNG& rng_)
{
    parallel_rng<rng_t>::init(rng_);

    // Candidates are compared by distance ONLY: two different vertices at
    // exactly the same distance are equivalent keys for the set.
    auto cmp =
        [] (auto& x, auto& y)
        {
            return get<1>(x) < get<1>(y);
        };

    typedef std::set<std::tuple<size_t, double>, decltype(cmp)> set_t;

    std::vector<set_t> B(num_vertices(g), set_t(cmp));

    std::vector<size_t> vs;
    for (auto v : vertices_range(g))
        vs.push_back(v);

    // Seed every candidate set with random vertices. `vs` is firstprivate so
    // that each thread iterates over its own copy.
    #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH && parallel) \
        firstprivate(vs)
    parallel_vertex_loop_no_spawn
        (g,
         [&](auto v)
         {
             auto& rng = parallel_rng<rng_t>::get(rng_);
             for (auto u : random_permutation_range(vs, rng))
             {
                 if (u == v)
                     continue;
                 double l = d(v, u);
                 B[v].insert({u, l});
                 if (B[v].size() == k)
                     break;
             }
         });

    std::bernoulli_distribution rsample(r);

    double delta = epsilon + 1;
    while (delta > epsilon)
    {
        // Rebuild the graph from the current candidate sets, so that the
        // sweep below can walk neighbours of neighbours.
        for (auto v : vertices_range(g))
            clear_vertex(v, g);
        for (auto v : vertices_range(g))
        {
            for (auto& u : B[v])
                add_edge(v, get<0>(u), g);
        }

        size_t c = 0; // number of candidate replacements in this sweep
        #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH && parallel) \
            reduction(+:c)
        parallel_vertex_loop_no_spawn
            (g,
             [&](auto v)
             {
                 auto& rng = parallel_rng<rng_t>::get(rng_);
                 auto& Bv = B[v];
                 for (auto u : all_neighbors_range(v, g))
                 {
                     if (!rsample(rng))
                         continue;
                     for (auto w : all_neighbors_range(u, g))
                     {
                         if (w == u || w == v || !rsample(rng))
                             continue;
                         double l = d(v, w);
                         // first candidate at distance >= l; end() means w
                         // is farther than everything we already have
                         auto iter = Bv.lower_bound({w, l});
                         if (iter != Bv.end() && get<0>(*iter) != w)
                         {
                             // If another vertex sits at exactly distance l,
                             // the set rejects the insertion. In that case
                             // nothing must be evicted (and no update
                             // counted), otherwise the candidate set would
                             // shrink by one element on every tie.
                             size_t n_before = Bv.size();
                             Bv.insert(iter, {w, l});
                             if (Bv.size() != n_before)
                             {
                                 // drop the farthest candidate to keep the
                                 // set size unchanged
                                 iter = Bv.end();
                                 --iter;
                                 Bv.erase(iter);
                                 ++c;
                             }
                         }
                     }
                 }
             });

        delta = c / double(vs.size() * k);
    }

    // Write out the final candidate sets, this time recording the distances.
    for (auto v : vertices_range(g))
        clear_vertex(v, g);
    for (auto v : vertices_range(g))
    {
        for (auto& u : B[v])
        {
            auto e = add_edge(v, get<0>(u), g);
            eweight[e.first] = get<1>(u);
        }
    }
}
// Exact k-nearest-neighbour graph construction by exhaustive search.
//
// For every vertex v the distance to every other vertex is evaluated, the k
// closest ones are selected, and an edge v -> u carrying the distance in
// `eweight` is added for each of them. If fewer than k other vertices exist,
// all of them are kept.
//
// Template parameters / arguments:
//   parallel - whether the OpenMP parallel region may be activated
//   g        - graph receiving the edges (existing edges are left in place)
//   d        - distance functor, called as d(v, u)
//   k        - number of neighbours per vertex
//   eweight  - edge property map receiving the distances
template <bool parallel, class Graph, class Dist, class Weight>
void gen_knn_exact(Graph& g, Dist&& d, size_t k, Weight eweight)
{
    // vs[v] holds the (neighbour, distance) tuples selected for v
    std::vector<std::vector<std::tuple<size_t, double>>> vs(num_vertices(g));

    #pragma omp parallel if (num_vertices(g) > OPENMP_MIN_THRESH && parallel)
    parallel_vertex_loop_no_spawn
        (g,
         [&](auto v)
         {
             auto& ns = vs[v];
             for (auto u : vertices_range(g))
             {
                 if (u == v)
                     continue;
                 ns.emplace_back(u, d(v, u));
             }

             // Only select/truncate when there are more than k candidates:
             // `ns.begin() + k` must not point past the end, and resize()
             // must never pad the list with default-constructed (0, 0.0)
             // entries, which would show up as bogus edges to vertex 0.
             if (k < ns.size())
             {
                 nth_element(ns.begin(), ns.begin() + k, ns.end(),
                             [](auto& x, auto& y)
                             {
                                 return get<1>(x) < get<1>(y);
                             });
                 ns.resize(k);
             }
             ns.shrink_to_fit();
         });

    // Edge insertion is done serially, after all selections are complete.
    for (auto v : vertices_range(g))
    {
        for (auto& u : vs[v])
        {
            auto e = add_edge(v, get<0>(u), g);
            eweight[e.first] = get<1>(u);
        }
    }
}
}
// graph_tool namespace
#endif // GRAPH_KNN_HH
src/graph_tool/generation/__init__.py
View file @
8472a7c4
...
...
@@ -53,6 +53,7 @@ dl_import("from . import libgraph_tool_generation")
from
..
import
Graph
,
GraphView
,
_check_prop_scalar
,
_prop
,
_limit_args
,
\
_gt_type
,
_get_rng
,
Vector_double
from
..
stats
import
remove_parallel_edges
import
inspect
import
types
import
numpy
...
...
@@ -61,12 +62,11 @@ import scipy.optimize
import
scipy.sparse
__all__
=
[
"random_graph"
,
"random_rewire"
,
"generate_sbm"
,
"solve_sbm_fugacities"
,
"generate_maxent_sbm"
,
"
predecessor_tree
"
,
"line_graph"
,
"graph_union"
,
"triangulation"
,
"lattice"
,
"geometric_graph"
,
"price_network"
,
"complete_graph"
,
"solve_sbm_fugacities"
,
"generate_maxent_sbm"
,
"
generate_knn
"
,
"predecessor_tree"
,
"line_graph"
,
"graph_union"
,
"triangulation"
,
"lattice"
,
"geometric_graph"
,
"price_network"
,
"complete_graph"
,
"circular_graph"
,
"condensation_graph"
]
def
random_graph
(
N
,
deg_sampler
,
directed
=
True
,
parallel_edges
=
False
,
self_loops
=
False
,
block_membership
=
None
,
block_type
=
"int"
,
degree_block
=
False
,
...
...
@@ -1378,7 +1378,95 @@ def generate_maxent_sbm(b, mrs, out_theta, in_theta=None, directed=False,
multigraph
,
self_loops
,
_get_rng
())
return
g
def generate_knn(points, k, dist=None, exact=False, r=.5, epsilon=.001,
                 directed=False, cache_dist=True):
    r"""Generate a graph of k-nearest neighbors from a set of multidimensional points.

    Parameters
    ----------
    points : iterable of lists (or :class:`numpy.ndarray`) of dimension :math:`N\times D` or ``int``
        Points of dimension :math:`D` to be considered. If the parameter
        ``dist`` is passed, this should instead be an ``int`` with the number
        of points.
    k : ``int``
        Number of nearest neighbors.
    dist : function (optional, default: ``None``)
        If given, this should be a function that returns the distance between
        two points. It is called with two integers, corresponding to the
        vertex indices of the points. In this case the value of ``points``
        should just be the total number of points. If ``dist is None``, the
        L2-norm (Euclidean distance) is used.
    exact : ``bool`` (optional, default: ``False``)
        If ``False``, a fast approximation will be used, otherwise an exact
        but slow algorithm will be used.
    r : ``float`` (optional, default: ``.5``)
        If ``exact is False``, this specifies the fraction of randomly chosen
        neighbors that are used for the search.
    epsilon : ``float`` (optional, default: ``.001``)
        If ``exact is False``, this determines the convergence criterion used
        by the algorithm. When the fraction of updated neighbors drops below
        this value, the algorithm stops.
    directed : ``bool`` (optional, default: ``False``)
        If ``True`` a directed version of the graph will be returned,
        otherwise the graph is undirected.
    cache_dist : ``bool`` (optional, default: ``True``)
        If ``True``, an internal cache of the distance values is kept,
        implemented as a hash table.

    Returns
    -------
    g : :class:`~graph_tool.Graph`
        The k-nearest neighbors graph.
    w : :class:`~graph_tool.EdgePropertyMap`
        Edge property map with the computed distances.

    Notes
    -----
    The approximate version of this algorithm is based on
    [dong-efficient-2020]_, and has an (empirical) run-time of
    :math:`O(N^{1.14})`. The exact version has a complexity of :math:`O(N^2)`.

    If enabled during compilation, this algorithm runs in parallel.

    References
    ----------
    .. [dong-efficient-2020] Wei Dong, Moses Charikar, and Kai Li, "Efficient
       k-nearest neighbor graph construction for generic similarity measures",
       In Proceedings of the 20th international conference on World wide web
       (WWW '11). Association for Computing Machinery, New York, NY, USA,
       577–586, (2011) :doi:`https://doi.org/10.1145/1963405.1963487`

    Examples
    --------
    >>> points = np.random.random((1000, 10))
    >>> g, w = gt.generate_knn(points, k=5)

    """

    if dist is None:
        points = numpy.asarray(points, dtype="float")
        num_points = points.shape[0]
    else:
        # With a user-supplied metric, ``points`` is the number of points and
        # the callable is what gets handed over to the C++ side.
        num_points, points = points, dist

    graph = Graph()
    graph.add_vertex(num_points)
    weights = graph.new_ep("double")

    if not exact:
        libgraph_tool_generation.gen_knn(graph._Graph__graph, points, k, r,
                                         epsilon, cache_dist,
                                         _prop("e", graph, weights),
                                         _get_rng())
    else:
        libgraph_tool_generation.gen_knn_exact(graph._Graph__graph, points, k,
                                               _prop("e", graph, weights))

    if not directed:
        # Reciprocal neighbor pairs become parallel edges once the direction
        # is dropped; keep a single edge for each pair.
        graph.set_directed(False)
        remove_parallel_edges(graph)

    return graph, weights
def
predecessor_tree
(
g
,
pred_map
):
"""Return a graph from a list of predecessors given by the ``pred_map`` vertex property."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment