__init__.py 12.4 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#! /usr/bin/env python
# graph_tool.py -- a general graph manipulation python module
#
# Copyright (C) 2007 Tiago de Paula Peixoto <tiago@forked.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

19
20
21
"""
``graph_tool.stats`` - Graph Statistics
---------------------------------------
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

Summary
+++++++

.. autosummary::
   :nosignatures:

   vertex_hist
   edge_hist
   vertex_average
   edge_average
   label_parallel_edges
   remove_parallel_edges
   label_self_loops
   remove_self_loops
   remove_labeled_edges
   distance_histogram

Contents
++++++++

43
44
"""

Tiago Peixoto's avatar
Tiago Peixoto committed
45
46
from .. dl_import import dl_import
dl_import("import libgraph_tool_stats")
Tiago Peixoto's avatar
Tiago Peixoto committed
47

48
from .. core import _degree, _prop
Tiago Peixoto's avatar
Tiago Peixoto committed
49
from numpy import *
50
51
import numpy
import sys
Tiago Peixoto's avatar
Tiago Peixoto committed
52

53
__all__ = ["vertex_hist", "edge_hist", "vertex_average", "edge_average",
54
           "label_parallel_edges", "remove_parallel_edges",
55
           "label_self_loops", "remove_self_loops", "remove_labeled_edges",
56
           "distance_histogram"]
Tiago Peixoto's avatar
Tiago Peixoto committed
57

58
def vertex_hist(g, deg, bins=[1], float_count=True):
59
60
61
62
63
    """
    Return the vertex histogram of the given degree type or property.

    Parameters
    ----------
64
    g : :class:`~graph_tool.Graph`
65
        Graph to be used.
66
    deg : string or :class:`~graph_tool.PropertyMap`
67
68
69
70
71
72
73
74
75
76
77
78
79
80
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.
    bins : list of bins
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant lenght given by this value.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
81
    counts : :class:`~numpy.ndarray`
82
        The bin counts.
83
    bins : :class:`~numpy.ndarray`
84
85
86
87
88
89
90
        The bin edges.

    See Also
    --------
    edge_hist: Edge histograms.
    vertex_average: Average of vertex properties, degrees.
    edge_average: Average of edge properties.
91
    distance_histogram : Shortest-distance histogram.
92
93
94
95
96
97
98
99
100
101
102

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
103
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
104
    >>> print gt.vertex_hist(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
105
106
    [array([  10.,   30.,   86.,  138.,  166.,  154.,  146.,  129.,   68.,
             36.,   23.,    8.,    3.,    2.,    0.,    1.]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15], dtype=uint64)]
107
108
    """

Tiago Peixoto's avatar
Tiago Peixoto committed
109
    ret = libgraph_tool_stats.\
110
          get_vertex_histogram(g._Graph__graph, _degree(g, deg), bins)
111
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
112

113
def edge_hist(g, eprop, bins=[1], float_count=True):
114
115
116
117
118
    """
    Return the edge histogram of the given property.

    Parameters
    ----------
119
    g : :class:`~graph_tool.Graph`
120
        Graph to be used.
121
    eprop : :class:`~graph_tool.PropertyMap`
122
123
124
125
126
127
128
129
130
131
132
133
        Edge property to be used for the histogram.
    bins : list of bins
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant lenght given by this value.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
134
    counts : :class:`~numpy.ndarray`
135
        The bin counts.
136
    bins : :class:`~numpy.ndarray`
137
138
139
140
141
142
143
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    vertex_average : Average of vertex properties, degrees.
    edge_average : Average of edge properties.
144
    distance_histogram : Shortest-distance histogram.
145
146
147
148
149
150
151
152
153
154

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
155
156
157
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
158
159
160
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_hist(g, eprop, arange(0, 1, 0.1))
161
    [array([ 525.,  504.,  502.,  502.,  467.,  499.,  531.,  471.,  520.,  479.]), array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9])]
162
163
164

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
165
    ret = libgraph_tool_stats.\
166
          get_edge_histogram(g._Graph__graph, _prop("e", g, eprop), bins)
167
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
168

169
def vertex_average(g, deg):
170
171
172
173
174
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
175
    g : :class:`~graph_tool.Graph`
176
        Graph to be used.
177
    deg : string or :class:`~graph_tool.PropertyMap`
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.

    Returns
    -------
    average : float
        The average of the given degree or property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    edge_average : Average of edge properties.
194
    distance_histogram : Shortest-distance histogram.
195
196
197
198
199
200
201
202
203
204
205

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
206
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
207
    >>> print gt.vertex_average(g, "in")
Tiago Peixoto's avatar
Tiago Peixoto committed
208
    (5.0919999999999996, 0.071885575743677543)
209
210
    """

211
212
213
214
215
    ret = libgraph_tool_stats.\
          get_vertex_average(g._Graph__graph, _degree(g, deg))
    return ret

def edge_average(g, eprop):
216
217
218
219
220
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
221
    g : :class:`~graph_tool.Graph`
222
        Graph to be used.
223
    eprop : :class:`~graph_tool.PropertyMap`
224
225
226
227
228
229
230
231
232
233
234
235
236
237
        Edge property to be used for the histogram.

    Returns
    -------
    average : float
        The average of the given property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
238
    distance_histogram : Shortest-distance histogram.
239
240
241
242
243
244
245
246
247
248

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
249
250
251
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
252
253
254
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_average(g, eprop)
255
    (0.49683581007070887, 0.0040956077241228531)
256
257
    """

258
259
260
261
    ret = libgraph_tool_stats.\
          get_edge_average(g._Graph__graph, _prop("e", g, eprop))
    return ret

262
def remove_labeled_edges(g, label):
263
    """Remove every edge `e` such that `label[e] != 0`."""
264
265
266
267
268
269
    g.stash_filter(all=False, directed=True, reversed=True)
    libgraph_tool_stats.\
          remove_labeled_edges(g._Graph__graph, _prop("e", g, label))
    g.pop_filter(all=False, directed=True, reversed=True)

def label_parallel_edges(g, eprop=None):
270
271
272
273
    r"""Label edges which are parallel, i.e, have the same source and target
    vertices. For each parallel edge set :math:`PE`, the labelling starts from 0
    to :math:`|PE|-1`. If the `eprop` parameter is given (a
    :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
274
275
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
276
    libgraph_tool_stats.\
277
          label_parallel_edges(g._Graph__graph, _prop("e", g, eprop))
278
    return eprop
Tiago Peixoto's avatar
Tiago Peixoto committed
279

280
def remove_parallel_edges(g):
281
282
    """Remove all parallel edges from the graph. Only on edge from each parallel
    edge set is left."""
283
284
285
286
    eprop = label_parallel_edges(g)
    remove_labeled_edges(g, eprop)

def label_self_loops(g, eprop=None):
287
288
289
290
291
    """Label edges which are self-loops, i.e, the source and target vertices are
    the same. Self-loops are labeled with 1 and others with 0. If the `eprop`
    parameter is given (a :class:`~graph_tool.PropertyMap`), the labelling is
    stored there."""

292
293
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
294
    libgraph_tool_stats.\
295
          label_self_loops(g._Graph__graph, _prop("e", g, eprop))
296
    return eprop
297
298

def remove_self_loops(g):
299
    """Remove all self-loops edges from the graph."""
300
301
302
    eprop = label_self_loops(g)
    remove_labeled_edges(g, eprop)

303
304
305
306
def distance_histogram(g, weight=None, bins=[1], samples=None,
                       float_count=True):
    r"""
    Return the shortest-distance histogram for each vertex pair in the graph.
307

308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
    Parameters
    ----------
    g : :class:`Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: None)
        Edge weights.
    bins : list (optional, default: [1])
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant length given by this value.
    samples : int (optional, default: None)
        If supplied, the distances will be randomly sampled from a number of
        source vertices given by this parameter. It `samples == None` (default),
        all pairs are used.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
    counts : :class:`~numpy.ndarray`
        The bin counts.
    bins : :class:`~numpy.ndarray`
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
    distance_histogram : Shortest-distance histogram.

    Notes
    -----
    The algorithm runs in :math:`O(V^2)` time, or :math:`O(V^2\log V)` if
    `weight != None`. If `samples` is supplied, the complexities are
    :math:`O(\text{samples}\times V)`  and
    :math:`O(\text{samples}\times V\log V)`, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(100, lambda: (3, 3))
    >>> hist = gt.distance_histogram(g)
    >>> print hist
    [array([    0.,   300.,   862.,  2147.,  3766.,  2588.,   237.]), array([0, 1, 2, 3, 4, 5, 6], dtype=uint64)]
    >>> hist = gt.distance_histogram(g, samples=10)
    >>> print hist
Tiago Peixoto's avatar
Tiago Peixoto committed
360
    [array([   0.,   30.,   84.,  210.,  375.,  264.,   27.]), array([0, 1, 2, 3, 4, 5, 6], dtype=uint64)]
361
362
    """
    if samples != None:
363
        seed = numpy.random.randint(0, sys.maxint)
364
365
366
367
368
369
370
        ret = libgraph_tool_stats.\
              sampled_distance_histogram(g._Graph__graph,
                                         _prop("e", g, weight), bins,
                                         samples, seed)
    else:
        ret = libgraph_tool_stats.\
              distance_histogram(g._Graph__graph, _prop("e", g, weight), bins)
371
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]