__init__.py 12.3 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#! /usr/bin/env python
# graph_tool.py -- a general graph manipulation python module
#
# Copyright (C) 2007 Tiago de Paula Peixoto <tiago@forked.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

19
20
21
"""
``graph_tool.stats`` - Graph Statistics
---------------------------------------
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

Summary
+++++++

.. autosummary::
   :nosignatures:

   vertex_hist
   edge_hist
   vertex_average
   edge_average
   label_parallel_edges
   remove_parallel_edges
   label_self_loops
   remove_self_loops
   remove_labeled_edges
   distance_histogram

Contents
++++++++

43
44
"""

Tiago Peixoto's avatar
Tiago Peixoto committed
45
46
from .. dl_import import dl_import
dl_import("import libgraph_tool_stats")
Tiago Peixoto's avatar
Tiago Peixoto committed
47

48
from .. core import _degree, _prop
Tiago Peixoto's avatar
Tiago Peixoto committed
49
from numpy import *
50
51
import numpy
import sys
Tiago Peixoto's avatar
Tiago Peixoto committed
52

53
__all__ = ["vertex_hist", "edge_hist", "vertex_average", "edge_average",
54
           "label_parallel_edges", "remove_parallel_edges",
55
           "label_self_loops", "remove_self_loops", "remove_labeled_edges",
56
           "distance_histogram"]
Tiago Peixoto's avatar
Tiago Peixoto committed
57

58
def vertex_hist(g, deg, bins=[1], float_count=True):
59
60
61
62
63
    """
    Return the vertex histogram of the given degree type or property.

    Parameters
    ----------
64
    g : :class:`~graph_tool.Graph`
65
        Graph to be used.
66
    deg : string or :class:`~graph_tool.PropertyMap`
67
68
69
70
71
72
73
74
75
76
77
78
79
80
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.
    bins : list of bins
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant lenght given by this value.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
81
    counts : :class:`~numpy.ndarray`
82
        The bin counts.
83
    bins : :class:`~numpy.ndarray`
84
85
86
87
88
89
90
        The bin edges.

    See Also
    --------
    edge_hist: Edge histograms.
    vertex_average: Average of vertex properties, degrees.
    edge_average: Average of edge properties.
91
    distance_histogram : Shortest-distance histogram.
92
93
94
95
96
97
98
99
100
101
102

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
103
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
104
    >>> print gt.vertex_hist(g, "out")
105
106
    [array([   8.,   33.,  100.,  141.,  167.,  165.,  142.,  114.,   76.,
             25.,   21.,    7.,    0.,    1.]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13], dtype=uint64)]
107
108
109

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
110
    ret = libgraph_tool_stats.\
111
          get_vertex_histogram(g._Graph__graph, _degree(g, deg), bins)
112
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
113

114
def edge_hist(g, eprop, bins=[1], float_count=True):
115
116
117
118
119
    """
    Return the edge histogram of the given property.

    Parameters
    ----------
120
    g : :class:`~graph_tool.Graph`
121
        Graph to be used.
122
    eprop : :class:`~graph_tool.PropertyMap`
123
124
125
126
127
128
129
130
131
132
133
134
        Edge property to be used for the histogram.
    bins : list of bins
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant lenght given by this value.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
135
    counts : :class:`~numpy.ndarray`
136
        The bin counts.
137
    bins : :class:`~numpy.ndarray`
138
139
140
141
142
143
144
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    vertex_average : Average of vertex properties, degrees.
    edge_average : Average of edge properties.
145
    distance_histogram : Shortest-distance histogram.
146
147
148
149
150
151
152
153
154
155

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
156
157
158
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
159
160
161
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_hist(g, eprop, arange(0, 1, 0.1))
162
    [array([ 525.,  504.,  502.,  502.,  467.,  499.,  531.,  471.,  520.,  479.]), array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9])]
163
164
165

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
166
    ret = libgraph_tool_stats.\
167
          get_edge_histogram(g._Graph__graph, _prop("e", g, eprop), bins)
168
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
169

170
def vertex_average(g, deg):
171
172
173
174
175
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
176
    g : :class:`~graph_tool.Graph`
177
        Graph to be used.
178
    deg : string or :class:`~graph_tool.PropertyMap`
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.

    Returns
    -------
    average : float
        The average of the given degree or property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    edge_average : Average of edge properties.
195
    distance_histogram : Shortest-distance histogram.
196
197
198
199
200
201
202
203
204
205
206

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
207
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
208
    >>> print gt.vertex_average(g, "in")
209
    (4.9320000000000004, 0.067833443079354308)
210
211
    """

212
213
214
215
216
    ret = libgraph_tool_stats.\
          get_vertex_average(g._Graph__graph, _degree(g, deg))
    return ret

def edge_average(g, eprop):
217
218
219
220
221
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
222
    g : :class:`~graph_tool.Graph`
223
        Graph to be used.
224
    eprop : :class:`~graph_tool.PropertyMap`
225
226
227
228
229
230
231
232
233
234
235
236
237
238
        Edge property to be used for the histogram.

    Returns
    -------
    average : float
        The average of the given property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
239
    distance_histogram : Shortest-distance histogram.
240
241
242
243
244
245
246
247
248
249

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
250
251
252
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
253
254
255
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_average(g, eprop)
256
    (0.49683581007070887, 0.0040956077241228531)
257
258
    """

259
260
261
262
    ret = libgraph_tool_stats.\
          get_edge_average(g._Graph__graph, _prop("e", g, eprop))
    return ret

263
def remove_labeled_edges(g, label):
264
    """Remove every edge `e` such that `label[e] != 0`."""
265
266
267
268
269
270
    g.stash_filter(all=False, directed=True, reversed=True)
    libgraph_tool_stats.\
          remove_labeled_edges(g._Graph__graph, _prop("e", g, label))
    g.pop_filter(all=False, directed=True, reversed=True)

def label_parallel_edges(g, eprop=None):
271
272
273
274
    r"""Label edges which are parallel, i.e, have the same source and target
    vertices. For each parallel edge set :math:`PE`, the labelling starts from 0
    to :math:`|PE|-1`. If the `eprop` parameter is given (a
    :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
275
276
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
277
    libgraph_tool_stats.\
278
          label_parallel_edges(g._Graph__graph, _prop("e", g, eprop))
279
    return eprop
Tiago Peixoto's avatar
Tiago Peixoto committed
280

281
def remove_parallel_edges(g):
282
283
    """Remove all parallel edges from the graph. Only on edge from each parallel
    edge set is left."""
284
285
286
287
    eprop = label_parallel_edges(g)
    remove_labeled_edges(g, eprop)

def label_self_loops(g, eprop=None):
288
289
290
291
292
    """Label edges which are self-loops, i.e, the source and target vertices are
    the same. Self-loops are labeled with 1 and others with 0. If the `eprop`
    parameter is given (a :class:`~graph_tool.PropertyMap`), the labelling is
    stored there."""

293
294
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
295
    libgraph_tool_stats.\
296
          label_self_loops(g._Graph__graph, _prop("e", g, eprop))
297
    return eprop
298
299

def remove_self_loops(g):
300
    """Remove all self-loops edges from the graph."""
301
302
303
    eprop = label_self_loops(g)
    remove_labeled_edges(g, eprop)

304
305
306
307
def distance_histogram(g, weight=None, bins=[1], samples=None,
                       float_count=True):
    r"""
    Return the shortest-distance histogram for each vertex pair in the graph.
308

309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
    Parameters
    ----------
    g : :class:`Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: None)
        Edge weights.
    bins : list (optional, default: [1])
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant length given by this value.
    samples : int (optional, default: None)
        If supplied, the distances will be randomly sampled from a number of
        source vertices given by this parameter. It `samples == None` (default),
        all pairs are used.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
    counts : :class:`~numpy.ndarray`
        The bin counts.
    bins : :class:`~numpy.ndarray`
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
    distance_histogram : Shortest-distance histogram.

    Notes
    -----
    The algorithm runs in :math:`O(V^2)` time, or :math:`O(V^2\log V)` if
    `weight != None`. If `samples` is supplied, the complexities are
    :math:`O(\text{samples}\times V)`  and
    :math:`O(\text{samples}\times V\log V)`, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(100, lambda: (3, 3))
    >>> hist = gt.distance_histogram(g)
    >>> print hist
    [array([    0.,   300.,   862.,  2147.,  3766.,  2588.,   237.]), array([0, 1, 2, 3, 4, 5, 6], dtype=uint64)]
    >>> hist = gt.distance_histogram(g, samples=10)
    >>> print hist
    [array([   0.,   30.,   87.,  219.,  375.,  255.,   24.]), array([0, 1, 2, 3, 4, 5, 6], dtype=uint64)]
    """
    if samples != None:
364
        seed = numpy.random.randint(0, sys.maxint)
365
366
367
368
369
370
371
        ret = libgraph_tool_stats.\
              sampled_distance_histogram(g._Graph__graph,
                                         _prop("e", g, weight), bins,
                                         samples, seed)
    else:
        ret = libgraph_tool_stats.\
              distance_histogram(g._Graph__graph, _prop("e", g, weight), bins)
372
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]