__init__.py 12.8 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
Tiago Peixoto's avatar
Tiago Peixoto committed
3
#
4
5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2007-2011 Tiago de Paula Peixoto <tiago@skewed.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
22
23
"""
``graph_tool.stats`` - Graph Statistics
---------------------------------------
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

Summary
+++++++

.. autosummary::
   :nosignatures:

   vertex_hist
   edge_hist
   vertex_average
   edge_average
   label_parallel_edges
   remove_parallel_edges
   label_self_loops
   remove_self_loops
   remove_labeled_edges
   distance_histogram

Contents
++++++++

45
46
"""

Tiago Peixoto's avatar
Tiago Peixoto committed
47
48
from .. dl_import import dl_import
dl_import("import libgraph_tool_stats")
Tiago Peixoto's avatar
Tiago Peixoto committed
49

50
from .. import _degree, _prop
Tiago Peixoto's avatar
Tiago Peixoto committed
51
from numpy import *
52
53
import numpy
import sys
Tiago Peixoto's avatar
Tiago Peixoto committed
54

55
__all__ = ["vertex_hist", "edge_hist", "vertex_average", "edge_average",
56
           "label_parallel_edges", "remove_parallel_edges",
57
           "label_self_loops", "remove_self_loops", "remove_labeled_edges",
58
           "distance_histogram"]
Tiago Peixoto's avatar
Tiago Peixoto committed
59

Tiago Peixoto's avatar
Tiago Peixoto committed
60

61
def vertex_hist(g, deg, bins=[0, 1], float_count=True):
62
63
64
65
66
    """
    Return the vertex histogram of the given degree type or property.

    Parameters
    ----------
67
    g : :class:`~graph_tool.Graph`
68
        Graph to be used.
69
    deg : string or :class:`~graph_tool.PropertyMap`
70
71
72
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.
73
    bins : list of bins (optional, default: [0, 1])
74
        List of bins to be used for the histogram. The values given represent
75
76
77
78
        the edges of the bins (i.e. lower and upper bounds). If the list
        contains two value, this will be used to automatically create an
        appropriate bin range, with a constant lenght given by the difference of
        the two values, and starting from the first value.
79
80
81
82
83
84
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
85
    counts : :class:`~numpy.ndarray`
86
        The bin counts.
87
    bins : :class:`~numpy.ndarray`
88
89
90
91
92
93
94
        The bin edges.

    See Also
    --------
    edge_hist: Edge histograms.
    vertex_average: Average of vertex properties, degrees.
    edge_average: Average of edge properties.
95
    distance_histogram : Shortest-distance histogram.
96
97
98
99
100
101
102
103
104
105
106

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
107
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
108
    >>> print gt.vertex_hist(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
109
    [array([  10.,   30.,   86.,  138.,  166.,  154.,  146.,  129.,   68.,
110
             36.,   23.,    8.,    3.,    2.,    0.,    1.]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16], dtype=uint64)]
111
112
    """

Tiago Peixoto's avatar
Tiago Peixoto committed
113
    ret = libgraph_tool_stats.\
114
115
          get_vertex_histogram(g._Graph__graph, _degree(g, deg),
                               [float(x) for x in bins])
116
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
117

118
119

def edge_hist(g, eprop, bins=[0, 1], float_count=True):
120
121
122
123
124
    """
    Return the edge histogram of the given property.

    Parameters
    ----------
125
    g : :class:`~graph_tool.Graph`
126
        Graph to be used.
127
    eprop : :class:`~graph_tool.PropertyMap`
128
        Edge property to be used for the histogram.
129
    bins : list of bins (optional, default: [0, 1])
130
        List of bins to be used for the histogram. The values given represent
131
132
133
134
        the edges of the bins (i.e. lower and upper bounds). If the list
        contains two value, this will be used to automatically create an
        appropriate bin range, with a constant lenght given by the difference of
        the two values, and starting from the first value.
135
136
137
138
139
140
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
141
    counts : :class:`~numpy.ndarray`
142
        The bin counts.
143
    bins : :class:`~numpy.ndarray`
144
145
146
147
148
149
150
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    vertex_average : Average of vertex properties, degrees.
    edge_average : Average of edge properties.
151
    distance_histogram : Shortest-distance histogram.
152
153
154
155
156
157
158
159
160
161

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
162
163
164
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
165
166
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
167
168
    >>> print gt.edge_hist(g, eprop, linspace(0, 1, 11))
    [array([ 525.,  504.,  502.,  502.,  468.,  499.,  531.,  471.,  520.,  478.]), array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ])]
169
170
171

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
172
    ret = libgraph_tool_stats.\
173
174
          get_edge_histogram(g._Graph__graph, _prop("e", g, eprop),
                             [float(x) for x in bins])
175
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
176

177

178
def vertex_average(g, deg):
179
180
181
182
183
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
184
    g : :class:`~graph_tool.Graph`
185
        Graph to be used.
186
    deg : string or :class:`~graph_tool.PropertyMap`
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.

    Returns
    -------
    average : float
        The average of the given degree or property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    edge_average : Average of edge properties.
203
    distance_histogram : Shortest-distance histogram.
204
205
206
207
208
209
210
211
212
213
214

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
215
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
216
    >>> print gt.vertex_average(g, "in")
Tiago Peixoto's avatar
Tiago Peixoto committed
217
    (5.092, 0.07188557574367754)
218
219
    """

220
221
222
223
    ret = libgraph_tool_stats.\
          get_vertex_average(g._Graph__graph, _degree(g, deg))
    return ret

224

225
def edge_average(g, eprop):
226
227
228
229
230
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
231
    g : :class:`~graph_tool.Graph`
232
        Graph to be used.
233
    eprop : :class:`~graph_tool.PropertyMap`
234
235
236
237
238
239
240
241
242
243
244
245
246
247
        Edge property to be used for the histogram.

    Returns
    -------
    average : float
        The average of the given property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
248
    distance_histogram : Shortest-distance histogram.
249
250
251
252
253
254
255
256
257
258

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
259
260
261
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
262
263
264
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_average(g, eprop)
Tiago Peixoto's avatar
Tiago Peixoto committed
265
    (0.49674035434130187, 0.004094604069093868)
266
267
    """

268
269
270
271
    ret = libgraph_tool_stats.\
          get_edge_average(g._Graph__graph, _prop("e", g, eprop))
    return ret

272

273
def remove_labeled_edges(g, label):
274
    """Remove every edge `e` such that `label[e] != 0`."""
275
276
277
278
279
    g.stash_filter(all=False, directed=True, reversed=True)
    libgraph_tool_stats.\
          remove_labeled_edges(g._Graph__graph, _prop("e", g, label))
    g.pop_filter(all=False, directed=True, reversed=True)

280

281
def label_parallel_edges(g, eprop=None):
282
283
284
285
    r"""Label edges which are parallel, i.e, have the same source and target
    vertices. For each parallel edge set :math:`PE`, the labelling starts from 0
    to :math:`|PE|-1`. If the `eprop` parameter is given (a
    :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
286
287
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
288
    libgraph_tool_stats.\
289
          label_parallel_edges(g._Graph__graph, _prop("e", g, eprop))
290
    return eprop
Tiago Peixoto's avatar
Tiago Peixoto committed
291

292

293
def remove_parallel_edges(g):
294
295
    """Remove all parallel edges from the graph. Only one edge from each
    parallel edge set is left."""
296
297
298
    eprop = label_parallel_edges(g)
    remove_labeled_edges(g, eprop)

299

300
def label_self_loops(g, eprop=None):
301
302
303
304
305
    """Label edges which are self-loops, i.e, the source and target vertices are
    the same. Self-loops are labeled with 1 and others with 0. If the `eprop`
    parameter is given (a :class:`~graph_tool.PropertyMap`), the labelling is
    stored there."""

306
307
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
308
    libgraph_tool_stats.\
309
          label_self_loops(g._Graph__graph, _prop("e", g, eprop))
310
    return eprop
311

312

313
def remove_self_loops(g):
314
    """Remove all self-loops edges from the graph."""
315
316
317
    eprop = label_self_loops(g)
    remove_labeled_edges(g, eprop)

318
319

def distance_histogram(g, weight=None, bins=[0, 1], samples=None,
320
321
322
                       float_count=True):
    r"""
    Return the shortest-distance histogram for each vertex pair in the graph.
323

324
325
326
327
328
329
    Parameters
    ----------
    g : :class:`Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: None)
        Edge weights.
330
    bins : list of bins (optional, default: [0, 1])
331
        List of bins to be used for the histogram. The values given represent
332
333
334
335
        the edges of the bins (i.e. lower and upper bounds). If the list
        contains two value, this will be used to automatically create an
        appropriate bin range, with a constant lenght given by the difference of
        the two values, and starting from the first value.
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
    samples : int (optional, default: None)
        If supplied, the distances will be randomly sampled from a number of
        source vertices given by this parameter. It `samples == None` (default),
        all pairs are used.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
    counts : :class:`~numpy.ndarray`
        The bin counts.
    bins : :class:`~numpy.ndarray`
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
    distance_histogram : Shortest-distance histogram.

    Notes
    -----
    The algorithm runs in :math:`O(V^2)` time, or :math:`O(V^2\log V)` if
    `weight != None`. If `samples` is supplied, the complexities are
    :math:`O(\text{samples}\times V)`  and
    :math:`O(\text{samples}\times V\log V)`, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(100, lambda: (3, 3))
    >>> hist = gt.distance_histogram(g)
    >>> print hist
374
    [array([    0.,   300.,   868.,  2222.,  3906.,  2463.,   141.]), array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint64)]
375
376
    >>> hist = gt.distance_histogram(g, samples=10)
    >>> print hist
377
    [array([   0.,   30.,   87.,  230.,  408.,  223.,   12.]), array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint64)]
378
    """
Tiago Peixoto's avatar
Tiago Peixoto committed
379

380
    if samples != None:
381
        seed = numpy.random.randint(0, sys.maxint)
382
383
        ret = libgraph_tool_stats.\
              sampled_distance_histogram(g._Graph__graph,
384
385
                                         _prop("e", g, weight),
                                         [float(x) for x in bins],
386
387
388
389
                                         samples, seed)
    else:
        ret = libgraph_tool_stats.\
              distance_histogram(g._Graph__graph, _prop("e", g, weight), bins)
390
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]