__init__.py 12.5 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
Tiago Peixoto's avatar
Tiago Peixoto committed
3
#
4
5
6
# graph_tool -- a general graph manipulation python module
#
# Copyright (C) 2007-2010 Tiago de Paula Peixoto <tiago@forked.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
22
23
"""
``graph_tool.stats`` - Graph Statistics
---------------------------------------
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

Summary
+++++++

.. autosummary::
   :nosignatures:

   vertex_hist
   edge_hist
   vertex_average
   edge_average
   label_parallel_edges
   remove_parallel_edges
   label_self_loops
   remove_self_loops
   remove_labeled_edges
   distance_histogram

Contents
++++++++

45
46
"""

Tiago Peixoto's avatar
Tiago Peixoto committed
47
48
from .. dl_import import dl_import
dl_import("import libgraph_tool_stats")
Tiago Peixoto's avatar
Tiago Peixoto committed
49

50
from .. core import _degree, _prop
Tiago Peixoto's avatar
Tiago Peixoto committed
51
from numpy import *
52
53
import numpy
import sys
Tiago Peixoto's avatar
Tiago Peixoto committed
54

55
__all__ = ["vertex_hist", "edge_hist", "vertex_average", "edge_average",
56
           "label_parallel_edges", "remove_parallel_edges",
57
           "label_self_loops", "remove_self_loops", "remove_labeled_edges",
58
           "distance_histogram"]
Tiago Peixoto's avatar
Tiago Peixoto committed
59

Tiago Peixoto's avatar
Tiago Peixoto committed
60
61

def vertex_hist(g, deg, bins=None, float_count=True):
62
63
64
65
66
    """
    Return the vertex histogram of the given degree type or property.

    Parameters
    ----------
67
    g : :class:`~graph_tool.Graph`
68
        Graph to be used.
69
    deg : string or :class:`~graph_tool.PropertyMap`
70
71
72
73
74
75
76
77
78
79
80
81
82
83
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.
    bins : list of bins
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant lenght given by this value.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
84
    counts : :class:`~numpy.ndarray`
85
        The bin counts.
86
    bins : :class:`~numpy.ndarray`
87
88
89
90
91
92
93
        The bin edges.

    See Also
    --------
    edge_hist: Edge histograms.
    vertex_average: Average of vertex properties, degrees.
    edge_average: Average of edge properties.
94
    distance_histogram : Shortest-distance histogram.
95
96
97
98
99
100
101
102
103
104
105

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
106
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
107
    >>> print gt.vertex_hist(g, "out")
Tiago Peixoto's avatar
Tiago Peixoto committed
108
109
    [array([  10.,   30.,   86.,  138.,  166.,  154.,  146.,  129.,   68.,
             36.,   23.,    8.,    3.,    2.,    0.,    1.]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15], dtype=uint64)]
110
111
    """

Tiago Peixoto's avatar
Tiago Peixoto committed
112
113
    if bins == None:
        bins = [1]
Tiago Peixoto's avatar
Tiago Peixoto committed
114
    ret = libgraph_tool_stats.\
115
          get_vertex_histogram(g._Graph__graph, _degree(g, deg), bins)
116
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
117

Tiago Peixoto's avatar
Tiago Peixoto committed
118
def edge_hist(g, eprop, bins=None, float_count=True):
119
120
121
122
123
    """
    Return the edge histogram of the given property.

    Parameters
    ----------
124
    g : :class:`~graph_tool.Graph`
125
        Graph to be used.
126
    eprop : :class:`~graph_tool.PropertyMap`
127
128
129
130
131
132
133
134
135
136
137
138
        Edge property to be used for the histogram.
    bins : list of bins
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant lenght given by this value.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
139
    counts : :class:`~numpy.ndarray`
140
        The bin counts.
141
    bins : :class:`~numpy.ndarray`
142
143
144
145
146
147
148
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    vertex_average : Average of vertex properties, degrees.
    edge_average : Average of edge properties.
149
    distance_histogram : Shortest-distance histogram.
150
151
152
153
154
155
156
157
158
159

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
160
161
162
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
163
164
165
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_hist(g, eprop, arange(0, 1, 0.1))
Tiago Peixoto's avatar
Tiago Peixoto committed
166
    [array([ 525.,  504.,  502.,  502.,  468.,  499.,  531.,  471.,  520.,  478.]), array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9])]
167
168
169

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
170
171
    if bins == None:
        bins = [1]
Tiago Peixoto's avatar
Tiago Peixoto committed
172
    ret = libgraph_tool_stats.\
173
          get_edge_histogram(g._Graph__graph, _prop("e", g, eprop), bins)
174
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
175

176
def vertex_average(g, deg):
177
178
179
180
181
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
182
    g : :class:`~graph_tool.Graph`
183
        Graph to be used.
184
    deg : string or :class:`~graph_tool.PropertyMap`
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.

    Returns
    -------
    average : float
        The average of the given degree or property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    edge_average : Average of edge properties.
201
    distance_histogram : Shortest-distance histogram.
202
203
204
205
206
207
208
209
210
211
212

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
213
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
214
    >>> print gt.vertex_average(g, "in")
Tiago Peixoto's avatar
Tiago Peixoto committed
215
    (5.0919999999999996, 0.071885575743677543)
216
217
    """

218
219
220
221
222
    ret = libgraph_tool_stats.\
          get_vertex_average(g._Graph__graph, _degree(g, deg))
    return ret

def edge_average(g, eprop):
223
224
225
226
227
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
228
    g : :class:`~graph_tool.Graph`
229
        Graph to be used.
230
    eprop : :class:`~graph_tool.PropertyMap`
231
232
233
234
235
236
237
238
239
240
241
242
243
244
        Edge property to be used for the histogram.

    Returns
    -------
    average : float
        The average of the given property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
245
    distance_histogram : Shortest-distance histogram.
246
247
248
249
250
251
252
253
254
255

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
256
257
258
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
259
260
261
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
    >>> print gt.edge_average(g, eprop)
Tiago Peixoto's avatar
Tiago Peixoto committed
262
    (0.49674035434130187, 0.0040946040690938677)
263
264
    """

265
266
267
268
    ret = libgraph_tool_stats.\
          get_edge_average(g._Graph__graph, _prop("e", g, eprop))
    return ret

269
def remove_labeled_edges(g, label):
270
    """Remove every edge `e` such that `label[e] != 0`."""
271
272
273
274
275
276
    g.stash_filter(all=False, directed=True, reversed=True)
    libgraph_tool_stats.\
          remove_labeled_edges(g._Graph__graph, _prop("e", g, label))
    g.pop_filter(all=False, directed=True, reversed=True)

def label_parallel_edges(g, eprop=None):
277
278
279
280
    r"""Label edges which are parallel, i.e, have the same source and target
    vertices. For each parallel edge set :math:`PE`, the labelling starts from 0
    to :math:`|PE|-1`. If the `eprop` parameter is given (a
    :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
281
282
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
283
    libgraph_tool_stats.\
284
          label_parallel_edges(g._Graph__graph, _prop("e", g, eprop))
285
    return eprop
Tiago Peixoto's avatar
Tiago Peixoto committed
286

287
def remove_parallel_edges(g):
288
289
    """Remove all parallel edges from the graph. Only one edge from each
    parallel edge set is left."""
290
291
292
293
    eprop = label_parallel_edges(g)
    remove_labeled_edges(g, eprop)

def label_self_loops(g, eprop=None):
294
295
296
297
298
    """Label edges which are self-loops, i.e, the source and target vertices are
    the same. Self-loops are labeled with 1 and others with 0. If the `eprop`
    parameter is given (a :class:`~graph_tool.PropertyMap`), the labelling is
    stored there."""

299
300
    if eprop == None:
        eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
301
    libgraph_tool_stats.\
302
          label_self_loops(g._Graph__graph, _prop("e", g, eprop))
303
    return eprop
304
305

def remove_self_loops(g):
306
    """Remove all self-loops edges from the graph."""
307
308
309
    eprop = label_self_loops(g)
    remove_labeled_edges(g, eprop)

Tiago Peixoto's avatar
Tiago Peixoto committed
310
def distance_histogram(g, weight=None, bins=None, samples=None,
311
312
313
                       float_count=True):
    r"""
    Return the shortest-distance histogram for each vertex pair in the graph.
314

315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
    Parameters
    ----------
    g : :class:`Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: None)
        Edge weights.
    bins : list (optional, default: [1])
        List of bins to be used for the histogram. The values given represent
        the edges of the bins (i,e, lower bounds). If the list contains only one
        value, this will be used to automatically create an appropriate bin
        range, with a constant length given by this value.
    samples : int (optional, default: None)
        If supplied, the distances will be randomly sampled from a number of
        source vertices given by this parameter. It `samples == None` (default),
        all pairs are used.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
    counts : :class:`~numpy.ndarray`
        The bin counts.
    bins : :class:`~numpy.ndarray`
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
    distance_histogram : Shortest-distance histogram.

    Notes
    -----
    The algorithm runs in :math:`O(V^2)` time, or :math:`O(V^2\log V)` if
    `weight != None`. If `samples` is supplied, the complexities are
    :math:`O(\text{samples}\times V)`  and
    :math:`O(\text{samples}\times V\log V)`, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(100, lambda: (3, 3))
    >>> hist = gt.distance_histogram(g)
    >>> print hist
Tiago Peixoto's avatar
Tiago Peixoto committed
364
    [array([    0.,   300.,   868.,  2222.,  3906.,  2463.,   141.]), array([0, 1, 2, 3, 4, 5, 6], dtype=uint64)]
365
366
    >>> hist = gt.distance_histogram(g, samples=10)
    >>> print hist
Tiago Peixoto's avatar
Tiago Peixoto committed
367
    [array([   0.,   30.,   87.,  230.,  408.,  223.,   12.]), array([0, 1, 2, 3, 4, 5, 6], dtype=uint64)]
368
    """
Tiago Peixoto's avatar
Tiago Peixoto committed
369
370
371

    if bins == None:
        bins = [1]
372
    if samples != None:
373
        seed = numpy.random.randint(0, sys.maxint)
374
375
376
377
378
379
380
        ret = libgraph_tool_stats.\
              sampled_distance_histogram(g._Graph__graph,
                                         _prop("e", g, weight), bins,
                                         samples, seed)
    else:
        ret = libgraph_tool_stats.\
              distance_histogram(g._Graph__graph, _prop("e", g, weight), bins)
381
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]