__init__.py 13.4 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
Tiago Peixoto's avatar
Tiago Peixoto committed
3
#
4
5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2007-2012 Tiago de Paula Peixoto <tiago@skewed.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
"""
Tiago Peixoto's avatar
Tiago Peixoto committed
22
23
``graph_tool.stats`` - Miscellaneous statistics
-----------------------------------------------
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

Summary
+++++++

.. autosummary::
   :nosignatures:

   vertex_hist
   edge_hist
   vertex_average
   edge_average
   label_parallel_edges
   remove_parallel_edges
   label_self_loops
   remove_self_loops
   remove_labeled_edges
   distance_histogram

Contents
++++++++

45
46
"""

47
48
from __future__ import division, absolute_import, print_function

Tiago Peixoto's avatar
Tiago Peixoto committed
49
from .. dl_import import dl_import
50
dl_import("from . import libgraph_tool_stats")
Tiago Peixoto's avatar
Tiago Peixoto committed
51

52
from .. import _degree, _prop, _get_rng
Tiago Peixoto's avatar
Tiago Peixoto committed
53
from numpy import *
54
55
import numpy
import sys
Tiago Peixoto's avatar
Tiago Peixoto committed
56

57
__all__ = ["vertex_hist", "edge_hist", "vertex_average", "edge_average",
58
           "label_parallel_edges", "remove_parallel_edges",
59
           "label_self_loops", "remove_self_loops", "remove_labeled_edges",
60
           "distance_histogram"]
Tiago Peixoto's avatar
Tiago Peixoto committed
61

Tiago Peixoto's avatar
Tiago Peixoto committed
62

63
def vertex_hist(g, deg, bins=[0, 1], float_count=True):
64
65
66
67
68
    """
    Return the vertex histogram of the given degree type or property.

    Parameters
    ----------
69
    g : :class:`~graph_tool.Graph`
70
        Graph to be used.
71
    deg : string or :class:`~graph_tool.PropertyMap`
72
73
74
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.
75
    bins : list of bins (optional, default: [0, 1])
76
        List of bins to be used for the histogram. The values given represent
77
        the edges of the bins (i.e. lower and upper bounds). If the list
78
79
80
        contains two values, this will be used to automatically create an
        appropriate bin range, with a constant width given by the second value,
        and starting from the first value.
81
82
83
84
85
86
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
87
    counts : :class:`~numpy.ndarray`
88
        The bin counts.
89
    bins : :class:`~numpy.ndarray`
90
91
92
93
94
95
96
        The bin edges.

    See Also
    --------
    edge_hist: Edge histograms.
    vertex_average: Average of vertex properties, degrees.
    edge_average: Average of edge properties.
97
    distance_histogram : Shortest-distance histogram.
98
99
100
101
102
103
104
105
106
107
108

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
109
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
110
    >>> print(gt.vertex_hist(g, "out"))
Tiago Peixoto's avatar
Tiago Peixoto committed
111
    [array([  10.,   30.,   86.,  138.,  166.,  154.,  146.,  129.,   68.,
112
             36.,   23.,    8.,    3.,    2.,    0.,    1.]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16], dtype=uint64)]
113
114
    """

Tiago Peixoto's avatar
Tiago Peixoto committed
115
    ret = libgraph_tool_stats.\
116
117
          get_vertex_histogram(g._Graph__graph, _degree(g, deg),
                               [float(x) for x in bins])
118
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
119

120
121

def edge_hist(g, eprop, bins=[0, 1], float_count=True):
122
123
124
125
126
    """
    Return the edge histogram of the given property.

    Parameters
    ----------
127
    g : :class:`~graph_tool.Graph`
128
        Graph to be used.
129
    eprop : :class:`~graph_tool.PropertyMap`
130
        Edge property to be used for the histogram.
131
    bins : list of bins (optional, default: [0, 1])
132
        List of bins to be used for the histogram. The values given represent
133
        the edges of the bins (i.e. lower and upper bounds). If the list
134
135
136
        contains two values, this will be used to automatically create an
        appropriate bin range, with a constant width given by the second value,
        and starting from the first value.
137
138
139
140
141
142
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
143
    counts : :class:`~numpy.ndarray`
144
        The bin counts.
145
    bins : :class:`~numpy.ndarray`
146
147
148
149
150
151
152
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    vertex_average : Average of vertex properties, degrees.
    edge_average : Average of edge properties.
153
    distance_histogram : Shortest-distance histogram.
154
155
156
157
158
159
160
161
162
163

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
164
165
166
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
167
168
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
169
    >>> print(gt.edge_hist(g, eprop, linspace(0, 1, 11)))
170
    [array([ 525.,  504.,  502.,  502.,  468.,  499.,  531.,  471.,  520.,  478.]), array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ])]
171
172
173

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
174
    ret = libgraph_tool_stats.\
175
176
          get_edge_histogram(g._Graph__graph, _prop("e", g, eprop),
                             [float(x) for x in bins])
177
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
178

179

180
def vertex_average(g, deg):
181
182
183
184
185
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
186
    g : :class:`~graph_tool.Graph`
187
        Graph to be used.
188
    deg : string or :class:`~graph_tool.PropertyMap`
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.

    Returns
    -------
    average : float
        The average of the given degree or property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    edge_average : Average of edge properties.
205
    distance_histogram : Shortest-distance histogram.
206
207
208
209
210
211
212
213
214
215
216

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import poisson, seed
    >>> seed(42)
217
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
218
    >>> print(gt.vertex_average(g, "in"))
Tiago Peixoto's avatar
Tiago Peixoto committed
219
    (5.092, 0.07188557574367754)
220
221
    """

222
223
224
225
    ret = libgraph_tool_stats.\
          get_vertex_average(g._Graph__graph, _degree(g, deg))
    return ret

226

227
def edge_average(g, eprop):
228
229
230
231
232
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
233
    g : :class:`~graph_tool.Graph`
234
        Graph to be used.
235
    eprop : :class:`~graph_tool.PropertyMap`
236
237
238
239
240
241
242
243
244
245
246
247
248
249
        Edge property to be used for the histogram.

    Returns
    -------
    average : float
        The average of the given property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
250
    distance_histogram : Shortest-distance histogram.
251
252
253
254
255
256
257
258
259
260

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy import arange
261
262
263
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(1000, lambda: (5, 5))
264
265
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
266
    >>> print(gt.edge_average(g, eprop))
Tiago Peixoto's avatar
Tiago Peixoto committed
267
    (0.49674035434130187, 0.004094604069093868)
268
269
    """

270
271
272
273
    ret = libgraph_tool_stats.\
          get_edge_average(g._Graph__graph, _prop("e", g, eprop))
    return ret

274

275
def remove_labeled_edges(g, label):
276
    """Remove every edge `e` such that `label[e] != 0`."""
277
278
279
280
281
    g.stash_filter(all=False, directed=True, reversed=True)
    libgraph_tool_stats.\
          remove_labeled_edges(g._Graph__graph, _prop("e", g, label))
    g.pop_filter(all=False, directed=True, reversed=True)

282

Tiago Peixoto's avatar
Tiago Peixoto committed
283
def label_parallel_edges(g, mark_only=False, count_all=False, eprop=None):
284
285
    r"""Label edges which are parallel, i.e, have the same source and target
    vertices. For each parallel edge set :math:`PE`, the labelling starts from 0
Tiago Peixoto's avatar
Tiago Peixoto committed
286
287
288
289
290
291
292
293
294
    to :math:`|PE|-1`. (If `count_all==True`, the range is 0 to :math:`|PE|`
    instead). If `mark_only==True`, all parallel edges are simply marked with
    the value 1. If the `eprop` parameter is given
    (a :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
    if eprop is None:
        if mark_only:
            eprop = g.new_edge_property("bool")
        else:
            eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
295
    libgraph_tool_stats.\
Tiago Peixoto's avatar
Tiago Peixoto committed
296
297
          label_parallel_edges(g._Graph__graph, _prop("e", g, eprop),
                               mark_only, count_all)
298
    return eprop
Tiago Peixoto's avatar
Tiago Peixoto committed
299

300

301
def remove_parallel_edges(g):
302
303
    """Remove all parallel edges from the graph. Only one edge from each
    parallel edge set is left."""
304
305
306
    eprop = label_parallel_edges(g)
    remove_labeled_edges(g, eprop)

307

Tiago Peixoto's avatar
Tiago Peixoto committed
308
def label_self_loops(g, mark_only=False, eprop=None):
309
    """Label edges which are self-loops, i.e, the source and target vertices are
Tiago Peixoto's avatar
Tiago Peixoto committed
310
311
312
313
    the same. For each self-loop edge set :math:`SL`, the labelling starts from 0
    to :math:`|SL|-1`. If `mark_only == True`, self-loops are labeled with 1
    and others with 0. If the `eprop` parameter is given
    (a :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
314

Tiago Peixoto's avatar
Tiago Peixoto committed
315
316
317
318
319
    if eprop is None:
        if mark_only:
            eprop = g.new_edge_property("bool")
        else:
            eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
320
    libgraph_tool_stats.\
Tiago Peixoto's avatar
Tiago Peixoto committed
321
322
          label_self_loops(g._Graph__graph, _prop("e", g, eprop),
                           mark_only)
323
    return eprop
324

325

326
def remove_self_loops(g):
327
    """Remove all self-loops edges from the graph."""
328
329
330
    eprop = label_self_loops(g)
    remove_labeled_edges(g, eprop)

331
332

def distance_histogram(g, weight=None, bins=[0, 1], samples=None,
333
334
335
                       float_count=True):
    r"""
    Return the shortest-distance histogram for each vertex pair in the graph.
336

337
338
339
340
341
342
    Parameters
    ----------
    g : :class:`Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: None)
        Edge weights.
343
    bins : list of bins (optional, default: [0, 1])
344
        List of bins to be used for the histogram. The values given represent
345
        the edges of the bins (i.e. lower and upper bounds). If the list
346
347
348
        contains two values, this will be used to automatically create an
        appropriate bin range, with a constant width given by the second value,
        and starting from the first value.
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
    samples : int (optional, default: None)
        If supplied, the distances will be randomly sampled from a number of
        source vertices given by this parameter. It `samples == None` (default),
        all pairs are used.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
    counts : :class:`~numpy.ndarray`
        The bin counts.
    bins : :class:`~numpy.ndarray`
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
    distance_histogram : Shortest-distance histogram.

    Notes
    -----
    The algorithm runs in :math:`O(V^2)` time, or :math:`O(V^2\log V)` if
    `weight != None`. If `samples` is supplied, the complexities are
    :math:`O(\text{samples}\times V)`  and
    :math:`O(\text{samples}\times V\log V)`, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    >>> from numpy.random import random, seed
    >>> seed(42)
    >>> g = gt.random_graph(100, lambda: (3, 3))
    >>> hist = gt.distance_histogram(g)
386
    >>> print(hist)
Tiago Peixoto's avatar
Tiago Peixoto committed
387
    [array([    0.,   300.,   861.,  2165.,  3801.,  2576.,   197.]), array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint64)]
388
    >>> hist = gt.distance_histogram(g, samples=10)
389
    >>> print(hist)
Tiago Peixoto's avatar
Tiago Peixoto committed
390
    [array([   0.,   30.,   87.,  221.,  395.,  234.,   23.]), array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint64)]
391
    """
Tiago Peixoto's avatar
Tiago Peixoto committed
392

393
394
395
    if samples != None:
        ret = libgraph_tool_stats.\
              sampled_distance_histogram(g._Graph__graph,
396
397
                                         _prop("e", g, weight),
                                         [float(x) for x in bins],
398
                                         samples, _get_rng())
399
400
401
    else:
        ret = libgraph_tool_stats.\
              distance_histogram(g._Graph__graph, _prop("e", g, weight), bins)
402
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]