__init__.py 13.7 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
Tiago Peixoto's avatar
Tiago Peixoto committed
3
#
4
5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2013 Tiago de Paula Peixoto <tiago@skewed.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
"""
Tiago Peixoto's avatar
Tiago Peixoto committed
22
23
``graph_tool.stats`` - Miscellaneous statistics
-----------------------------------------------
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

Summary
+++++++

.. autosummary::
   :nosignatures:

   vertex_hist
   edge_hist
   vertex_average
   edge_average
   label_parallel_edges
   remove_parallel_edges
   label_self_loops
   remove_self_loops
   remove_labeled_edges
   distance_histogram

Contents
++++++++

45
46
"""

47
48
from __future__ import division, absolute_import, print_function

Tiago Peixoto's avatar
Tiago Peixoto committed
49
from .. dl_import import dl_import
50
dl_import("from . import libgraph_tool_stats")
Tiago Peixoto's avatar
Tiago Peixoto committed
51

52
from .. import _degree, _prop, _get_rng, GraphView
Tiago Peixoto's avatar
Tiago Peixoto committed
53
from numpy import *
54
55
import numpy
import sys
Tiago Peixoto's avatar
Tiago Peixoto committed
56

57
__all__ = ["vertex_hist", "edge_hist", "vertex_average", "edge_average",
58
           "label_parallel_edges", "remove_parallel_edges",
59
           "label_self_loops", "remove_self_loops", "remove_labeled_edges",
60
           "distance_histogram"]
Tiago Peixoto's avatar
Tiago Peixoto committed
61

Tiago Peixoto's avatar
Tiago Peixoto committed
62

63
def vertex_hist(g, deg, bins=[0, 1], float_count=True):
64
65
66
67
68
    """
    Return the vertex histogram of the given degree type or property.

    Parameters
    ----------
69
    g : :class:`~graph_tool.Graph`
70
        Graph to be used.
71
    deg : string or :class:`~graph_tool.PropertyMap`
72
73
74
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.
75
    bins : list of bins (optional, default: [0, 1])
76
        List of bins to be used for the histogram. The values given represent
77
        the edges of the bins (i.e. lower and upper bounds). If the list
78
79
80
        contains two values, this will be used to automatically create an
        appropriate bin range, with a constant width given by the second value,
        and starting from the first value.
81
82
83
84
85
86
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
87
    counts : :class:`~numpy.ndarray`
88
        The bin counts.
89
    bins : :class:`~numpy.ndarray`
90
91
92
93
94
95
96
        The bin edges.

    See Also
    --------
    edge_hist: Edge histograms.
    vertex_average: Average of vertex properties, degrees.
    edge_average: Average of edge properties.
97
    distance_histogram : Shortest-distance histogram.
98
99
100
101
102
103
104
105
106

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
107
108
109
110
111
112
113
    .. testsetup::

       import numpy.random
       numpy.random.seed(42)
       gt.seed_rng(42)

    >>> from numpy.random import poisson
114
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
115
    >>> print(gt.vertex_hist(g, "out"))
116
117
    [array([  10.,   36.,   90.,  147.,  164.,  165.,  142.,  109.,   70.,
             31.,   28.,    7.,    1.]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13], dtype=uint64)]
118
119
    """

Tiago Peixoto's avatar
Tiago Peixoto committed
120
    ret = libgraph_tool_stats.\
121
122
          get_vertex_histogram(g._Graph__graph, _degree(g, deg),
                               [float(x) for x in bins])
123
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
124

125
126

def edge_hist(g, eprop, bins=[0, 1], float_count=True):
127
128
129
130
131
    """
    Return the edge histogram of the given property.

    Parameters
    ----------
132
    g : :class:`~graph_tool.Graph`
133
        Graph to be used.
134
    eprop : :class:`~graph_tool.PropertyMap`
135
        Edge property to be used for the histogram.
136
    bins : list of bins (optional, default: [0, 1])
137
        List of bins to be used for the histogram. The values given represent
138
        the edges of the bins (i.e. lower and upper bounds). If the list
139
140
141
        contains two values, this will be used to automatically create an
        appropriate bin range, with a constant width given by the second value,
        and starting from the first value.
142
143
144
145
146
147
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
148
    counts : :class:`~numpy.ndarray`
149
        The bin counts.
150
    bins : :class:`~numpy.ndarray`
151
152
153
154
155
156
157
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    vertex_average : Average of vertex properties, degrees.
    edge_average : Average of edge properties.
158
    distance_histogram : Shortest-distance histogram.
159
160
161
162
163
164
165
166
167

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
168
169
170
171
172
173
    .. testsetup::

       import numpy.random
       numpy.random.seed(42)
       gt.seed_rng(42)

174
    >>> from numpy import arange
175
    >>> from numpy.random import random
176
    >>> g = gt.random_graph(1000, lambda: (5, 5))
177
178
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
179
    >>> print(gt.edge_hist(g, eprop, linspace(0, 1, 11)))
180
    [array([ 483.,  462.,  467.,  493.,  498.,  486.,  515.,  552.,  496.,  548.]), array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ])]
181
182
183

    """

Tiago Peixoto's avatar
Tiago Peixoto committed
184
    ret = libgraph_tool_stats.\
185
186
          get_edge_histogram(g._Graph__graph, _prop("e", g, eprop),
                             [float(x) for x in bins])
187
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]
Tiago Peixoto's avatar
Tiago Peixoto committed
188

189

190
def vertex_average(g, deg):
191
192
193
194
195
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
196
    g : :class:`~graph_tool.Graph`
197
        Graph to be used.
198
    deg : string or :class:`~graph_tool.PropertyMap`
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
        Degree or property to be used for the histogram. It can be either "in",
        "out" or "total", for in-, out-, or total degree of the vertices. It can
        also be a vertex property map.

    Returns
    -------
    average : float
        The average of the given degree or property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    edge_average : Average of edge properties.
215
    distance_histogram : Shortest-distance histogram.
216
217
218
219
220
221
222
223
224

    Notes
    -----
    The algorithm runs in :math:`O(|V|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
225
226
227
228
229
230
231
    .. testsetup::

       import numpy.random
       numpy.random.seed(42)
       gt.seed_rng(42)

    >>> from numpy.random import poisson
232
    >>> g = gt.random_graph(1000, lambda: (poisson(5), poisson(5)))
233
    >>> print(gt.vertex_average(g, "in"))
Tiago Peixoto's avatar
Tiago Peixoto committed
234
    (4.982, 0.06855418295042251)
235
236
    """

237
238
239
240
    ret = libgraph_tool_stats.\
          get_vertex_average(g._Graph__graph, _degree(g, deg))
    return ret

241

242
def edge_average(g, eprop):
243
244
245
246
247
    """
    Return the average of the given degree or vertex property.

    Parameters
    ----------
248
    g : :class:`~graph_tool.Graph`
249
        Graph to be used.
250
    eprop : :class:`~graph_tool.PropertyMap`
251
252
253
254
255
256
257
258
259
260
261
262
263
264
        Edge property to be used for the histogram.

    Returns
    -------
    average : float
        The average of the given property.
    std : float
        The standard deviation of the average.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
265
    distance_histogram : Shortest-distance histogram.
266
267
268
269
270
271
272
273
274

    Notes
    -----
    The algorithm runs in :math:`O(|E|)` time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
275
276
277
278
279
280
    .. testsetup::

       import numpy.random
       numpy.random.seed(42)
       gt.seed_rng(42)

281
    >>> from numpy import arange
282
    >>> from numpy.random import random
283
    >>> g = gt.random_graph(1000, lambda: (5, 5))
284
285
    >>> eprop = g.new_edge_property("double")
    >>> eprop.get_array()[:] = random(g.num_edges())
286
    >>> print(gt.edge_average(g, eprop))
Tiago Peixoto's avatar
Tiago Peixoto committed
287
    (0.49849732125677476, 0.004086182531863621)
288
289
    """

290
291
292
293
    ret = libgraph_tool_stats.\
          get_edge_average(g._Graph__graph, _prop("e", g, eprop))
    return ret

294

295
def remove_labeled_edges(g, label):
296
    """Remove every edge `e` such that `label[e] != 0`."""
297
298
    u = GraphView(g, directed=True, reversed=g.is_reversed(),
                  skip_properties=True)
299
    libgraph_tool_stats.\
300
          remove_labeled_edges(u._Graph__graph, _prop("e", g, label))
301

302

Tiago Peixoto's avatar
Tiago Peixoto committed
303
def label_parallel_edges(g, mark_only=False, count_all=False, eprop=None):
304
305
    r"""Label edges which are parallel, i.e, have the same source and target
    vertices. For each parallel edge set :math:`PE`, the labelling starts from 0
Tiago Peixoto's avatar
Tiago Peixoto committed
306
307
308
309
310
311
312
313
314
    to :math:`|PE|-1`. (If `count_all==True`, the range is 0 to :math:`|PE|`
    instead). If `mark_only==True`, all parallel edges are simply marked with
    the value 1. If the `eprop` parameter is given
    (a :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
    if eprop is None:
        if mark_only:
            eprop = g.new_edge_property("bool")
        else:
            eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
315
    libgraph_tool_stats.\
Tiago Peixoto's avatar
Tiago Peixoto committed
316
317
          label_parallel_edges(g._Graph__graph, _prop("e", g, eprop),
                               mark_only, count_all)
318
    return eprop
Tiago Peixoto's avatar
Tiago Peixoto committed
319

320

321
def remove_parallel_edges(g):
322
323
    """Remove all parallel edges from the graph. Only one edge from each
    parallel edge set is left."""
324
325
326
    eprop = label_parallel_edges(g)
    remove_labeled_edges(g, eprop)

327

Tiago Peixoto's avatar
Tiago Peixoto committed
328
def label_self_loops(g, mark_only=False, eprop=None):
329
    """Label edges which are self-loops, i.e, the source and target vertices are
Tiago Peixoto's avatar
Tiago Peixoto committed
330
331
332
333
    the same. For each self-loop edge set :math:`SL`, the labelling starts from 0
    to :math:`|SL|-1`. If `mark_only == True`, self-loops are labeled with 1
    and others with 0. If the `eprop` parameter is given
    (a :class:`~graph_tool.PropertyMap`), the labelling is stored there."""
334

Tiago Peixoto's avatar
Tiago Peixoto committed
335
336
337
338
339
    if eprop is None:
        if mark_only:
            eprop = g.new_edge_property("bool")
        else:
            eprop = g.new_edge_property("int32_t")
Tiago Peixoto's avatar
Tiago Peixoto committed
340
    libgraph_tool_stats.\
Tiago Peixoto's avatar
Tiago Peixoto committed
341
342
          label_self_loops(g._Graph__graph, _prop("e", g, eprop),
                           mark_only)
343
    return eprop
344

345

346
def remove_self_loops(g):
347
    """Remove all self-loops edges from the graph."""
348
349
350
    eprop = label_self_loops(g)
    remove_labeled_edges(g, eprop)

351
352

def distance_histogram(g, weight=None, bins=[0, 1], samples=None,
353
354
355
                       float_count=True):
    r"""
    Return the shortest-distance histogram for each vertex pair in the graph.
356

357
358
359
360
361
362
    Parameters
    ----------
    g : :class:`Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: None)
        Edge weights.
363
    bins : list of bins (optional, default: [0, 1])
364
        List of bins to be used for the histogram. The values given represent
365
        the edges of the bins (i.e. lower and upper bounds). If the list
366
367
368
        contains two values, this will be used to automatically create an
        appropriate bin range, with a constant width given by the second value,
        and starting from the first value.
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
    samples : int (optional, default: None)
        If supplied, the distances will be randomly sampled from a number of
        source vertices given by this parameter. It `samples == None` (default),
        all pairs are used.
    float_count : bool (optional, default: True)
        If True, the counts in each histogram bin will be returned as floats. If
        False, they will be returned as integers.

    Returns
    -------
    counts : :class:`~numpy.ndarray`
        The bin counts.
    bins : :class:`~numpy.ndarray`
        The bin edges.

    See Also
    --------
    vertex_hist : Vertex histograms.
    edge_hist : Edge histograms.
    vertex_average : Average of vertex degree, properties.
    distance_histogram : Shortest-distance histogram.

    Notes
    -----
    The algorithm runs in :math:`O(V^2)` time, or :math:`O(V^2\log V)` if
    `weight != None`. If `samples` is supplied, the complexities are
    :math:`O(\text{samples}\times V)`  and
    :math:`O(\text{samples}\times V\log V)`, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
402
403
404
405
406
407
    .. testsetup::

       import numpy.random
       numpy.random.seed(42)
       gt.seed_rng(42)

408
409
    >>> g = gt.random_graph(100, lambda: (3, 3))
    >>> hist = gt.distance_histogram(g)
410
    >>> print(hist)
Tiago Peixoto's avatar
Tiago Peixoto committed
411
    [array([    0.,   300.,   866.,  2206.,  3893.,  2476.,   159.]), array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint64)]
412
    >>> hist = gt.distance_histogram(g, samples=10)
413
    >>> print(hist)
Tiago Peixoto's avatar
Tiago Peixoto committed
414
    [array([   0.,   30.,   84.,  217.,  385.,  249.,   25.]), array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint64)]
415
    """
Tiago Peixoto's avatar
Tiago Peixoto committed
416

417
418
419
    if samples != None:
        ret = libgraph_tool_stats.\
              sampled_distance_histogram(g._Graph__graph,
420
421
                                         _prop("e", g, weight),
                                         [float(x) for x in bins],
422
                                         samples, _get_rng())
423
424
425
    else:
        ret = libgraph_tool_stats.\
              distance_histogram(g._Graph__graph, _prop("e", g, weight), bins)
426
    return [array(ret[0], dtype="float64") if float_count else ret[0], ret[1]]