__init__.py 19.8 KB
Newer Older
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4 5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2017 Tiago de Paula Peixoto <tiago@skewed.de>
7 8 9 10 11 12 13 14 15 16 17 18 19 20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
"""
22 23
``graph_tool.clustering`` - Clustering coefficients
---------------------------------------------------
24 25 26

Provides algorithms for calculation of clustering coefficients,
aka. transitivity.
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41

Summary
+++++++

.. autosummary::
   :nosignatures:

   local_clustering
   global_clustering
   extended_clustering
   motifs
   motif_significance

Contents
++++++++
42 43
"""

44 45
from __future__ import division, absolute_import, print_function

Tiago Peixoto's avatar
Tiago Peixoto committed
46
from .. dl_import import dl_import
47
dl_import("from . import libgraph_tool_clustering as _gt")
48

49
from .. import _degree, _prop, Graph, GraphView, PropertyMap, _get_rng
50 51
from .. topology import isomorphism
from .. generation import random_rewire
52
from .. stats import vertex_hist
53

54
from collections import defaultdict
55
from numpy import *
56
from numpy import random
57
import sys
58

59
__all__ = ["local_clustering", "global_clustering", "extended_clustering",
60
           "motifs", "motif_significance"]
61

Tiago Peixoto's avatar
Tiago Peixoto committed
62

63
def local_clustering(g, prop=None, undirected=True):
64
    r"""
Tiago Peixoto's avatar
Tiago Peixoto committed
65
    Return the local clustering coefficients for all vertices.
66 67 68

    Parameters
    ----------
69
    g : :class:`~graph_tool.Graph`
70
        Graph to be used.
71
    prop : :class:`~graph_tool.PropertyMap` or string, optional
72 73
        Vertex property map where results will be stored. If specified, this
        parameter will also be the return value.
74
    undirected : bool (default: True)
75 76 77 78 79
        Calculate the *undirected* clustering coefficient, if graph is directed
        (this option has no effect if the graph is undirected).

    Returns
    -------
80
    prop : :class:`~graph_tool.PropertyMap`
81 82 83 84 85 86
        Vertex property containing the clustering coefficients.

    See Also
    --------
    global_clustering: global clustering coefficient
    extended_clustering: extended (generalized) clustering coefficient
87
    motifs: motif counting
88 89 90

    Notes
    -----
91 92
    The local clustering coefficient [watts-collective-1998]_ :math:`c_i` is
    defined as
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107

    .. math::
       c_i = \frac{|\{e_{jk}\}|}{k_i(k_i-1)} :\, v_j,v_k \in N_i,\, e_{jk} \in E

    where :math:`k_i` is the out-degree of vertex :math:`i`, and

    .. math::
       N_i = \{v_j : e_{ij} \in E\}

    is the set of out-neighbours of vertex :math:`i`. For undirected graphs the
    value of :math:`c_i` is normalized as

    .. math::
       c'_i = 2c_i.

108
    The implemented algorithm runs in :math:`O(|V|\left< k\right>^2)` time,
109 110 111 112 113 114
    where :math:`\left< k\right>` is the average out-degree.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
115 116 117 118 119 120
    .. testcode::
       :hide:

       np.random.seed(42)
       gt.seed_rng(42)

121
    >>> g = gt.random_graph(1000, lambda: (5,5))
122
    >>> clust = gt.local_clustering(g)
123
    >>> print(gt.vertex_average(g, clust))
Tiago Peixoto's avatar
Tiago Peixoto committed
124
    (0.006177777777777778, 0.00036966184079941211)
125 126 127

    References
    ----------
128
    .. [watts-collective-1998] D. J. Watts and Steven Strogatz, "Collective
Tiago Peixoto's avatar
Tiago Peixoto committed
129
       dynamics of 'small-world' networks", Nature, vol. 393, pp 440-442, 1998.
Tiago Peixoto's avatar
Tiago Peixoto committed
130
       :doi:`10.1038/30918`
131 132
    """

133
    if prop is None:
134
        prop = g.new_vertex_property("double")
135
    if g.is_directed() and undirected:
136
        g = GraphView(g, directed=False, skip_properties=True)
137
    _gt.local_clustering(g._Graph__graph, _prop("v", g, prop))
138 139
    return prop

Tiago Peixoto's avatar
Tiago Peixoto committed
140

141
def global_clustering(g):
142
    r"""
Tiago Peixoto's avatar
Tiago Peixoto committed
143
    Return the global clustering coefficient.
144 145 146

    Parameters
    ----------
147
    g : :class:`~graph_tool.Graph`
148 149 150 151 152 153 154 155 156 157 158
        Graph to be used.

    Returns
    -------
    c : tuple of floats
        Global clustering coefficient and standard deviation (jacknife method)

    See Also
    --------
    local_clustering: local clustering coefficient
    extended_clustering: extended (generalized) clustering coefficient
159
    motifs: motif counting
160 161 162

    Notes
    -----
163 164
    The global clustering coefficient [newman-structure-2003]_ :math:`c` is
    defined as
165 166 167 168 169

    .. math::
       c = 3 \times \frac{\text{number of triangles}}
                          {\text{number of connected triples}}

170
    The implemented algorithm runs in :math:`O(|V|\left< k\right>^2)` time,
171 172 173 174 175 176
    where :math:`\left< k\right>` is the average (total) degree.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
177 178 179 180 181 182
    .. testcode::
       :hide:

       np.random.seed(42)
       gt.seed_rng(42)

183
    >>> g = gt.random_graph(1000, lambda: (5,5))
184
    >>> print(gt.global_clustering(g))
185
    (0.006177777777777778, 0.0003700318726720...)
186 187 188

    References
    ----------
189
    .. [newman-structure-2003] M. E. J. Newman, "The structure and function of
Tiago Peixoto's avatar
Tiago Peixoto committed
190 191
       complex networks", SIAM Review, vol. 45, pp. 167-256, 2003,
       :doi:`10.1137/S003614450342480`
192 193
    """

194 195
    if g.is_directed():
        g = GraphView(g, directed=False, skip_properties=True)
Tiago Peixoto's avatar
Tiago Peixoto committed
196
    c = _gt.global_clustering(g._Graph__graph)
197 198
    return c

Tiago Peixoto's avatar
Tiago Peixoto committed
199

200 201
def extended_clustering(g, props=None, max_depth=3, undirected=False):
    r"""
Tiago Peixoto's avatar
Tiago Peixoto committed
202
    Return the extended clustering coefficients for all vertices.
203 204 205

    Parameters
    ----------
206
    g : :class:`~graph_tool.Graph`
207
        Graph to be used.
208
    props : list of :class:`~graph_tool.PropertyMap` objects, optional
209 210 211 212 213 214 215 216 217 218
        list of vertex property maps where results will be stored. If specified,
        this parameter will also be the return value.
    max_depth : int, optional
        Maximum clustering order (default: 3).
    undirected : bool, optional
        Calculate the *undirected* clustering coefficients, if graph is directed
        (this option has no effect if the graph is undirected).

    Returns
    -------
219
    prop : list of :class:`~graph_tool.PropertyMap` objects
220 221 222 223 224 225
        List of vertex properties containing the clustering coefficients.

    See Also
    --------
    local_clustering: local clustering coefficient
    global_clustering: global clustering coefficient
226
    motifs: motif counting
227 228 229

    Notes
    -----
230 231
    The extended clustering coefficient :math:`c^d_i` of order :math:`d` is
    defined as
232 233

    .. math::
234 235
       c^d_i = \frac{\left|\right\{ \{u,v\}; u,v \in N_i | d_{G(V\setminus
       \{i\})}(u,v) = d \left\}\right|}{{\left|N_i\right| \choose 2}},
236 237 238 239 240 241 242 243 244 245 246

    where :math:`d_G(u,v)` is the shortest distance from vertex :math:`u` to
    :math:`v` in graph :math:`G`, and

    .. math::
       N_i = \{v_j : e_{ij} \in E\}

    is the set of out-neighbours of :math:`i`. According to the above
    definition, we have that the traditional local clustering coefficient is
    recovered for :math:`d=1`, i.e., :math:`c^1_i = c_i`.

247
    The implemented algorithm runs in
Tiago Peixoto's avatar
Tiago Peixoto committed
248
    :math:`O(|V|\left<k\right>^{2+\text{max-depth}})` worst time, where
249
    :math:`\left< k\right>` is the average out-degree.
250 251 252 253 254

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
255 256 257 258 259 260
    .. testcode::
       :hide:

       np.random.seed(42)
       gt.seed_rng(42)

261
    >>> g = gt.random_graph(1000, lambda: (5,5))
262
    >>> clusts = gt.extended_clustering(g, max_depth=5)
263 264
    >>> for i in range(0, 5):
    ...    print(gt.vertex_average(g, clusts[i]))
Tiago Peixoto's avatar
Tiago Peixoto committed
265
    ...
266 267 268 269 270
    (0.00421, 0.00041685103920811...)
    (0.027226666666666666, 0.00100735228307788...)
    (0.11549166666666667, 0.00208139908541773...)
    (0.4128066666666666, 0.00294792212319872...)
    (0.4205716666666667, 0.00315646574114149...)
271 272 273

    References
    ----------
274
    .. [abdo-clustering] A. H. Abdo, A. P. S. de Moura, "Clustering as a
Tiago Peixoto's avatar
Tiago Peixoto committed
275
       measure of the local topology of networks", :arxiv:`physics/0605235`
276 277
    """

278
    if g.is_directed() and undirected:
279
        g = GraphView(g, directed=False)
Tiago Peixoto's avatar
Tiago Peixoto committed
280
    if props is None:
281
        props = []
282
        for i in range(0, max_depth):
283
            props.append(g.new_vertex_property("double"))
284 285
    _gt.extended_clustering(g._Graph__graph,
                            [_prop("v", g, p) for p in props])
286
    return props
287 288


289
def motifs(g, k, p=1.0, motif_list=None, return_maps=False):
290
    r"""
291 292 293
    Count the occurrence of k-size node-induced subgraphs (motifs). A tuple with
    two lists is returned: the list of motifs found, and the list with their
    respective counts.
294 295 296

    Parameters
    ----------
297
    g : :class:`~graph_tool.Graph`
298 299 300
        Graph to be used.
    k : int
        number of vertices of the motifs
301
    p : float or float list (optional, default: `1.0`)
302 303
        uniform fraction of the motifs to be sampled. If a float list is
        provided, it will be used as the fraction at each depth
304
        :math:`[1,\dots,k]` in the algorithm. See [wernicke-efficient-2006]_ for
305
        more details.
306
    motif_list : list of :class:`~graph_tool.Graph` objects, optional
307
        If supplied, the algorithms will only search for the motifs in this list
Tiago Peixoto's avatar
Tiago Peixoto committed
308
        (or isomorphisms).
309
    return_maps : bool (optional, default `False`)
Tiago Peixoto's avatar
Tiago Peixoto committed
310
        If ``True``, a list will be returned, which provides for each motif graph a
311 312
        list of vertex property maps which map the motif to its location in the
        main graph.
313 314 315

    Returns
    -------
316
    motifs : list of :class:`~graph_tool.Graph` objects
317 318 319 320 321 322
        List of motifs of size k found in the Graph. Graphs are grouped
        according to their isomorphism class, and only one of each class appears
        in this list. The list is sorted according to in-degree sequence,
        out-degree-sequence, and number of edges (in this order).
    counts : list of ints
        The number of times the respective motif in the motifs list was counted
323 324 325
    vertex_maps : list of lists of :class:`~graph_tool.PropertyMap` objects
        List for each motif graph containing the locations in the main
        graph. This is only returned if `return_maps == True`.
326 327 328

    See Also
    --------
329
    motif_significance: significance profile of motifs
330 331 332 333 334 335 336
    local_clustering: local clustering coefficient
    global_clustering: global clustering coefficient
    extended_clustering: extended (generalized) clustering coefficient

    Notes
    -----
    This functions implements the ESU and RAND-ESU algorithms described in
337
    [wernicke-efficient-2006]_.
338 339 340 341 342

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
343 344 345 346 347 348
    .. testcode::
       :hide:

       np.random.seed(42)
       gt.seed_rng(42)

349
    >>> g = gt.random_graph(1000, lambda: (5,5))
350
    >>> motifs, counts = gt.motifs(gt.GraphView(g, directed=False), 4)
351
    >>> print(len(motifs))
Tiago Peixoto's avatar
Tiago Peixoto committed
352
    13
353
    >>> print(counts)
Tiago Peixoto's avatar
Tiago Peixoto committed
354
    [115408, 388542, 1031, 1182, 2662, 2138, 833, 28, 16, 5, 5, 3, 4]
Tiago Peixoto's avatar
Tiago Peixoto committed
355

356 357 358

    References
    ----------
359
    .. [wernicke-efficient-2006] S. Wernicke, "Efficient detection of network
360 361
       motifs", IEEE/ACM Transactions on Computational Biology and
       Bioinformatics (TCBB), Volume 3, Issue 4, Pages 347-359, 2006.
Tiago Peixoto's avatar
Tiago Peixoto committed
362
       :doi:`10.1109/TCBB.2006.51`
363
    .. [induced-subgraph-isomorphism] http://en.wikipedia.org/wiki/Induced_subgraph_isomorphism_problem
364 365 366
    """

    sub_list = []
367
    directed_motifs = g.is_directed()
368

369
    if motif_list is not None:
370 371
        directed_motifs = motif_list[0].is_directed()
        for m in motif_list:
372
            if m.is_directed() != directed_motifs:
373
                raise ValueError("all motif graphs must be either directed or undirected!")
374 375 376 377
            if m.num_vertices() != k:
                raise ValueError("all motifs must have the same number of vertices: " + k)
            sub_list.append(m._Graph__graph)

378 379 380
    if directed_motifs != g.is_directed():
        raise ValueError("motifs do not have the same directionality as the graph itself!")

381
    if type(p) == float:
382
        pd = [1.0] * (k - 1)
383 384 385 386 387
        pd.append(p)
    if type(p) == list:
        pd = [float(x) for x in p]

    hist = []
388
    vertex_maps = []
389
    was_directed = g.is_directed()
390 391
    _gt.get_motifs(g._Graph__graph, k, sub_list, hist, vertex_maps,
                   return_maps,  pd, True, len(sub_list) == 0,
392
                   _get_rng())
393 394 395 396 397 398

    # assemble graphs
    temp = []
    for m in sub_list:
        mg = Graph()
        mg._Graph__graph = m
399
        mg.reindex_edges()
400 401 402
        temp.append(mg)
    sub_list = temp

403 404 405 406
    if return_maps:
        list_hist = list(zip(sub_list, hist, vertex_maps))
    else:
        list_hist = list(zip(sub_list, hist))
407
    # sort according to in-degree sequence
408
    list_hist.sort(key=lambda x: sorted([v.in_degree() for v in x[0].vertices()]))
409 410

    # sort according to out-degree sequence
411
    list_hist.sort(key=lambda x: sorted([v.out_degree() for v in x[0].vertices()]))
412 413

    # sort according to ascending number of edges
414
    list_hist.sort(key=lambda x: x[0].num_edges())
415 416 417 418

    sub_list = [x[0] for x in list_hist]
    hist = [x[1] for x in list_hist]

419
    if return_maps:
420
        vertex_maps = [x[2] for x in list_hist]
421 422 423 424
        for i, vlist in enumerate(vertex_maps):
            sub = sub_list[i]
            vertex_maps[i] = [PropertyMap(vm, sub, "v") for vm in vlist]
        return sub_list, hist, vertex_maps
425
    return sub_list, hist
426

Tiago Peixoto's avatar
Tiago Peixoto committed
427

428 429 430
def _graph_sig(g):
    """return the graph signature, i.e., the in and out degree distribution as
    concatenated as a tuple."""
431
    bins = list(range(0, g.num_vertices() + 1))
Tiago Peixoto's avatar
Tiago Peixoto committed
432
    in_dist = vertex_hist(g, "in", bins=bins if g.is_directed() else [0],
433
                          float_count=False)
Tiago Peixoto's avatar
Tiago Peixoto committed
434 435
    out_dist = vertex_hist(g, "out", bins=bins, float_count=False)
    sig = tuple([(in_dist[1][i], in_dist[0][i]) for \
436
                 i in range(len(in_dist[0]))] +
Tiago Peixoto's avatar
Tiago Peixoto committed
437
                [(out_dist[1][i], out_dist[0][i]) for\
438
                 i in range(len(out_dist[0]))])
439 440
    return sig

Tiago Peixoto's avatar
Tiago Peixoto committed
441

442
def motif_significance(g, k, n_shuffles=100, p=1.0, motif_list=None,
443
                       threshold=0, self_loops=False, parallel_edges=False,
444
                       full_output=False, shuffle_model="configuration"):
445 446 447 448 449 450 451
    r"""
    Obtain the motif significance profile, for subgraphs with k vertices. A
    tuple with two lists is returned: the list of motifs found, and their
    respective z-scores.

    Parameters
    ----------
452
    g : :class:`~graph_tool.Graph`
453 454
        Graph to be used.
    k : int
Tiago Peixoto's avatar
Tiago Peixoto committed
455
        Number of vertices of the motifs
456
    n_shuffles : int (optional, default: 100)
Tiago Peixoto's avatar
Tiago Peixoto committed
457
        Number of shuffled networks to consider for the z-score
458
    p : float or float list (optional, default: 1.0)
Tiago Peixoto's avatar
Tiago Peixoto committed
459
        Uniform fraction of the motifs to be sampled. If a float list is
460
        provided, it will be used as the fraction at each depth
461
        :math:`[1,\dots,k]` in the algorithm. See [wernicke-efficient-2006]_ for
462
        more details.
463
    motif_list : list of :class:`~graph_tool.Graph` objects (optional, default: None)
464
        If supplied, the algorithms will only search for the motifs in this list
465 466 467 468
        (isomorphisms)
    threshold : int (optional, default: 0)
        If a given motif count is below this level, it is not considered.
    self_loops : bool (optional, default: False)
469
        Whether or not the shuffled graphs are allowed to contain self-loops
470
    parallel_edges : bool (optional, default: False)
471 472
        Whether or not the shuffled graphs are allowed to contain parallel
        edges.
473
    full_output : bool (optional, default: False)
474 475 476 477
        If set to True, three additional lists are returned: the count
        of each motif, the average count of each motif in the shuffled networks,
        and the standard deviation of the average count of each motif in the
        shuffled networks.
478
    shuffle_model : string (optional, default: "configuration")
479 480
        Shuffle model to use. See :func:`~graph_tool.generation.random_rewire`
        for details.
481 482 483

    Returns
    -------
484
    motifs : list of :class:`~graph_tool.Graph` objects
485 486 487 488 489 490
        List of motifs of size k found in the Graph. Graphs are grouped
        according to their isomorphism class, and only one of each class appears
        in this list. The list is sorted according to in-degree sequence,
        out-degree-sequence, and number of edges (in this order).
    z-scores : list of floats
        The z-score of the respective motives. See below for the definition of
Tiago Peixoto's avatar
Tiago Peixoto committed
491
        the z-score.
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507

    See Also
    --------
    motifs: motif counting or sampling
    local_clustering: local clustering coefficient
    global_clustering: global clustering coefficient
    extended_clustering: extended (generalized) clustering coefficient

    Notes
    -----
    The z-score :math:`z_i` of motif i is defined as

    .. math::
         z_i = \frac{N_i - \left<N^s_i\right>}
         {\sqrt{\left<(N^s_i)^2\right> - \left<N^s_i\right>^2}},

508
    where :math:`N_i` is the number of times motif i found, and :math:`N^s_i`
509
    is the count of the same motif but on a shuffled network. It measures how
Tiago Peixoto's avatar
Tiago Peixoto committed
510
    many standard deviations is each motif count, in respect to an ensemble of
511 512 513 514 515 516 517 518
    randomly shuffled graphs with the same degree sequence.

    The z-scores values are not normalized.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
519
    >>> from numpy import random
520 521
    >>> random.seed(10)
    >>> g = gt.random_graph(100, lambda: (3,3))
522
    >>> motifs, zscores = gt.motif_significance(g, 3)
523
    >>> print(len(motifs))
524
    11
525
    >>> print(zscores)
526
    [0.04162670293683441, 0.046769781223679897, 0.55888261369689918, 0.82906624302416043, -0.41384527710386287, -0.42070845824900477, -1.3262411347517733, 1.82, -0.13, -0.37, -0.22]
527 528
    """

529
    s_ms, counts = motifs(g, k, p, motif_list)
530
    if threshold > 0:
531
        s_ms, counts = list(zip(*[x for x in zip(s_ms, counts) if x[1] > threshold]))
532 533
        s_ms = list(s_ms)
        counts = list(counts)
Tiago Peixoto's avatar
Tiago Peixoto committed
534 535
    s_counts = [0] * len(s_ms)
    s_dev = [0] * len(s_ms)
536

537 538
    # group subgraphs by number of edges
    m_e = defaultdict(lambda: [])
539
    for i in range(len(s_ms)):
540 541
        m_e[_graph_sig(s_ms[i])].append(i)

542 543
    # get samples
    sg = g.copy()
544
    for i in range(0, n_shuffles):
545
        random_rewire(sg, model=shuffle_model, self_loops=self_loops,
546
                      parallel_edges=parallel_edges)
547
        m_temp, count_temp = motifs(sg, k, p, motif_list)
548
        if threshold > 0:
549 550 551
            m_temp, count_temp = list(zip(*[x for x in zip(m_temp, count_temp) \
                                       if x[1] > threshold]))
        for j in range(0, len(m_temp)):
552
            found = False
553
            for l in m_e[_graph_sig(m_temp[j])]:
554 555 556
                if isomorphism(s_ms[l], m_temp[j]):
                    found = True
                    s_counts[l] += count_temp[j]
Tiago Peixoto's avatar
Tiago Peixoto committed
557
                    s_dev[l] += count_temp[j] ** 2
558 559 560
            if not found:
                s_ms.append(m_temp[j])
                s_counts.append(count_temp[j])
Tiago Peixoto's avatar
Tiago Peixoto committed
561
                s_dev.append(count_temp[j] ** 2)
562
                counts.append(0)
Tiago Peixoto's avatar
Tiago Peixoto committed
563
                m_e[_graph_sig(m_temp[j])].append(len(s_ms) - 1)
564

Tiago Peixoto's avatar
Tiago Peixoto committed
565 566
    s_counts = [x / float(n_shuffles) for x in s_counts]
    s_dev = [max(sqrt(x[0] / float(n_shuffles) - x[1] ** 2), 1) \
567
              for x in zip(s_dev, s_counts)]
568

569
    list_hist = list(zip(s_ms, s_counts, s_dev))
570
    # sort according to in-degree sequence
571
    list_hist.sort(key = lambda x: sorted([v.in_degree() for v in x[0].vertices()])),
572 573

    # sort according to out-degree sequence
574
    list_hist.sort(key = lambda x: sorted([v.out_degree() for v in x[0].vertices()]))
575 576

    # sort according to ascending number of edges
577
    list_hist.sort(key = lambda x: x[0].num_edges())
578

579
    s_ms, s_counts, s_dev = list(zip(*list_hist))
580

581
    zscore = [(x[0] - x[1]) / x[2] for x in zip(counts, s_counts, s_dev)]
582 583 584 585 586

    if full_output:
        return s_ms, zscore, counts, s_counts, s_dev
    else:
        return s_ms, zscore