#! /usr/bin/env python
# graph_tool.py -- a general graph manipulation python module
#
# Copyright (C) 2007 Tiago de Paula Peixoto
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
"""
``graph_tool.clustering`` - Clustering coefficients
---------------------------------------------------
Provides algorithms for calculation of clustering coefficients,
aka. transitivity.
"""
from .. dl_import import dl_import
dl_import("import libgraph_tool_clustering as _gt")
from .. core import _degree, _prop, Graph
from .. topology import isomorphism
from .. generation import random_rewire
from itertools import izip
from numpy import *
import sys
__all__ = ["local_clustering", "global_clustering", "extended_clustering",
"motifs", "motif_significance"]
def local_clustering(g, prop=None, undirected=False):
r"""
Return vertex property containing local clustering coefficients for all
vertices.
Parameters
----------
g : Graph
Graph to be used.
prop : PropertyMap or string, optional
Vertex property map where results will be stored. If specified, this
parameter will also be the return value.
undirected : bool, optional
Calculate the *undirected* clustering coefficient, if graph is directed
(this option has no effect if the graph is undirected).
Returns
-------
prop : PropertyMap
Vertex property containing the clustering coefficients.
See Also
--------
global_clustering: global clustering coefficient
extended_clustering: extended (generalized) clustering coefficient
motifs: motif counting
Notes
-----
The local clustering coefficient [1]_ :math:`c_i` is defined as
.. math::
c_i = \frac{|\{e_{jk}\}|}{k_i(k_i-1)} :\, v_j,v_k \in N_i,\, e_{jk} \in E
where :math:`k_i` is the out-degree of vertex :math:`i`, and
.. math::
N_i = \{v_j : e_{ij} \in E\}
is the set of out-neighbours of vertex :math:`i`. For undirected graphs the
value of :math:`c_i` is normalized as
.. math::
c'_i = 2c_i.
The implemented algorithm runs in :math:`O(|V|\left< k\right>^3)` time,
where :math:`\left< k\right>` is the average out-degree.
If enabled during compilation, this algorithm runs in parallel.
Examples
--------
>>> g = gt.random_graph(1000, lambda: (5,5), seed=42)
>>> clust = gt.local_clustering(g)
>>> print gt.vertex_average(g, clust)
(0.0042016666666666669, 0.00041579094306313759)
References
----------
.. [1] D. J. Watts and Steven Strogatz, "Collective dynamics of
'small-world' networks", Nature, vol. 393, pp 440-442, 1998.
doi:10.1038/30918
"""
if prop == None:
prop = g.new_vertex_property("double")
was_directed = g.is_directed()
if g.is_directed() and undirected:
g.set_directed(False)
try:
_gt.extended_clustering(g._Graph__graph,
[_prop("v", g, prop)])
finally:
if was_directed and undirected:
g.set_directed(True)
return prop
def global_clustering(g):
r"""
Return global clustering coefficients for graphs.
Parameters
----------
g : Graph
Graph to be used.
Returns
-------
c : tuple of floats
Global clustering coefficient and standard deviation (jacknife method)
See Also
--------
local_clustering: local clustering coefficient
extended_clustering: extended (generalized) clustering coefficient
motifs: motif counting
Notes
-----
The global clustering coefficient [1]_ :math:`c` is defined as
.. math::
c = 3 \times \frac{\text{number of triangles}}
{\text{number of connected triples}}
The implemented algorithm runs in :math:`O(|V|\left< k\right>^3)` time,
where :math:`\left< k\right>` is the average (total) degree.
If enabled during compilation, this algorithm runs in parallel.
Examples
--------
>>> g = gt.random_graph(1000, lambda: (5,5), seed=42)
>>> print gt.global_clustering(g)
(0.0083567321834469854, 0.0004417198625120785)
References
----------
.. [1] M. E. J. Newman, "The structure and function of complex networks",
SIAM Review, vol. 45, pp. 167-256, 2003
"""
c =_gt.global_clustering(g._Graph__graph)
return c
def extended_clustering(g, props=None, max_depth=3, undirected=False):
r"""
Return a list of vertex properties containing the extended clustering
coefficients for all vertices.
Parameters
----------
g : Graph
Graph to be used.
props : list of PropertyMap objects, optional
list of vertex property maps where results will be stored. If specified,
this parameter will also be the return value.
max_depth : int, optional
Maximum clustering order (default: 3).
undirected : bool, optional
Calculate the *undirected* clustering coefficients, if graph is directed
(this option has no effect if the graph is undirected).
Returns
-------
prop : list of PropertyMap objects
List of vertex properties containing the clustering coefficients.
See Also
--------
local_clustering: local clustering coefficient
global_clustering: global clustering coefficient
motifs: motif counting
Notes
-----
The definition of the extended clustering coefficient :math:`c^d_i` of order
:math:`d` is defined as
.. math::
c^d_i = \frac{\left|\right\{ \{u,v\}; u,v \in N_i | d_{G(V\setminus
\{i\})}(u,v) = d \left\}\right|}{{\left|N_i\right| \choose 2}},
where :math:`d_G(u,v)` is the shortest distance from vertex :math:`u` to
:math:`v` in graph :math:`G`, and
.. math::
N_i = \{v_j : e_{ij} \in E\}
is the set of out-neighbours of :math:`i`. According to the above
definition, we have that the traditional local clustering coefficient is
recovered for :math:`d=1`, i.e., :math:`c^1_i = c_i`.
The implemented algorithm runs in :math:`O(|V|\left<
k\right>^{2+\text{max_depth}})` worst time, where :math:`\left< k\right>` is
the average out-degree.
If enabled during compilation, this algorithm runs in parallel.
Examples
--------
>>> g = gt.random_graph(1000, lambda: (5,5), seed=42)
>>> clusts = gt.extended_clustering(g, max_depth=5)
>>> for i in xrange(0, 5):
... print gt.vertex_average(g, clusts[i])
...
(0.0042016666666666669, 0.00041579094306313759)
(0.02409, 0.00095845344754511225)
(0.11065333333333334, 0.0019404812805074933)
(0.40180499999999997, 0.0031866048246265693)
(0.43999999999999995, 0.0031172994010129269)
References
----------
.. [1] A. H. Abdo, A. P. S. de Moura, "Clustering as a measure of the local
topology of networks", arXiv:physics/0605235v4
"""
was_directed = g.is_directed()
if g.is_directed() and undirected:
g.set_directed(False)
if props == None:
props = []
for i in xrange(0, max_depth):
props.append(g.new_vertex_property("double"))
try:
_gt.extended_clustering(g._Graph__graph,
[_prop("v", g, p) for p in props])
finally:
if was_directed and undirected:
g.set_directed(True)
return props
def motifs(g, k, p=1.0, motif_list=None, undirected=None, seed=0):
r"""
Count the occurrence of k-size subgraphs (motifs). A tuple with two lists is
returned: the list of motifs found, and the list with their respective
counts.
Parameters
----------
g : Graph
Graph to be used.
k : int
number of vertices of the motifs
p : float or float list, optional (default: 1.0)
uniform fraction of the motifs to be sampled. If a float list is
provided, it will be used as the fraction at each depth
:math:`[1,\dots,k]` in the algorithm. See [wernicke_efficient_2006]_ for
more details.
motif_list : list of Graph objects, optional
If supplied, the algorithms will only search for the motifs in this list
(or isomorphisms thereof)
undirected : bool, optional
Treat the graph as *undirected*, if graph is directed
(this option has no effect if the graph is undirected).
seed : int, optional (default: 0)
Seed for the random number generator. It the value is 0, a random seed
is used.
Returns
-------
motifs : list of Graph objects
List of motifs of size k found in the Graph. Graphs are grouped
according to their isomorphism class, and only one of each class appears
in this list. The list is sorted according to in-degree sequence,
out-degree-sequence, and number of edges (in this order).
counts : list of ints
The number of times the respective motif in the motifs list was counted
See Also
--------
motif_significance: significance profile of motifs
local_clustering: local clustering coefficient
global_clustering: global clustering coefficient
extended_clustering: extended (generalized) clustering coefficient
Notes
-----
This functions implements the ESU and RAND-ESU algorithms described in
[wernicke_efficient_2006]_.
If enabled during compilation, this algorithm runs in parallel.
Examples
--------
>>> g = gt.random_graph(1000, lambda: (5,5), seed=42)
>>> motifs, counts = gt.motifs(g, 4, undirected=True)
>>> print len(motifs)
10
>>> print counts
[116138, 392344, 389, 445, 2938, 996, 792, 3, 14, 3]
References
----------
.. [wernicke_efficient_2006] S. Wernicke, "Efficient detection of network
motifs", IEEE/ACM Transactions on Computational Biology and
Bioinformatics (TCBB), Volume 3, Issue 4, Pages 347-359, 2006.
"""
if seed == 0:
seed = random.randint(0, sys.maxint)
sub_list = []
directed_motifs = g.is_directed() if undirected == None else not undirected
if motif_list != None:
directed_motifs = motif_list[0].is_directed()
for m in motif_list:
if m.is_directed() != directed_motifs:
raise ValueError("all motif graphs must be either directed or undirected")
if m.num_vertices() != k:
raise ValueError("all motifs must have the same number of vertices: " + k)
sub_list.append(m._Graph__graph)
if type(p) == float:
pd = [1.0]*(k-1)
pd.append(p)
if type(p) == list:
pd = [float(x) for x in p]
hist = []
was_directed = g.is_directed()
if g.is_directed() and not directed_motifs:
g.set_directed(False)
try:
_gt.get_motifs(g._Graph__graph, k, sub_list, hist, pd,
True, len(sub_list) == 0,
seed)
finally:
if was_directed and not directed_motifs:
g.set_directed(True)
# assemble graphs
temp = []
for m in sub_list:
mg = Graph()
mg._Graph__graph = m
temp.append(mg)
sub_list = temp
list_hist = zip(sub_list, hist)
# sort according to in-degree sequence
list_hist.sort(lambda x,y: cmp(sorted([v.in_degree() for v in x[0].vertices()]),
sorted([v.in_degree() for v in y[0].vertices()])))
# sort according to out-degree sequence
list_hist.sort(lambda x,y: cmp(sorted([v.out_degree() for v in x[0].vertices()]),
sorted([v.out_degree() for v in y[0].vertices()])))
# sort according to ascending number of edges
list_hist.sort(lambda x,y: cmp(x[0].num_edges(), y[0].num_edges()))
sub_list = [x[0] for x in list_hist]
hist = [x[1] for x in list_hist]
return sub_list, hist
def motif_significance(g, k, n_shuffles=100, p=1.0, motif_list=None,
undirected=None, self_loops=False, parallel_edges=False,
full_output=False, shuffle_strategy="uncorrelated",
seed=0):
r"""
Obtain the motif significance profile, for subgraphs with k vertices. A
tuple with two lists is returned: the list of motifs found, and their
respective z-scores.
Parameters
----------
g : Graph
Graph to be used.
k : int
number of vertices of the motifs
n_shuffles : int, optional (default: 100)
number of shuffled networks to consider for the z-score
p : float or float list, optional (default: 1.0)
uniform fraction of the motifs to be sampled. If a float list is
provided, it will be used as the fraction at each depth
:math:`[1,\dots,k]` in the algorithm. See [wernicke_efficient_2006]_ for
more details.
motif_list : list of Graph objects, optional
If supplied, the algorithms will only search for the motifs in this list
(or isomorphisms thereof)
undirected : bool, optional
Treat the graph as *undirected*, if graph is directed
(this option has no effect if the graph is undirected).
self_loops : bool, optional (default: False)
Whether or not the shuffled graphs are allowed to contain self-loops
parallel_edges : bool, optional (default: False)
Whether or not the shuffled graphs are allowed to contain parallel
edges.
full_output : bool, optional (default: False)
If set to True, three additional lists are returned: the count
of each motif, the average count of each motif in the shuffled networks,
and the standard deviation of the average count of each motif in the
shuffled networks.
shuffle_strategy : string, optional (default: "uncorrelated")
Shuffle strategy to use. Can be either "correlated" or "uncorrelated".
See random_rewire() for details.
seed : int, optional (default: 0)
Seed for the random number generator. It the value is 0, a random seed
is used.
Returns
-------
motifs : list of Graph objects
List of motifs of size k found in the Graph. Graphs are grouped
according to their isomorphism class, and only one of each class appears
in this list. The list is sorted according to in-degree sequence,
out-degree-sequence, and number of edges (in this order).
z-scores : list of floats
The z-score of the respective motives. See below for the definition of
a z-score.
See Also
--------
motifs: motif counting or sampling
local_clustering: local clustering coefficient
global_clustering: global clustering coefficient
extended_clustering: extended (generalized) clustering coefficient
Notes
-----
The z-score :math:`z_i` of motif i is defined as
.. math::
z_i = \frac{N_i - \left}
{\sqrt{\left<(N^s_i)^2\right> - \left^2}},
where :math:`N_i` is the number of times motif i found, and :math:`N^s_i`
is the count of the same motif but on a shuffled network. It measures how
many standard deviations is each motif count, in respect to a ensemble of
randomly shuffled graphs with the same degree sequence.
The z-scores values are not normalized.
If enabled during compilation, this algorithm runs in parallel.
Examples
--------
>>> g = gt.random_graph(100, lambda: (3,3), seed=10)
>>> motifs, zscores = gt.motif_significance(g, 3)
>>> print len(motifs)
12
>>> print zscores
[1.6197267471265697, 1.6271265351937325, 0.99350347553112339, -1.4729502635694927, -1.1004922497513825, -1.1181853811005562, 0.92339606894139736, -0.11, -0.10000000000000001, -0.28999999999999998, -0.22, -0.01]
"""
s_ms, counts = motifs(g, k, p, motif_list, undirected, seed)
s_counts = [0]*len(s_ms)
s_dev = [0]*len(s_ms)
# get samples
sg = g.copy()
for i in xrange(0, n_shuffles):
random_rewire(sg, shuffle_strategy, self_loops=self_loops,
parallel_edges=parallel_edges)
m_temp, count_temp = motifs(sg, k, p, motif_list, undirected, seed)
for j in xrange(0, len(m_temp)):
found = False
for l in xrange(0, len(s_ms)):
if isomorphism(s_ms[l], m_temp[j]):
found = True
s_counts[l] += count_temp[j]
s_dev[l] += count_temp[j]**2
if not found:
s_ms.append(m_temp[j])
s_counts.append(count_temp[j])
s_dev.append(count_temp[j]**2)
counts.append(0)
s_counts = [ x/float(n_shuffles) for x in s_counts ]
s_dev = [ max(sqrt(x[0]/float(n_shuffles) - x[1]**2),1) \
for x in izip(s_dev,s_counts) ]
list_hist = zip(s_ms, s_counts, s_dev)
# sort according to in-degree sequence
list_hist.sort(lambda x,y: cmp(sorted([v.in_degree()\
for v in x[0].vertices()]),
sorted([v.in_degree()\
for v in y[0].vertices()])))
# sort according to out-degree sequence
list_hist.sort(lambda x,y: cmp(sorted([v.out_degree()\
for v in x[0].vertices()]),
sorted([v.out_degree()\
for v in y[0].vertices()])))
# sort according to ascending number of edges
list_hist.sort(lambda x,y: cmp(x[0].num_edges(), y[0].num_edges()))
s_ms, s_counts, s_dev = zip(*list_hist)
zscore = [(x[0] - x[1])/x[2] for x in izip(counts, s_counts, s_dev)]
if full_output:
return s_ms, zscore, counts, s_counts, s_dev
else:
return s_ms, zscore