__init__.py 39.5 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
Tiago Peixoto's avatar
Tiago Peixoto committed
3
#
4 5
# graph_tool -- a general graph manipulation python module
#
Tiago Peixoto's avatar
Tiago Peixoto committed
6
# Copyright (C) 2006-2013 Tiago de Paula Peixoto <tiago@skewed.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
7 8 9 10 11 12 13 14 15 16 17 18 19 20
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21
"""
``graph_tool.centrality`` - Centrality measures
-----------------------------------------------

This module includes centrality-related algorithms.

Summary
+++++++

.. autosummary::
   :nosignatures:

   pagerank
   betweenness
   central_point_dominance
   closeness
   eigenvector
   katz
   hits
   eigentrust
   trust_transitivity

Contents
++++++++
"""

47 48
from __future__ import division, absolute_import, print_function

Tiago Peixoto's avatar
Tiago Peixoto committed
49
from .. dl_import import dl_import
50
dl_import("from . import libgraph_tool_centrality")
Tiago Peixoto's avatar
Tiago Peixoto committed
51

52
from .. import _prop, ungroup_vector_property
53
from .. topology import shortest_distance
Tiago Peixoto's avatar
Tiago Peixoto committed
54 55
import sys
import numpy
Tiago Peixoto's avatar
Tiago Peixoto committed
56

Tiago Peixoto's avatar
Tiago Peixoto committed
57 58
# Names exported by ``from graph_tool.centrality import *``; these are the
# public centrality functions of this module.
__all__ = ["pagerank", "betweenness", "central_point_dominance", "closeness",
           "eigentrust", "eigenvector", "katz", "hits", "trust_transitivity"]
Tiago Peixoto's avatar
Tiago Peixoto committed
59

Tiago Peixoto's avatar
Tiago Peixoto committed
60

61 62
def pagerank(g, damping=0.85, pers=None, weight=None, prop=None, epsilon=1e-6,
             max_iter=None, ret_iter=False):
    r"""
    Calculate the PageRank of each vertex.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    damping : float, optional (default: 0.85)
        Damping factor.
    pers : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Personalization vector. If omitted, a constant value of :math:`1/N`
        will be used.
    weight : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Edge weights. If omitted, a constant value of 1 will be used.
    prop : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Vertex property map to store the PageRank values. If supplied, it will
        be used uninitialized.
    epsilon : float, optional (default: 1e-6)
        Convergence condition. The iteration will stop if the total delta of all
        vertices is below this value.
    max_iter : int, optional (default: None)
        If supplied, this will limit the total number of iterations.
    ret_iter : bool, optional (default: False)
        If true, the total number of iterations is also returned.

    Returns
    -------
    pagerank : :class:`~graph_tool.PropertyMap`
        A vertex property map containing the PageRank values.
    iterations : int
        The total number of iterations performed. Only returned if
        ``ret_iter`` is true, in which case the result is the tuple
        ``(pagerank, iterations)``.

    See Also
    --------
    betweenness: betweenness centrality
    eigentrust: eigentrust centrality
    eigenvector: eigenvector centrality
    hits: hubs and authority centralities
    trust_transitivity: pervasive trust transitivity

    Notes
    -----
    The value of PageRank [pagerank-wikipedia]_ of vertex v, :math:`PR(v)`, is
    given iteratively by the relation:

    .. math::

        PR(v) = \frac{1-d}{N} + d \sum_{u \in \Gamma^{-}(v)}
                \frac{PR (u)}{d^{+}(u)}

    where :math:`\Gamma^{-}(v)` are the in-neighbours of v, :math:`d^{+}(u)` is
    the out-degree of u, and d is a damping factor.

    If a personalization property :math:`p(v)` is given, the definition becomes:

    .. math::

        PR(v) = (1-d)p(v) + d \sum_{u \in \Gamma^{-}(v)}
                \frac{PR (u)}{d^{+}(u)}

    If edge weights are also given, the equation is then generalized to:

    .. math::

        PR(v) = (1-d)p(v) + d \sum_{u \in \Gamma^{-}(v)}
                \frac{PR (u) w_{u\to v}}{d^{+}(u)}

    where :math:`d^{+}(u)=\sum_{y}A_{u,y}w_{u\to y}` is redefined to be the sum
    of the weights of the out-going edges from u.

    The implemented algorithm progressively iterates the above equations, until
    it no longer changes, according to the parameter epsilon. It has a
    topology-dependent running time.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------

    .. doctest:: pagerank

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> pr = gt.pagerank(g)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=pr,
       ...               vertex_size=gt.prop_to_size(pr, mi=5, ma=15),
       ...               vorder=pr, output="polblogs_pr.pdf")
       <...>

    .. testcode:: pagerank
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=pr,
                     vertex_size=gt.prop_to_size(pr, mi=5, ma=15),
                     vorder=pr, output="polblogs_pr.png")


    .. figure:: polblogs_pr.*
       :align: center

       PageRank values of a political blogs network of [adamic-polblogs]_.

    Now with a personalization vector, and edge weights:

    .. doctest:: pagerank

       >>> d = g.degree_property_map("total")
       >>> periphery = d.a <= 2
       >>> p = g.new_vertex_property("double")
       >>> p.a[periphery] = 100
       >>> pr = gt.pagerank(g, pers=p)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=pr,
       ...               vertex_size=gt.prop_to_size(pr, mi=5, ma=15),
       ...               vorder=pr, output="polblogs_pr_pers.pdf")
       <...>

    .. testcode:: pagerank
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=pr,
                     vertex_size=gt.prop_to_size(pr, mi=5, ma=15),
                     vorder=pr, output="polblogs_pr_pers.png")


    .. figure:: polblogs_pr_pers.*
       :align: center

       Personalized PageRank values of a political blogs network of
       [adamic-polblogs]_, where vertices with very low degree are given
       artificially high scores.

    References
    ----------
    .. [pagerank-wikipedia] http://en.wikipedia.org/wiki/Pagerank
    .. [lawrence-pagerank-1998] P. Lawrence, B. Sergey, M. Rajeev, W. Terry,
       "The pagerank citation ranking: Bringing order to the web", Technical
       report, Stanford University, 1998
    .. [Langville-survey-2005] A. N. Langville, C. D. Meyer, "A Survey of
       Eigenvector Methods for Web Information Retrieval", SIAM Review, vol. 47,
       no. 1, pp. 135-161, 2005, :DOI:`10.1137/S0036144503424786`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`
    """

    # Identity checks against None: "== None" would dispatch to whatever
    # __eq__ the argument type happens to define.
    if max_iter is None:
        # The backend receives 0 when no limit was requested
        # (presumably meaning "no iteration cap" -- confirm in the C++ side).
        max_iter = 0
    if prop is None:
        prop = g.new_vertex_property("double")
        N = len(prop.a)
        # Starting guess: the personalization vector when given, otherwise the
        # uniform value 1/N.
        prop.a = pers.a[:N] if pers is not None else 1. / g.num_vertices()
    ic = libgraph_tool_centrality.\
            get_pagerank(g._Graph__graph, _prop("v", g, prop),
                         _prop("v", g, pers), _prop("e", g, weight),
                         damping, epsilon, max_iter)
    if ret_iter:
        return prop, ic
    return prop

Tiago Peixoto's avatar
Tiago Peixoto committed
221

222 223 224 225 226 227
def betweenness(g, vprop=None, eprop=None, weight=None, norm=True):
    r"""
    Calculate the betweenness centrality for each vertex and edge.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    vprop : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Vertex property map to store the vertex betweenness values.
    eprop : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Edge property map to store the edge betweenness values.
    weight : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Edge property map corresponding to the weight value of each edge.
    norm : bool, optional (default: True)
        Whether or not the betweenness values should be normalized.

    Returns
    -------
    vertex_betweenness : :class:`~graph_tool.PropertyMap`
        A vertex property map with the vertex betweenness values.
    edge_betweenness : :class:`~graph_tool.PropertyMap`
        An edge property map with the edge betweenness values.

    See Also
    --------
    central_point_dominance: central point dominance of the graph
    pagerank: PageRank centrality
    eigentrust: eigentrust centrality
    eigenvector: eigenvector centrality
    hits: hubs and authority centralities
    trust_transitivity: pervasive trust transitivity

    Notes
    -----
    Betweenness centrality of a vertex :math:`C_B(v)` is defined as,

    .. math::

        C_B(v)= \sum_{s \neq v \neq t \in V \atop s \neq t}
                \frac{\sigma_{st}(v)}{\sigma_{st}}

    where :math:`\sigma_{st}` is the number of shortest geodesic paths from s to
    t, and :math:`\sigma_{st}(v)` is the number of shortest geodesic paths from
    s to t that pass through a vertex v.  This may be normalised by dividing
    through the number of pairs of vertices not including v, which is
    :math:`(n-1)(n-2)/2`.

    The algorithm used here is defined in [brandes-faster-2001]_, and has a
    complexity of :math:`O(VE)` for unweighted graphs and :math:`O(VE + V(V+E)
    \log V)` for weighted graphs. The space complexity is :math:`O(VE)`.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------

    .. doctest:: betweenness

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> vp, ep = gt.betweenness(g)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=vp,
       ...               vertex_size=gt.prop_to_size(vp, mi=5, ma=15),
       ...               edge_pen_width=gt.prop_to_size(ep, mi=0.5, ma=5),
       ...               vorder=vp, output="polblogs_betweenness.pdf")
       <...>

    .. testcode:: betweenness
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=vp,
                     vertex_size=gt.prop_to_size(vp, mi=5, ma=15),
                     edge_pen_width=gt.prop_to_size(ep, mi=0.5, ma=5),
                     vorder=vp, output="polblogs_betweenness.png")


    .. figure:: polblogs_betweenness.*
       :align: center

       Betweenness values of a political blogs network of [adamic-polblogs]_.

    References
    ----------
    .. [betweenness-wikipedia] http://en.wikipedia.org/wiki/Centrality#Betweenness_centrality
    .. [brandes-faster-2001] U. Brandes, "A faster algorithm for betweenness
       centrality", Journal of Mathematical Sociology, 2001, :doi:`10.1080/0022250X.2001.9990249`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`
    """
    # Identity checks against None avoid invoking any __eq__/__ne__ the
    # property map type may define.
    if vprop is None:
        vprop = g.new_vertex_property("double")
    if eprop is None:
        eprop = g.new_edge_property("double")
    if weight is not None and weight.value_type() != eprop.value_type():
        # The weight map is converted to the value type of the edge
        # betweenness map before both are handed to the backend.
        nw = g.new_edge_property(eprop.value_type())
        g.copy_property(weight, nw)
        weight = nw
    libgraph_tool_centrality.\
            get_betweenness(g._Graph__graph, _prop("e", g, weight),
                            _prop("e", g, eprop), _prop("v", g, vprop), norm)
    return vprop, eprop

Tiago Peixoto's avatar
Tiago Peixoto committed
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
def closeness(g, weight=None, source=None, vprop=None, norm=True, harmonic=False):
    r"""
    Calculate the closeness centrality for each vertex.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap`, optional (default: None)
        Edge property map corresponding to the weight value of each edge.
    source : :class:`~graph_tool.Vertex`, optional (default: ``None``)
        If specified, the centrality is computed for this vertex alone.
    vprop : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map to store the vertex centrality values.
    norm : bool, optional (default: ``True``)
        Whether or not the centrality values should be normalized.
    harmonic : bool, optional (default: ``False``)
        If true, the sum of the inverse of the distances will be computed,
        instead of the inverse of the sum.

    Returns
    -------
    vertex_closeness : :class:`~graph_tool.PropertyMap`
        A vertex property map with the vertex closeness values. If ``source``
        is specified, a single scalar value for that vertex is returned
        instead.

    See Also
    --------
    central_point_dominance: central point dominance of the graph
    pagerank: PageRank centrality
    eigentrust: eigentrust centrality
    eigenvector: eigenvector centrality
    hits: hubs and authority centralities
    trust_transitivity: pervasive trust transitivity

    Notes
    -----
    The closeness centrality of a vertex :math:`i` is defined as,

    .. math::

        c_i = \frac{1}{\sum_j d_{ij}}

    where :math:`d_{ij}` is the (possibly directed and/or weighted) distance
    from :math:`i` to :math:`j`. In case there is no path between the two
    vertices, here the distance is taken to be zero.

    If ``harmonic == True``, the definition becomes

    .. math::

        c_i = \sum_j\frac{1}{d_{ij}},

    but now, in case there is no path between the two vertices, we take
    :math:`d_{ij} \to\infty` such that :math:`1/d_{ij}=0`.

    If ``norm == True``, the values of :math:`c_i` are normalized by
    :math:`n_i-1` where :math:`n_i` is the size of the (out-) component of
    :math:`i`. If ``harmonic == True``, they are instead simply normalized by
    :math:`N-1`.

    The algorithm has a complexity of :math:`O(N(N + E))` for unweighted graphs
    and :math:`O(N(N+E) \log N)` for weighted graphs. If the option ``source``
    is specified, this drops to :math:`O(N + E)` and :math:`O((N+E)\log N)`
    respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------

    .. doctest:: closeness

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> c = gt.closeness(g)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=c,
       ...               vertex_size=gt.prop_to_size(c, mi=5, ma=15),
       ...               vorder=c, output="polblogs_closeness.pdf")
       <...>

    .. testcode:: closeness
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=c,
                     vertex_size=gt.prop_to_size(c, mi=5, ma=15),
                     vorder=c, output="polblogs_closeness.png")


    .. figure:: polblogs_closeness.*
       :align: center

       Closeness values of a political blogs network of [adamic-polblogs]_.

    References
    ----------
    .. [closeness-wikipedia] https://en.wikipedia.org/wiki/Closeness_centrality
    .. [opsahl-node-2010] Opsahl, T., Agneessens, F., Skvoretz, J., "Node
       centrality in weighted networks: Generalizing degree and shortest
       paths". Social Networks 32, 245-251, 2010 :DOI:`10.1016/j.socnet.2010.03.006`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`

    """
    if source is None:
        # All-vertices case: delegated entirely to the compiled backend.
        if vprop is None:
            vprop = g.new_vertex_property("double")
        libgraph_tool_centrality.\
            closeness(g._Graph__graph, _prop("e", g, weight),
                      _prop("v", g, vprop), harmonic, norm)
        return vprop

    # Single-source case: computed here from one shortest-distance search.
    max_dist = g.num_vertices() + 1
    dist = shortest_distance(g, source=source, weights=weight,
                             max_dist=max_dist)
    # Keep only distances strictly between 0 and max_dist; this drops the
    # source itself (distance 0) and any entry at or beyond the cutoff.
    reachable = (dist.fa < max_dist) & (dist.fa > 0)
    dists = dist.fa[reachable]
    if harmonic:
        c = (1. / dists).sum()
        if norm:
            c /= g.num_vertices() - 1
    else:
        c = 1. / dists.sum()
        if norm:
            # len(dists) is the number of retained vertices, i.e. n_i - 1.
            c *= len(dists)
    return c
Tiago Peixoto's avatar
Tiago Peixoto committed
449

Tiago Peixoto's avatar
Tiago Peixoto committed
450

Tiago Peixoto's avatar
Tiago Peixoto committed
451
def central_point_dominance(g, betweenness):
    r"""
    Calculate the central point dominance of the graph, given the betweenness
    centrality of each vertex.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    betweenness : :class:`~graph_tool.PropertyMap`
        Vertex property map with the betweenness centrality values. The values
        must be normalized.

    Returns
    -------
    cp : float
        The central point dominance.

    See Also
    --------
    betweenness: betweenness centrality

    Notes
    -----
    Let :math:`v^*` be the vertex with the largest relative betweenness
    centrality; then, the central point dominance [freeman-set-1977]_ is defined
    as:

    .. math::

        C'_B = \frac{1}{|V|-1} \sum_{v} \left[C_B(v^*) - C_B(v)\right]

    where :math:`C_B(v)` is the normalized betweenness centrality of vertex
    v. The value of :math:`C'_B` lies in the range [0,1].

    The algorithm has a complexity of :math:`O(V)`.

    Examples
    --------

    >>> g = gt.collection.data["polblogs"]
    >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
    >>> vp, ep = gt.betweenness(g)
    >>> print(gt.central_point_dominance(g, vp))
    0.11610685614353008

    References
    ----------
    .. [freeman-set-1977] Linton C. Freeman, "A Set of Measures of Centrality
       Based on Betweenness", Sociometry, Vol. 40, No. 1,  pp. 35-41, 1977,
       `http://www.jstor.org/stable/3033543 <http://www.jstor.org/stable/3033543>`_
    """

    # The whole computation is delegated to the compiled backend; this wrapper
    # only unwraps the Python-level graph and property map objects.
    compute = libgraph_tool_centrality.get_central_point_dominance
    graph_handle = g._Graph__graph
    betweenness_map = _prop("v", g, betweenness)
    return compute(graph_handle, betweenness_map)

508

509 510 511 512 513 514 515 516 517
def eigenvector(g, weight=None, vprop=None, epsilon=1e-6, max_iter=None):
    r"""
    Calculate the eigenvector centrality of each vertex in the graph, as well as
    the largest eigenvalue.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge property map with the edge weights.
    vprop : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map where the values of eigenvector must be stored. If
        provided, it will be used uninitialized.
    epsilon : float, optional (default: ``1e-6``)
        Convergence condition. The iteration will stop if the total delta of all
        vertices is below this value.
    max_iter : int, optional (default: ``None``)
        If supplied, this will limit the total number of iterations.

    Returns
    -------
    eigenvalue : float
        The largest eigenvalue of the (weighted) adjacency matrix.
    eigenvector : :class:`~graph_tool.PropertyMap`
        A vertex property map containing the eigenvector values.

    See Also
    --------
    betweenness: betweenness centrality
    pagerank: PageRank centrality
    hits: hubs and authority centralities
    trust_transitivity: pervasive trust transitivity

    Notes
    -----

    The eigenvector centrality :math:`\mathbf{x}` is the eigenvector of the
    (weighted) adjacency matrix with the largest eigenvalue :math:`\lambda`,
    i.e. it is the solution of

    .. math::

        \mathbf{A}\mathbf{x} = \lambda\mathbf{x},


    where :math:`\mathbf{A}` is the (weighted) adjacency matrix and
    :math:`\lambda` is the largest eigenvalue.

    The algorithm uses the power method which has a topology-dependent complexity of
    :math:`O\left(N\times\frac{-\log\epsilon}{\log|\lambda_1/\lambda_2|}\right)`,
    where :math:`N` is the number of vertices, :math:`\epsilon` is the ``epsilon``
    parameter, and :math:`\lambda_1` and :math:`\lambda_2` are the largest and
    second largest eigenvalues of the (weighted) adjacency matrix, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    .. testsetup:: eigenvector

       np.random.seed(42)

    .. doctest:: eigenvector

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> w = g.new_edge_property("double")
       >>> w.a = np.random.random(len(w.a)) * 42
       >>> ee, x = gt.eigenvector(g, w)
       >>> print(ee)
       0.0013713102236792602
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=x,
       ...               vertex_size=gt.prop_to_size(x, mi=5, ma=15),
       ...               vorder=x, output="polblogs_eigenvector.pdf")
       <...>

    .. testcode:: eigenvector
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=x,
                     vertex_size=gt.prop_to_size(x, mi=5, ma=15),
                     vorder=x, output="polblogs_eigenvector.png")


    .. figure:: polblogs_eigenvector.*
       :align: center

       Eigenvector values of a political blogs network of
       [adamic-polblogs]_, with random weights attributed to the edges.

    References
    ----------

    .. [eigenvector-centrality] http://en.wikipedia.org/wiki/Centrality#Eigenvector_centrality
    .. [power-method] http://en.wikipedia.org/wiki/Power_iteration
    .. [langville-survey-2005] A. N. Langville, C. D. Meyer, "A Survey of
       Eigenvector Methods for Web Information Retrieval", SIAM Review, vol. 47,
       no. 1, pp. 135-161, 2005, :DOI:`10.1137/S0036144503424786`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`

    """

    if vprop is None:
        # No output map supplied: create one and seed it uniformly with 1/N.
        vprop = g.new_vertex_property("double")
        vprop.a = 1. / g.num_vertices()

    # A missing iteration limit is passed to the backend as 0.
    iter_limit = 0 if max_iter is None else max_iter

    solver = libgraph_tool_centrality.get_eigenvector
    eigenvalue = solver(g._Graph__graph, _prop("e", g, weight),
                        _prop("v", g, vprop), epsilon, iter_limit)
    return eigenvalue, vprop


Tiago Peixoto's avatar
Tiago Peixoto committed
625 626
def katz(g, alpha=0.01, beta=None, weight=None, vprop=None, epsilon=1e-6, max_iter=None):
    r"""
    Calculate the Katz centrality of each vertex in the graph.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    alpha : float, optional (default: ``0.01``)
        Free parameter :math:`\alpha`. This must be smaller than the inverse of
        the largest eigenvalue of the adjacency matrix.
    beta : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map with the local personalization values. If not
        provided, the global value of 1 will be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge property map with the edge weights.
    vprop : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map where the values of the Katz centrality must be
        stored. If provided, it will be used uninitialized.
    epsilon : float, optional (default: ``1e-6``)
        Convergence condition. The iteration will stop if the total delta of all
        vertices is below this value.
    max_iter : int, optional (default: ``None``)
        If supplied, this will limit the total number of iterations.

    Returns
    -------
    centrality : :class:`~graph_tool.PropertyMap`
        A vertex property map containing the Katz centrality values.

    See Also
    --------
    betweenness: betweenness centrality
    pagerank: PageRank centrality
    eigenvector: eigenvector centrality
    hits: hubs and authority centralities
    trust_transitivity: pervasive trust transitivity

    Notes
    -----

    The Katz centrality :math:`\mathbf{x}` is the solution of the nonhomogeneous
    linear system

    .. math::

        \mathbf{x} = \alpha\mathbf{A}\mathbf{x} + \mathbf{\beta},


    where :math:`\mathbf{A}` is the (weighted) adjacency matrix and
    :math:`\mathbf{\beta}` is the personalization vector (if not supplied,
    :math:`\mathbf{\beta} = \mathbf{1}` is assumed).

    The algorithm uses successive iterations of the equation above, which has a
    topology-dependent convergence complexity.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------
    .. testsetup:: katz

       np.random.seed(42)

    .. doctest:: katz

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> w = g.new_edge_property("double")
       >>> w.a = np.random.random(len(w.a)) * 42
       >>> x = gt.katz(g, weight=w)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=x,
       ...               vertex_size=gt.prop_to_size(x, mi=5, ma=15),
       ...               vorder=x, output="polblogs_katz.pdf")
       <...>

    .. testcode:: katz
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=x,
                     vertex_size=gt.prop_to_size(x, mi=5, ma=15),
                     vorder=x, output="polblogs_katz.png")


    .. figure:: polblogs_katz.*
       :align: center

       Katz centrality values of a political blogs network of
       [adamic-polblogs]_, with random weights attributed to the edges.

    References
    ----------

    .. [katz-centrality] http://en.wikipedia.org/wiki/Katz_centrality
    .. [katz-new] L. Katz, "A new status index derived from sociometric analysis",
       Psychometrika 18, Number 1, 39-43, 1953, :DOI:`10.1007/BF02289026`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`
    """

    if vprop is None:
        vprop = g.new_vertex_property("double")
        N = len(vprop.a)
        # Starting guess: the personalization vector when given, otherwise the
        # constant value 1.
        vprop.a = beta.a[:N] if beta is not None else 1.
    if max_iter is None:
        # A missing iteration limit is passed to the backend as 0.
        max_iter = 0
    # The personalization map must be resolved as _prop("v", g, beta), the
    # same (kind, graph, property) argument order used for every other map;
    # passing (beta, vprop) instead would hand vprop to the backend in place
    # of beta.
    libgraph_tool_centrality.\
         get_katz(g._Graph__graph, _prop("e", g, weight), _prop("v", g, vprop),
                  _prop("v", g, beta), float(alpha), epsilon, max_iter)
    return vprop


738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
def hits(g, weight=None, xprop=None, yprop=None, epsilon=1e-6, max_iter=None):
    r"""
    Compute the HITS authority and hub centralities of every vertex.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    weight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge property map with the edge weights.
    xprop : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map where the authority centrality must be stored. If
        omitted, a new ``"double"`` vertex property map is created.
    yprop : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map where the hub centrality must be stored. If
        omitted, a new ``"double"`` vertex property map is created.
    epsilon : float, optional (default: ``1e-6``)
        Convergence condition. The iteration will stop if the total delta of all
        vertices is below this value.
    max_iter : int, optional (default: ``None``)
        If supplied, this will limit the total number of iterations.

    Returns
    -------
    eig : `float`
        The largest eigenvalue of the cocitation matrix.
    x : :class:`~graph_tool.PropertyMap`
        A vertex property map containing the authority centrality values.
    y : :class:`~graph_tool.PropertyMap`
        A vertex property map containing the hub centrality values.

    See Also
    --------
    betweenness: betweenness centrality
    eigenvector: eigenvector centrality
    pagerank: PageRank centrality
    trust_transitivity: pervasive trust transitivity

    Notes
    -----

    The Hyperlink-Induced Topic Search (HITS) centrality assigns hub
    (:math:`\mathbf{y}`) and authority (:math:`\mathbf{x}`) centralities to the
    vertices, following:

    .. math::

        \begin{align}
            \mathbf{x} &= \alpha\mathbf{A}\mathbf{y} \\
            \mathbf{y} &= \beta\mathbf{A}^T\mathbf{x}
        \end{align}


    where :math:`\mathbf{A}` is the (weighted) adjacency matrix and
    :math:`\lambda = 1/(\alpha\beta)` is the largest eigenvalue of the
    cocitation matrix, :math:`\mathbf{A}\mathbf{A}^T`. (Without loss of
    generality, we set :math:`\beta=1` in the algorithm.)

    The algorithm uses the power method which has a topology-dependent complexity of
    :math:`O\left(N\times\frac{-\log\epsilon}{\log|\lambda_1/\lambda_2|}\right)`,
    where :math:`N` is the number of vertices, :math:`\epsilon` is the ``epsilon``
    parameter, and :math:`\lambda_1` and :math:`\lambda_2` are the largest and
    second largest eigenvalues of the (weighted) cocitation matrix, respectively.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------

    .. doctest:: hits

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> ee, x, y = gt.hits(g)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=x,
       ...               vertex_size=gt.prop_to_size(x, mi=5, ma=15),
       ...               vorder=x, output="polblogs_hits_auths.pdf")
       <...>
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=y,
       ...               vertex_size=gt.prop_to_size(y, mi=5, ma=15),
       ...               vorder=y, output="polblogs_hits_hubs.pdf")
       <...>

    .. testcode:: hits
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=x,
                     vertex_size=gt.prop_to_size(x, mi=5, ma=15),
                     vorder=x, output="polblogs_hits_auths.png")
       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=y,
                     vertex_size=gt.prop_to_size(y, mi=5, ma=15),
                     vorder=y, output="polblogs_hits_hubs.png")


    .. figure:: polblogs_hits_auths.*
       :align: left

       HITS authority values of the political blogs network of
       [adamic-polblogs]_.

    .. figure:: polblogs_hits_hubs.*
       :align: right

       HITS hub values of the political blogs network of [adamic-polblogs]_.

    References
    ----------

    .. [hits-algorithm] http://en.wikipedia.org/wiki/HITS_algorithm
    .. [kleinberg-authoritative] J. Kleinberg, "Authoritative sources in a
       hyperlinked environment", Journal of the ACM 46 (5): 604-632, 1999,
       :DOI:`10.1145/324133.324140`.
    .. [power-method] http://en.wikipedia.org/wiki/Power_iteration
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`
    """

    # Allocate whichever output maps the caller did not hand in.
    xprop = g.new_vertex_property("double") if xprop is None else xprop
    yprop = g.new_vertex_property("double") if yprop is None else yprop

    # The backend is handed 0 when no iteration limit was requested.
    iter_limit = 0 if max_iter is None else max_iter

    # The backend fills both maps in place; the eigenvalue reported to the
    # caller is the reciprocal of the value it returns.
    inv_eig = libgraph_tool_centrality.get_hits(g._Graph__graph,
                                                _prop("e", g, weight),
                                                _prop("v", g, xprop),
                                                _prop("v", g, yprop),
                                                epsilon, iter_limit)
    eig = 1. / inv_eig
    return eig, xprop, yprop


Tiago Peixoto's avatar
Tiago Peixoto committed
866
def eigentrust(g, trust_map, vprop=None, norm=False, epsilon=1e-6, max_iter=0,
               ret_iter=False):
    r"""
    Calculate the eigentrust centrality of each vertex in the graph.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    trust_map : :class:`~graph_tool.PropertyMap`
        Edge property map with the values of trust associated with each
        edge. The values must lie in the range [0,1].
    vprop : :class:`~graph_tool.PropertyMap`, optional (default: ``None``)
        Vertex property map where the values of eigentrust must be stored. If
        omitted, a new ``"double"`` vertex property map is created.
    norm : bool, optional (default:  ``False``)
        Normalize the eigentrust values so that the total sum equals 1.
    epsilon : float, optional (default: ``1e-6``)
        Convergence condition. The iteration will stop if the total delta of all
        vertices is below this value.
    max_iter : int, optional (default: ``0``)
        If supplied, this will limit the total number of iterations. ``None``
        is accepted as an alias for the default ``0``.
    ret_iter : bool, optional (default: ``False``)
        If true, the total number of iterations is also returned.

    Returns
    -------
    eigentrust : :class:`~graph_tool.PropertyMap`
        A vertex property map containing the eigentrust values.
    iterations : int
        The total number of iterations. Only returned, as the second element
        of a ``(eigentrust, iterations)`` tuple, if ``ret_iter`` is true.

    See Also
    --------
    betweenness: betweenness centrality
    pagerank: PageRank centrality
    trust_transitivity: pervasive trust transitivity

    Notes
    -----
    The eigentrust [kamvar-eigentrust-2003]_ values :math:`t_i` correspond to
    the following limit

    .. math::

        \mathbf{t} = \lim_{n\to\infty} \left(C^T\right)^n \mathbf{c}

    where :math:`c_i = 1/|V|` and the elements of the matrix :math:`C` are the
    normalized trust values:

    .. math::

        c_{ij} = \frac{\max(s_{ij},0)}{\sum_{j} \max(s_{ij}, 0)}

    The algorithm has a topology-dependent complexity.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------

    .. testsetup:: eigentrust

       np.random.seed(42)

    .. doctest:: eigentrust

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> w = g.new_edge_property("double")
       >>> w.a = np.random.random(len(w.a)) * 42
       >>> t = gt.eigentrust(g, w)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=t,
       ...               vertex_size=gt.prop_to_size(t, mi=5, ma=15),
       ...               vorder=t, output="polblogs_eigentrust.pdf")
       <...>

    .. testcode:: eigentrust
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=t,
                     vertex_size=gt.prop_to_size(t, mi=5, ma=15),
                     vorder=t, output="polblogs_eigentrust.png")


    .. figure:: polblogs_eigentrust.*
       :align: center

       Eigentrust values of the political blogs network of
       [adamic-polblogs]_, with random weights attributed to the edges.


    References
    ----------
    .. [kamvar-eigentrust-2003] S. D. Kamvar, M. T. Schlosser, H. Garcia-Molina
       "The eigentrust algorithm for reputation management in p2p networks",
       Proceedings of the 12th international conference on World Wide Web,
       Pages: 640 - 651, 2003, :doi:`10.1145/775152.775242`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`
    """

    # Identity test: ``==`` would dispatch to whatever comparison the
    # property map type defines instead of simply detecting "not supplied".
    if vprop is None:
        vprop = g.new_vertex_property("double")
    # Accept ``None`` the same way hits() does, instead of forwarding it
    # to the backend unchanged.
    if max_iter is None:
        max_iter = 0
    # The backend writes the values into ``vprop`` and returns the number
    # of iterations it performed.
    n_iter = libgraph_tool_centrality.\
        get_eigentrust(g._Graph__graph, _prop("e", g, trust_map),
                       _prop("v", g, vprop), epsilon, max_iter)
    if norm:
        # Rescale in place so that the stored values add up to 1.
        values = vprop.get_array()
        values[:] /= sum(values)

    if ret_iter:
        return vprop, n_iter
    return vprop

Tiago Peixoto's avatar
Tiago Peixoto committed
979

980
def trust_transitivity(g, trust_map, source=None, target=None, vprop=None):
    r"""
    Calculate the pervasive trust transitivity between chosen (or all) vertices
    in the graph.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be used.
    trust_map : :class:`~graph_tool.PropertyMap`
        Edge property map with the values of trust associated with each
        edge. The values must lie in the range [0,1].
    source : :class:`~graph_tool.Vertex` (optional, default: None)
        Source vertex. All trust values are computed relative to this vertex.
        If left unspecified, the trust values for all sources are computed.
    target : :class:`~graph_tool.Vertex` (optional, default: None)
        The only target for which the trust value will be calculated. If left
        unspecified, the trust values for all targets are computed.
    vprop : :class:`~graph_tool.PropertyMap` (optional, default: None)
        A vertex property map where the values of transitive trust must be
        stored. If omitted, a new ``"vector<double>"`` vertex property map is
        created.

    Returns
    -------
    trust_transitivity : :class:`~graph_tool.PropertyMap` or float
        A vertex vector property map containing, for each source vertex, a
        vector with the trust values for the other vertices. If only one of
        `source` or `target` is specified, this will be a single-valued vertex
        property map containing the trust vector from/to the source/target
        vertex to/from the rest of the network. If both `source` and `target`
        are specified, the result is a single float, with the corresponding
        trust value for the target.

    See Also
    --------
    eigentrust: eigentrust centrality
    betweenness: betweenness centrality
    pagerank: PageRank centrality

    Notes
    -----
    The pervasive trust transitivity between vertices i and j is defined as

    .. math::

        t_{ij} = \frac{\sum_m A_{m,j} w^2_{G\setminus\{j\}}(i\to m)c_{m,j}}
                 {\sum_m A_{m,j} w_{G\setminus\{j\}}(i\to m)}

    where :math:`A_{ij}` is the adjacency matrix, :math:`c_{ij}` is the direct
    trust from i to j, and :math:`w_G(i\to j)` is the weight of the path with
    maximum weight from i to j, computed as

    .. math::

       w_G(i\to j) = \prod_{e\in i\to j} c_e.

    The algorithm measures the transitive trust by finding the paths with
    maximum weight, using Dijkstra's algorithm, to all in-neighbours of a given
    target. This search needs to be performed repeatedly for every target, since
    it needs to be removed from the graph first. For each given source, the
    resulting complexity is therefore :math:`O(N^2\log N)` for all targets, and
    :math:`O(N\log N)` for a single target. For a given target, the complexity
    for obtaining the trust from all given sources is :math:`O(kN\log N)`, where
    :math:`k` is the in-degree of the target. Thus, the complexity for obtaining
    the complete trust matrix is :math:`O(EN\log N)`, where :math:`E` is the
    number of edges in the network.

    If enabled during compilation, this algorithm runs in parallel.

    Examples
    --------

    .. testsetup:: trust_transitivity

       np.random.seed(42)

    .. doctest:: trust_transitivity

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(g))
       >>> g = gt.Graph(g, prune=True)
       >>> w = g.new_edge_property("double")
       >>> w.a = np.random.random(len(w.a))
       >>> g.vp["label"][g.vertex(42)]
       'blogforamerica.com'
       >>> t = gt.trust_transitivity(g, w, source=g.vertex(42))
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=t,
       ...               vertex_size=gt.prop_to_size(t, mi=5, ma=15),
       ...               vorder=t, output="polblogs_trust_transitivity.pdf")
       <...>

    .. testcode:: trust_transitivity
       :hide:

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=t,
                     vertex_size=gt.prop_to_size(t, mi=5, ma=15),
                     vorder=t, output="polblogs_trust_transitivity.png")


    .. figure:: polblogs_trust_transitivity.*
       :align: center

       Trust transitivity values from source vertex 42 of the political blogs
       network of [adamic-polblogs]_, with random weights attributed to the
       edges.

    References
    ----------
    .. [richters-trust-2010] Oliver Richters and Tiago P. Peixoto, "Trust
       Transitivity in Social Networks," PLoS ONE 6, no. 4:
       e18384 (2011), :doi:`10.1371/journal.pone.0018384`
    .. [adamic-polblogs] L. A. Adamic and N. Glance, "The political blogosphere
       and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the
       Weblogging Ecosystem (2005). :DOI:`10.1145/1134271.1134277`

    """

    # "Not supplied" is detected by identity: ``== None`` would go through
    # the (possibly overloaded) equality of property maps and vertices.
    if vprop is None:
        vprop = g.new_vertex_property("vector<double>")

    # The backend is given vertex indices; -1 stands for "not specified".
    if target is None:
        target = -1
    else:
        target = g.vertex_index[target]

    if source is None:
        source = -1
    else:
        source = g.vertex_index[source]

    # The backend stores its results in ``vprop``.
    libgraph_tool_centrality.\
        get_trust_transitivity(g._Graph__graph, source, target,
                               _prop("e", g, trust_map),
                               _prop("v", g, vprop))
    if target != -1 or source != -1:
        # With a fixed source and/or target only component 0 of each vector
        # is kept, turning the result into a single-valued property map.
        vprop = ungroup_vector_property(vprop, [0])[0]
    if target != -1 and source != -1:
        # Both endpoints fixed: return just the value stored for the target.
        return vprop.a[target]
    return vprop