__init__.py 11.9 KB
 Tiago Peixoto committed Dec 01, 2008 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 #! /usr/bin/env python # graph_tool.py -- a general graph manipulation python module # # Copyright (C) 2007 Tiago de Paula Peixoto # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see .  Tiago Peixoto committed Jul 15, 2009 19 20 21 22 23 24 25 26 """ graph_tool.community - Community structure ---------------------------------------------- This module contains algorithms for the computation of community structure on graphs. """  Tiago Peixoto committed Dec 01, 2008 27 28 29 from .. dl_import import dl_import dl_import("import libgraph_tool_community")  Tiago Peixoto committed Dec 03, 2008 30 from .. core import _degree, _prop, Graph, libcore  Tiago Peixoto committed Dec 01, 2008 31 32 import random, sys  Tiago Peixoto committed Aug 21, 2009 33 __all__ = ["community_structure", "modularity", "condensation_graph"]  Tiago Peixoto committed Dec 01, 2008 34 35   Tiago Peixoto committed Jul 15, 2009 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 def community_structure(g, n_iter, n_spins, gamma=1.0, corr= "erdos", spins=None, weight=None, t_range=(100.0, 0.01), verbose=False, history_file=None, seed=0): r""" Obtain the community structure for the given graph, used a Potts model approach. Parameters ---------- g : Graph Graph to be used. n_iter : int Number of iterations. n_spins : int Number of maximum spins to be used. gamma : float (optional, default: 1.0) The :math:\gamma parameter of the hamiltonian. corr : string (optional, default: "erdos") Type of correlation to be assumed: Either "erdos", "random" and "correlated". spins : PropertyMap Vertex property maps to store the spin variables. If this is specified, the values will not be initialized to a random value. weight : PropertyMap (optional, default: None) Edge property map with the optional edge weights. t_range : tuple of floats (optional, default: (100.0, 0.01)) Temperature range. verbose : bool (optional, default: False) Display verbose information. history_file : string (optional, default: None) History file to keep information about the simulated annealing. Returns ------- spins : PropertyMap Vertex property map with the spin values. See Also -------- community_structure: obtain the community structure modularity: calculate the network modularity  Tiago Peixoto committed Aug 21, 2009 77  condensation_graph: network of communities  Tiago Peixoto committed Jul 15, 2009 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146  Notes ----- The method of community detection covered here is an implementation of what was proposed in [reichard_statistical_2006]_. It consists of a simulated annealing_ algorithm which tries to minimize the following hamiltonian: .. math:: \mathcal{H}(\{\sigma\}) = - \sum_{i \neq j} \left(A_{ij} - \gamma p_{ij}\right) \delta(\sigma_i,\sigma_j) where :math:p_{ij} is the probability of vertices i and j being connected, which reduces the problem of community detection to finding the ground states of a Potts spin-glass model. It can be shown that minimizing this hamiltonan, with :math:\gamma=1, is equivalent to maximizing Newman's modularity ([newman_modularity_2006]_). By increasing the parameter :math:\gamma, it's possible also to find sub-communities. It is possible to select three policies for choosing :math:p_{ij} and thus choosing the null model: "random" selects a Erdos-Reyni random graph, "uncorrelated" selects an arbitrary random graph with no vertex-vertex correlations, and "correlated" selects a random graph with average correlation taken from the graph itself. Optionally a weight property can be given by the weight option. The most important parameters for the algorithm are the initial and final temperatures (t_range), and total number of iterations (max_iter). It normally takes some trial and error to determine the best values for a specific graph. To help with this, the history option can be used, which saves to a chosen file the temperature and number of spins per iteration, which can be used to determined whether or not the algorithm converged to the optimal solution. Also, the verbose option prints the computation status on the terminal. .. note:: If the spin property already exists before the computation starts, it's not re-sampled at the beginning. This means that it's possible to continue a previous run, if you saved the graph, by properly setting t_range value, and using the same spin property. If enabled during compilation, this algorithm runs in parallel. Examples -------- >>> from pylab import * >>> from numpy.random import seed >>> seed(42) >>> g = gt.load_graph("community.xml") >>> pos = (g.vertex_properties["pos_x"], g.vertex_properties["pos_y"]) >>> spins = gt.community_structure(g, 10000, 20, t_range=(5, 0.1), ... history_file="community-history1") >>> gt.graph_draw(g, pos=pos, pin=True, vsize=0.3, vcolor=spins, ... output="comm1.png") (...) >>> spins = gt.community_structure(g, 10000, 40, t_range=(5, 0.1), ... gamma=2.5, ... history_file="community-history2") >>> gt.graph_draw(g, pos=pos, pin=True, vsize=0.3, vcolor=spins, ... output="comm2.png") (...) >>> clf() >>> xlabel("iterations") <...> >>> ylabel("number of communities") <...>  Tiago Peixoto committed Aug 16, 2009 147  >>> a = loadtxt("community-history1").transpose()  Tiago Peixoto committed Jul 15, 2009 148 149 150 151 152 153 154 155  >>> plot(a[0], a[2]) [...] >>> savefig("comm1-hist.png") >>> clf() >>> xlabel("iterations") <...> >>> ylabel("number of communities") <...>  Tiago Peixoto committed Aug 16, 2009 156  >>> a = loadtxt("community-history2").transpose()  Tiago Peixoto committed Jul 15, 2009 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191  >>> plot(a[0], a[2]) [...] >>> savefig("comm2-hist.png") .. figure:: comm1.png :align: center Community structure with :math:\gamma=1. .. figure:: comm1-hist.png :align: center Algorithm evolution with :math:\gamma=1 .. figure:: comm2.png :align: center Community structure with :math:\gamma=2.5. .. figure:: comm2-hist.png :align: center Algorithm evolution with :math:\gamma=2.5 References ---------- .. [reichard_statistical_2006] Joerg Reichardt and Stefan Bornholdt, "Statistical Mechanics of Community Detection", Phys. Rev. E 74 (2006) 016110, arXiv:cond-mat/0603718 .. [newman_modularity_2006] M. E. J. Newman, "Modularity and community structure in networks", Proc. Natl. Acad. Sci. USA 103, 8577-8582 (2006), arXiv:physics/0602124 .. _simulated annealing: http://en.wikipedia.org/wiki/Simulated_annealing """  Tiago Peixoto committed Dec 01, 2008 192 193 194 195 196  if spins == None: spins = g.new_vertex_property("int32_t") new_spins = True else: new_spins = False  Tiago Peixoto committed Jul 15, 2009 197 198  if history_file == None: history_file = ""  Tiago Peixoto committed Dec 01, 2008 199 200 201  if seed != 0: seed = random.randint(0, sys.maxint) libgraph_tool_community.community_structure(g._Graph__graph, gamma, corr,  Tiago Peixoto committed Jul 15, 2009 202 203 204 205  n_iter, t_range[1], t_range[0], n_spins, new_spins, seed, verbose, history_file, _prop("e", g, weight),  Tiago Peixoto committed Dec 01, 2008 206 207 208  _prop("v", g, spins)) return spins  Tiago Peixoto committed Jul 15, 2009 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 def modularity(g, prop, weight=None): r""" Calculate Newman's modularity. Parameters ---------- g : Graph Graph to be used. prop : PropertyMap Vertex property map with the community partition. weight : PropertyMap (optional, default: None) Edge property map with the optional edge weights. Returns ------- modularity : float Newman's modularity. See Also -------- community_structure: obtain the community structure modularity: calculate the network modularity  Tiago Peixoto committed Aug 21, 2009 231  condensation_graph: network of communities  Tiago Peixoto committed Jul 15, 2009 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268  Notes ----- Given a specific graph partition specified by prop, Newman's modularity ([newman_modularity_2006]_) is defined by: .. math:: Q = \sum_s e_{ss}-\left(\sum_r e_{rs}\right)^2 where :math:e_{rs} is the fraction of edges which fall between vertices with spin s and r. If enabled during compilation, this algorithm runs in parallel. Examples -------- >>> from pylab import * >>> from numpy.random import seed >>> seed(42) >>> g = gt.load_graph("community.dot") >>> spins = gt.community_structure(g, 10000, 10) >>> gt.modularity(g, spins) 0.53531418856240398 References ---------- .. [newman_modularity_2006] M. E. J. Newman, "Modularity and community structure in networks", Proc. Natl. Acad. Sci. USA 103, 8577-8582 (2006), arXiv:physics/0602124 """ m = libgraph_tool_community.modularity(g._Graph__graph, _prop("e", g, weight), _prop("v", g, prop)) return m  Tiago Peixoto committed Dec 03, 2008 269   Tiago Peixoto committed Aug 21, 2009 270 def condensation_graph(g, prop, weight=None):  Tiago Peixoto committed Jul 15, 2009 271  r"""  Tiago Peixoto committed Aug 21, 2009 272 273  Obtain the condensation graph, where each vertex with the same 'prop' value is condensed in one vertex.  Tiago Peixoto committed Jul 15, 2009 274 275 276 277 278 279 280 281 282 283 284 285  Parameters ---------- g : Graph Graph to be used. prop : PropertyMap Vertex property map with the community partition. weight : PropertyMap (optional, default: None) Edge property map with the optional edge weights. Returns -------  Tiago Peixoto committed Aug 21, 2009 286  condensation_graph : Graph  Tiago Peixoto committed Jul 15, 2009 287 288 289 290 291 292 293 294 295 296  The community network vcount : PropertyMap A vertex property map with the vertex count for each community. ecount : PropertyMap An edge property map with the inter-community edge count for each edge. See Also -------- community_structure: obtain the community structure modularity: calculate the network modularity  Tiago Peixoto committed Aug 21, 2009 297  condensation_graph: network of communities  Tiago Peixoto committed Jul 15, 2009 298 299 300  Notes -----  Tiago Peixoto committed Aug 21, 2009 301 302 303 304  Each vertex in the condensation graph represents one community in the original graph (vertices with the same 'prop' value'), and the edges represent existent edges between vertices of the respective communities in the original graph.  Tiago Peixoto committed Jul 15, 2009 305 306 307 308 309 310 311 312  Examples -------- >>> from pylab import * >>> from numpy.random import poisson, seed >>> seed(42) >>> g = gt.random_graph(1000, lambda: poisson(3), directed=False) >>> spins = gt.community_structure(g, 10000, 100)  Tiago Peixoto committed Aug 21, 2009 313  >>> ng = gt.condensation_graph(g, spins)  Tiago Peixoto committed Jul 15, 2009 314 315 316 317 318 319 320 321 322 323 324 325 326  >>> size = ng[0].new_vertex_property("double") >>> size.get_array()[:] = log(ng[1].get_array()+1) >>> gt.graph_draw(ng[0], vsize=size, vcolor=size, splines=True, ... eprops={"len":20, "penwidth":10}, vprops={"penwidth":10}, ... output="comm-network.png") (...) .. figure:: comm-network.png :align: center Community network of a random graph. The color and sizes of the nodes indicate the size of the corresponding community. """  Tiago Peixoto committed Dec 03, 2008 327  gp = Graph()  Tiago Peixoto committed Jul 15, 2009 328  vcount = gp.new_vertex_property("int32_t")  Tiago Peixoto committed Dec 03, 2008 329  if weight != None:  Tiago Peixoto committed Jul 15, 2009 330  ecount = gp.new_edge_property("double")  Tiago Peixoto committed Dec 03, 2008 331  else:  Tiago Peixoto committed Jul 15, 2009 332  ecount = gp.new_edge_property("int32_t")  Tiago Peixoto committed Dec 03, 2008 333 334 335 336 337  libgraph_tool_community.community_network(g._Graph__graph, gp._Graph__graph, _prop("v", g, prop), _prop("v", g, vcount), _prop("e", g, ecount),  Tiago Peixoto committed Jul 15, 2009 338  _prop("e", g, weight))  Tiago Peixoto committed Dec 03, 2008 339 340  return gp, vcount, ecount