konect.py 3.87 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
 #! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
# Copyright (C) 2006-2015 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import, print_function
import sys
# Python 2 compatibility: when running on an interpreter older than 3.x,
# rebind ``range`` to ``xrange`` for the rest of this module.
if sys.version_info < (3,):
    range = xrange

import os.path
import tempfile
import urllib.request
import tarfile
import warnings
import numpy

from .. import Graph

def load_koblenz_dir(dirname):
    """Build a graph from an extracted Koblenz network dataset directory.

    The tree rooted at ``dirname`` is walked and files are recognized by
    their name prefix:

    * ``README*`` -- the file contents are stored in the ``readme`` graph
      property.
    * ``meta.*`` -- the file contents are stored in the ``meta`` graph
      property.
    * ``out.*`` -- numeric edge list where ``%`` starts a comment. If the
      first line of the file does not contain ``"asym"`` the graph is made
      undirected. The first two columns are one-based vertex indices; a
      third column, if present, is stored in the ``weight`` edge property
      and a fourth in the ``time`` edge property (both of type ``"int"``).
    * ``ent.*`` -- one line per vertex, in vertex-index order, stored
      stripped in the ``meta`` vertex property. A line whose first token is
      ``%`` is a comment; if it has further tokens it is stored in the
      ``meta_desc`` graph property.

    Parameters
    ----------
    dirname : str
        Path of the directory holding the extracted dataset.

    Returns
    -------
    g : Graph
        The graph with the properties described above.

    Notes
    -----
    A ``ValueError`` raised while reading an ``ent.*`` file is not
    propagated; it is reported through :func:`warnings.warn` and whatever
    vertex metadata was read up to that point is kept.
    """
    g = Graph()
    g.gp.meta = g.new_graph_property("string")
    g.gp.readme = g.new_graph_property("string")
    for root, dirs, files in os.walk(dirname):
        for fname in files:
            path = os.path.join(root, fname)
            if fname.startswith("README"):
                with open(path) as f:
                    g.gp.readme = f.read()
            if fname.startswith("meta."):
                with open(path) as f:
                    g.gp.meta = f.read()
            if fname.startswith("out."):
                # The first line carries the format tag; readline() yields
                # "" on an empty file instead of raising StopIteration.
                with open(path) as f:
                    header = f.readline()
                if "asym" not in header:
                    g.set_directed(False)
                # ndmin=2 keeps a single-edge file two-dimensional, so the
                # column slicing below is always valid.
                edges = numpy.loadtxt(path, comments="%", ndmin=2)
                if edges.size > 0:
                    edges[:, :2] -= 1  # we need zero-based indexing
                    g.add_edge_list(edges[:, :2])
                    if edges.shape[1] > 2:
                        g.ep.weight = g.new_edge_property("int")
                        g.ep.weight.a = edges[:, 2]
                    if edges.shape[1] > 3:
                        g.ep.time = g.new_edge_property("int")
                        g.ep.time.a = edges[:, 3]
        # Vertex metadata is read in a second pass over the same directory,
        # after any edge list found there has been added to the graph.
        for fname in files:
            if not fname.startswith("ent."):
                continue
            try:
                g.vp.meta = g.new_vertex_property("string")
                meta = g.vp.meta
                count = 0
                with open(os.path.join(root, fname)) as f:
                    for line in f:
                        vals = line.split()
                        if vals and vals[0] == "%":
                            # A lone "%" is an empty comment; anything
                            # longer describes the metadata columns.
                            if len(vals) > 1:
                                g.gp.meta_desc = g.new_graph_property("string", line)
                            continue
                        # A blank line still consumes a vertex slot (with
                        # empty metadata) so later lines stay aligned with
                        # their vertex index.
                        v = g.vertex(count)
                        meta[v] = line.strip()
                        count += 1
            except ValueError as e:
                warnings.warn("error automatically reading node metadata from file '%s': %s" % (fname, str(e)))
    return g

def get_koblenz_network_data(name):
    """Download a Koblenz network dataset and load it as a graph.

    The archive ``<name>.tar.bz2`` is fetched from the KONECT download
    location into a temporary file, extracted into a temporary directory,
    and parsed with :func:`load_koblenz_dir`. Both temporaries are removed
    before the function returns.

    Parameters
    ----------
    name : str
        Dataset name as used in the download URL; it is also used as the
        suffix of the temporary directory name.

    Returns
    -------
    g : Graph
        The graph built by :func:`load_koblenz_dir` from the extracted
        archive.
    """
    url = 'http://konect.uni-koblenz.de/downloads/tsv/%s.tar.bz2' % name
    with tempfile.TemporaryFile(mode='w+b') as ftemp:
        with urllib.request.urlopen(url) as response:
            buflen = 1 << 20
            # read(amt) is documented to return *up to* amt bytes, so a
            # short chunk does not mean end-of-stream. Only an empty result
            # does; stopping earlier could silently truncate the archive.
            while True:
                buf = response.read(buflen)
                if not buf:
                    break
                ftemp.write(buf)
        ftemp.seek(0)
        with tempfile.TemporaryDirectory(suffix=name) as tempdir:
            with tarfile.open(fileobj=ftemp, mode='r:bz2') as tar:
                # NOTE(review): the archive comes from the network (plain
                # HTTP) and extractall() does not validate member paths;
                # consider restricting extraction to ``tempdir``.
                tar.extractall(path=tempdir)
            return load_koblenz_dir(tempdir)

class LazyKoblenzDataDict(dict):
    """Dictionary that fetches missing entries on first subscript access.

    ``d[k]`` returns the stored value when ``k`` is already present.
    Otherwise the value is obtained from ``get_koblenz_network_data(k)``,
    stored under ``k``, and returned.
    """

    def __getitem__(self, k):
        try:
            return dict.__getitem__(self, k)
        except KeyError:
            # Not stored yet; load it below, outside the handler.
            pass
        graph = get_koblenz_network_data(k)
        dict.__setitem__(self, k, graph)
        return graph


# Module-level dataset cache: ``konect_data[name]`` downloads and loads the
# named dataset on first access and returns the stored graph afterwards.
konect_data = LazyKoblenzDataDict()