#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

from .. import *

# Short human-readable title of the dataset collection.
title = "Networks with group metadata"
# Long description, one paragraph per network; the trailing "[^icon]" footnote
# credits the ICON project as the source of the text.
description = """Snapshots of LiveJournal, Friendster, Orkut, and YouTube online social networks, as well as DBLP and Amazon. Node metadata represents a post hoc definition of a 'community' that a node belongs to, derived from topical labels of the node or interest-based 'groups' that a node links to.[^icon]

Friendster is an on-line gaming network. Before re-launching as a game website, Friendster was a social networking site where users can form friendship edge each other. Friendster social network also allows users form a group which other members can then join. We consider such user-defined groups as communities. For the social network, we take the induced subgraph of the nodes that either belong to at least one community or are connected to other nodes that belong to at least one community. This data is provided by The Web Archive Project, where the full graph is available. 

LiveJournal is a free on-line blogging community where users declare friendship each other. LiveJournal also allows users form a group which other members can then join.

Orkut is a free on-line social network where users form friendship each other. Orkut also allows users form a group which other members can then join.  This data is provided by Alan Mislove et al.

Youtube is a video-sharing web site that includes a social network. In the Youtube social network, users form friendship each other and users can create groups which other users can join. This data is provided by Alan Mislove et al.

The DBLP computer science bibliography provides a comprehensive list of research papers in computer science. We construct a co-authorship network where two authors are connected if they publish at least one paper together. Publication venue, e.g, journal or conference, defines an individual community; authors who published to a certain journal or conference form a community. 

The Amazon Network was collected by crawling the website. It is based on Customers Who Bought This Item Also Bought feature of the Amazon website. If a product i is frequently co-purchased with product j, the graph contains an undirected edge from i to j. Each product category provided by Amazon defines each community. 

[^icon]: Description obtained from the [ICON](https://icon.colorado.edu) project.
"""
# Classification tags attached to this dataset collection.
tags = ["Online", "Social", "Collaboration", "Informational", "Relatedness", "Unweighted", "Metadata"]
# Upstream landing page for the collection.
url = 'https://snap.stanford.edu/data/com-Friendster.html'
# List of (reference text, link) pairs.
citation = [('J. Yang and J. Leskovec. "Defining and Evaluating Network Communities based on Ground-truth." ICDM, 2012.',
             "http://arxiv.org/abs/1205.6233")]
# NOTE(review): presumably the identifier of this entry in the ICON index —
# confirm against the framework that consumes it.
icon_hash = '56a85c8e26855e083a2f78b4'
# NOTE(review): spelled "ustream_license" (not "upstream_license"); confirm
# which name the surrounding framework reads before renaming.
ustream_license = None
# Base URL handed to fetch_upstream_files() together with the names in `files`.
upstream_prefix = 'https://snap.stanford.edu/data/bigdata/communities'

# One entry per network, shaped as
#   ((edge-list file, community file), alternative name, (format, options)).
# Every network follows the same "com-<stem>" naming scheme upstream, so the
# entries are generated from a (stem, alternative name) table.
files = [
    (
        ("com-{}.ungraph.txt.gz".format(stem), "com-{}.all.cmty.txt.gz".format(stem)),
        alt,
        ("snap", dict(hashed=True)),
    )
    for stem, alt in (
        ("friendster", "friendster"),
        ("lj", "livejournal"),
        ("orkut", "orkut"),
        ("youtube", "youtube"),
        ("dblp", "dblp"),
        ("amazon", "amazon"),
    )
]

def fetch_upstream(force=False):
    """Fetch the upstream files listed in ``files`` for this dataset.

    Delegates to ``fetch_upstream_files``, passing the last dotted component
    of this module's ``__name__``, ``upstream_prefix``, ``files`` and
    ``force`` positionally, and returns whatever that helper returns.
    """
    dataset = __name__.rpartition(".")[2]
    return fetch_upstream_files(dataset, upstream_prefix, files, force)

@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
    """Yield ``(alt, graph)`` pairs for the networks listed in ``files``.

    When ``alts`` is not ``None``, entries whose alternative name is not in
    ``alts`` are skipped.  For each remaining entry the first upstream file is
    parsed as an undirected graph, and the second file is read as text with
    one community per line: every whitespace-separated token is converted to
    ``int``, looked up among the vertex names, and the zero-based line index
    is appended to that vertex's ``communities`` vector property.
    """
    dataset = __name__.split(".")[-1]
    for sources, alt, fmt in files:
        if not (alts is None or alt in alts):
            continue
        if isinstance(sources, str):
            sources = [sources]
        with ExitStack() as stack:
            # Every opened file is registered on the stack so all of them are
            # closed together when this entry is done.
            handles = []
            for source in sources:
                handle = open_upstream_file(dataset, source, "rb")
                handles.append(stack.enter_context(handle))

            g = parse_graph([handles[0]], fmt, directed=False)

            # Map each vertex name back to its vertex for the lookups below.
            vertex_of = {g.vp.name[v]: v for v in g.vertices()}

            memberships = g.new_vp("vector<int>")
            g.vp.communities = memberships

            with io.TextIOWrapper(handles[1], "utf8") as community_text:
                for community_id, row in enumerate(community_text):
                    for token in row.split():
                        vertex = vertex_of[int(token)]
                        memberships[vertex].append(community_id)
        yield alt, g