#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020-2024 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

from .. import *
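# The names used below (fetch_upstream_files, open_upstream_file, parse_graph,
# ExitStack, and the cache_network/coerce_props/annotate decorators) are
# expected to come from the parent package via this wildcard import.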

title = "DBpedia network (v3.6)"
description = """A network among all entries in DBpedia, a project that extracts structured information from Wikipedia. Nodes represent entities in DBpedia, and an edge connects two entities based on DBpedia's notion of their relatedness. The data is extracted from version 3.6 of the database.[^icon]
[^icon]: Description obtained from the [ICON](https://icon.colorado.edu) project."""
tags = ['Informational', 'Relatedness', 'Unweighted', 'Multigraph']
url = 'http://konect.cc/networks/dbpedia-all'
citation = [('S. Auer, et al. "DBpedia: A nucleus for a web of open data." Proc. Int. Semantic Web Conf., 722-735 (2008)', 'https://doi.org/10.1007/978-3-540-76298-0_52')]
icon_hash = '58322a588cf714f319c427ab'
upstream_prefix = 'http://konect.cc/files'
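# Each entry of `files` below is a (file name(s), alternative name, format)
# tuple, matching how parse() unpacks it.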
files = [('download.tsv.dbpedia-all.tar.bz2', None, 'konect')]

def fetch_upstream(force=False):
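    """Fetch this dataset's files from `upstream_prefix`; the dataset name is
    taken from the last component of this module's name."""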
    return fetch_upstream_files(__name__.split(".")[-1], upstream_prefix, files,
                                force)

@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
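    """Parse the fetched upstream files and yield (alt, graph) pairs.

    Each entry in `files` is opened via open_upstream_file and parsed with
    parse_graph as a directed graph in the given format; if `alts` is given,
    only matching alternative names are parsed.
    """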
    global files
    name = __name__.split(".")[-1]
    for fnames, alt, fmt in files:
        if alts is not None and alt not in alts:
            continue
        if isinstance(fnames, str):
            fnames = [fnames]
        with ExitStack() as stack:
            fs = [stack.enter_context(open_upstream_file(name, fn, "rb")) for fn in fnames]
            g = parse_graph(fs, fmt, directed=True)
        yield alt, g