# Dataset definition: DBpedia network (v3.6).
# Copyright (C) 2020-2024 Tiago de Paula Peixoto <tiago@skewed.de>
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from .. import *
# Display name and Markdown description of the dataset; the description
# carries a footnote crediting the ICON project.
title = "DBpedia network (v3.6)"
description = """A network among all entries in DBpedia, a project that extracts structured information from Wikipedia. Nodes represent entities in DBpedia and an edge connects two entities based on DBpedia's notion of their relatedness. The data is extracted from the version 3.6 of the database.[^icon]
[^icon]: Description obtained from the [ICON](https://icon.colorado.edu) project."""

# Classification tags attached to this dataset.
tags = [
    'Informational',
    'Relatedness',
    'Unweighted',
    'Multigraph',
]

# Dataset landing page, plus (citation text, DOI link) pairs.
url = 'http://konect.cc/networks/dbpedia-all'
citation = [
    (
        'S. Auer, et al. "Dbpedia: A nucleus for a web of open data." Proc. Int. Semantic Web Conf., 722-735 (2008)',
        'https://doi.org/10.1007/978-3-540-76298-0_52',
    ),
]
# NOTE(review): presumably the identifier of the matching ICON entry -- confirm.
icon_hash = '58322a588cf714f319c427ab'

# Upstream download prefix and file list; parse() unpacks each `files` entry
# as (fnames, alt, fmt).
upstream_prefix = 'http://konect.cc/files'
files = [
    ('download.tsv.dbpedia-all.tar.bz2', None, 'konect'),
]
def fetch_upstream(force=False):
    """Fetch this dataset's upstream files through ``fetch_upstream_files``.

    The dataset name is the last dotted component of this module's
    ``__name__``. It is passed along with the module-level
    ``upstream_prefix`` and ``files``; ``force`` is forwarded unchanged.
    Returns whatever ``fetch_upstream_files`` returns.
    """
    dataset = __name__.split(".")[-1]
    return fetch_upstream_files(dataset, upstream_prefix, files, force)
@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
    """Yield an ``(alt, graph)`` pair for each selected entry of ``files``.

    Every entry of the module-level ``files`` list is unpacked as
    ``(fnames, alt, fmt)``. When ``alts`` is not ``None``, entries whose
    ``alt`` is not contained in ``alts`` are skipped. ``fnames`` may be a
    single string or an iterable of file names.

    The listed upstream files are opened with mode ``"rb"`` and handed to
    ``parse_graph`` together with ``fmt`` and ``directed=True``.
    """
    dataset = __name__.split(".")[-1]
    for sources, alt, fmt in files:
        # Skip alternatives the caller did not ask for.
        if not (alts is None or alt in alts):
            continue
        # Normalise a lone file name to a one-element list.
        paths = [sources] if isinstance(sources, str) else sources
        with ExitStack() as stack:
            handles = []
            for path in paths:
                handle = open_upstream_file(dataset, path, "rb")
                handles.append(stack.enter_context(handle))
            g = parse_graph(handles, fmt, directed=True)
        # The files are closed by the time the graph is handed to the consumer.
        yield alt, g