Commit 3aad40b1 authored by Tiago Peixoto
Browse files

Add inploid

This fixes issue #1.
parent 0875de0f
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from .. import *
# Catalogue metadata for the Inploid dataset entry.
title = "Inploid: an online social Q&A platform"
description = """Inploid is a social question & answer website in Turkish. Users can follow others and see their questions and answers on the main page. Each user is associated with a reputability score which is influenced by feedback of others about questions and answers of the user. Each user can also specify interest in topics. The data is crawled in June 2017 and consist of 39,750 nodes and 57,276 directed links between them. In addition, for each user, reputability scores and top five topics are included in the dataset. Usernames and topics are anonymized."""
tags = ["Online", "Social", "Directed", "Unweighted"]
url = "https://furkangursoy.github.io/datasets/#inploid"

# List of (reference text, reference URL) pairs.
citation = [
    (
        "F. Gursoy and D. Gunnec, Influence maximization in social networks under deterministic linear threshold model, Knowledge-Based Systems (2018)",
        "https://doi.org/10.1016/j.knosys.2018.07.040",
    ),
]
icon_hash = None

# (license name, license URL)
upstream_license = (
    "CC BY 4.0",
    "https://creativecommons.org/licenses/by/4.0/",
)
upstream_prefix = "https://github.com/furkangursoy/datasets/raw/master"

# Each entry is unpacked by parse() as (fnames, alt, fmt): the upstream
# file names, the alternative key, and the format specification. Here the
# edge list and the node table both live inside the same zip archive.
files = [
    (
        (
            "inploid.zip:inploid/links.csv",
            "inploid.zip:inploid/nodes.csv",
        ),
        None,
        ("csv", {"vprops": {"fmt": "csv", "add_missing": True}}),
    ),
]
def fetch_upstream(force=False):
    """Fetch the upstream files listed for this dataset.

    The dataset name is taken from the last component of this module's
    dotted name. It is handed to ``fetch_upstream_files`` together with
    the module-level ``upstream_prefix`` and ``files``; ``force`` is passed
    through unchanged. The helper's return value is returned as is.
    """
    dataset = __name__.rsplit(".", 1)[-1]
    return fetch_upstream_files(dataset, upstream_prefix, files, force)
@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
    """Yield one ``(alt, graph)`` pair per selected entry of ``files``.

    ``alts`` optionally restricts which alternatives are parsed: when it
    is not ``None``, entries whose alternative key is not contained in it
    are skipped. For every remaining entry the upstream files are opened
    in binary mode and handed to ``parse_graph`` with ``directed=True``.
    """
    # NOTE(review): the block structure was reconstructed from a flattened
    # source; confirm that the yield belongs after the ``with`` block.
    dataset = __name__.rsplit(".", 1)[-1]
    for fnames, alt, fmt in files:
        selected = alts is None or alt in alts
        if not selected:
            continue
        # A single file name is treated as a one-element list.
        paths = [fnames] if isinstance(fnames, str) else fnames
        with ExitStack() as stack:
            handles = []
            for path in paths:
                handle = open_upstream_file(dataset, path, "rb")
                handles.append(stack.enter_context(handle))
            g = parse_graph(handles, fmt, directed=True)
        yield alt, g
......@@ -70,7 +70,8 @@ def parse_graph(fs, fmt, directed):
vprops = params.get("vprops", None)
if vprops is not None:
if vprops["fmt"] == "csv":
vprops_from_csv(g, io.TextIOWrapper(fs[1], "utf8"), idxs=g.vp.name, add_missing=False)
vprops_from_csv(g, io.TextIOWrapper(fs[1], "utf8"), idxs=g.vp.name,
add_missing=vprops.get("add_missing", False))
elif vprops["fmt"] == "txt":
vprops_from_txt(g, io.TextIOWrapper(fs[1], "utf8"), pnames=vprops["names"])
......@@ -211,7 +212,19 @@ def parse_pajek(f):
def vprops_from_csv(g, f, idxs=None, idx_col=True, add_missing=True, pnames=None,
csv_options={}):
reader = csv.reader(f, **csv_options)
sample = []
for i, line in enumerate(f):
sample.append(line)
if i > 1000:
break
sample = "".join(sample)
sniffer = csv.Sniffer()
dialect = sniffer.sniff(sample)
f.seek(0)
reader = csv.reader(f, **dict(dict(dialect=dialect), **csv_options))
props = None
if pnames is not None:
props = [g.new_vp("string") for n in pnames]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment