Commit b676f0bf authored by Tiago Peixoto

Add plant_pol_robertson

parent 5dec248c
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from .. import *
from openpyxl import load_workbook
import pandas
import shutil
import subprocess
import tempfile
# Static metadata describing this dataset entry.

# Human-readable dataset title.
title = "Robertson plant-pollinator web"
# Markdown description; the [^icon] footnote credits the ICON project as the
# source of the description text.
description = """A bipartite network of plants and pollinators, from southwestern Illinois, USA[^icon]
[^icon]: Description obtained from the [ICON](https://icon.colorado.edu) project."""
# Classification tags for the dataset.
tags = ['Biological', 'Food web', 'Unweighted']
# Upstream landing page of the dataset.
url = 'https://iwdb.nceas.ucsb.edu/html/robertson_1929.html'
# List of (citation text, link) pairs; the link here is a web search for the
# citation text rather than a direct publication URL.
citation = [('C. Robertson, "Flowers and insects: lists of visitors to four hundred and fifty-three flowers." Carlinville, IL, USA, C. Robertson. (1929)', 'https://www.google.com/search?q=C.+Robertson%2C+"Flowers+and+insects%3A+lists+of+visitors+to+four+hundred+and+fifty-three+flowers."+Carlinville%2C+IL%2C+USA%2C+C.+Robertson.+(1929)&oq=C.+Robertson%2C+"Flowers+and+insects%3A+lists+of+visitors+to+four+hundred+and+fifty-three+flowers."+Carlinville%2C+IL%2C+USA%2C+C.+Robertson.+(1929)')]
# Presumably the identifier of the matching ICON record -- TODO confirm.
icon_hash = '56a85c8e26855e083a2f790b'
# NOTE(review): spelled "ustream_license" (not "upstream_license"); confirm
# which name the surrounding framework reads before renaming.
ustream_license = None
# Passed, together with `files`, to fetch_upstream_files() by fetch_upstream().
upstream_prefix = 'https://iwdb.nceas.ucsb.edu/data/plant_pollinator/excel'
# One (file name or list of file names, alt, fmt) tuple per upstream file;
# parse() unpacks the tuples in that order.
files = [('robertson_1929.xls', None, None)]
def fetch_upstream(force=False):
    """Fetch the upstream files of this dataset.

    Delegates to ``fetch_upstream_files`` with the dataset name (the last
    component of this module's dotted name), ``upstream_prefix`` and
    ``files``.

    :param force: forwarded unchanged to ``fetch_upstream_files``.
    :returns: whatever ``fetch_upstream_files`` returns.
    """
    dataset_name = __name__.rpartition(".")[-1]
    return fetch_upstream_files(dataset_name, upstream_prefix, files, force)
@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
    """Yield ``(alt, graph)`` for every selected entry of ``files``.

    For each entry the upstream ``.xls`` workbook is copied into a temporary
    directory, converted to ``.xlsx`` with the ``localc`` command (LibreOffice
    Calc launcher) and read through openpyxl.  Each data row contributes one
    undirected edge between a plant vertex and a pollinator vertex.

    Vertex properties written:

    * ``name``          -- plant or pollinator name (columns 0 and 2)
    * ``family``        -- family (column 1 for plants, column 4 for
      pollinators)
    * ``order``         -- order, set for pollinators only (column 3)
    * ``is_pollinator`` -- set to ``True`` for pollinators; not assigned for
      plants

    :param alts: optional container of ``alt`` identifiers; entries of
        ``files`` whose ``alt`` is not in it are skipped.  ``None`` selects
        every entry.
    :raises subprocess.CalledProcessError: if the ``localc`` conversion exits
        with a non-zero status (``check=True``).
    :raises IndexError: if the sheet holds fewer rows than expected.
    """
    name = __name__.split(".")[-1]
    # Number of data rows read from the sheet.  Row 0 is skipped (presumably
    # the header row).  NOTE(review): hard-coded to the size of the upstream
    # sheet; a shorter sheet raises IndexError rather than being silently
    # truncated.
    n_rows = 15265
    for fnames, alt, _fmt in files:
        if alts is not None and alt not in alts:
            continue
        if isinstance(fnames, str):
            fnames = [fnames]
        with ExitStack() as stack:
            fs = [stack.enter_context(open_upstream_file(name, fn, "rb"))
                  for fn in fnames]
            with tempfile.TemporaryDirectory(dir="./cache/temp") as tdir:
                with open(f"{tdir}/dump.xls", "wb") as fw:
                    shutil.copyfileobj(fs[0], fw)
                # The conversion runs inside tdir, so dump.xlsx is written
                # next to dump.xls.
                subprocess.run(["localc", "--convert-to", "xlsx", "dump.xls"],
                               cwd=tdir, check=True)
                wb = load_workbook(f"{tdir}/dump.xlsx", read_only=True)
                try:
                    # Materialise every cell value while the file still
                    # exists; the DataFrame constructor consumes the lazy
                    # row generator.
                    m = pandas.DataFrame(wb.active.values).values
                finally:
                    # Read-only workbooks keep the file open until closed
                    # explicitly; release it before tdir is removed.
                    wb.close()

        g = Graph(directed=False)
        g.vp.name = g.new_vp("string")
        g.vp.family = g.new_vp("string")
        g.vp.order = g.new_vp("string")
        g.vp.is_pollinator = g.new_vp("bool")

        # Plants and pollinators share one name -> vertex lookup, so a name
        # occurring in both columns maps to a single vertex.
        vs = {}
        for i in range(1, n_rows + 1):
            pname, pfam, polname, polorder, polfam = m[i, :5]

            if pname not in vs:
                v = g.add_vertex()
                g.vp.name[v] = pname
                g.vp.family[v] = pfam
                vs[pname] = v
            else:
                v = vs[pname]

            if polname not in vs:
                u = g.add_vertex()
                g.vp.name[u] = polname
                g.vp.family[u] = polfam
                g.vp.order[u] = polorder
                g.vp.is_pollinator[u] = True
                vs[polname] = u
            else:
                u = vs[polname]

            # One edge per row; repeated rows therefore add parallel edges.
            g.add_edge(v, u)

        yield alt, g
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment