#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

"""Dataset entry ``entries/plant_pol_vazquez``.

Declares the metadata of the Vazquez & Simberloff plant-pollinator webs and
provides ``fetch_upstream`` / ``parse`` to download the upstream Excel file
and turn each of its web sheets into a bipartite graph.
"""

# NOTE(review): names used below but not imported explicitly (Graph, ExitStack,
# numpy, open_upstream_file, fetch_upstream_files, cache_network, coerce_props,
# annotate) are presumably provided by this star import -- confirm in the
# parent package.
from .. import *
from openpyxl import load_workbook
import pandas
import shutil
import subprocess
import tempfile

title = "Vazquez & Simberloff plant-pollinator webs"
description = """Eight bipartite networks of plants and pollinators, from the Nahuel Huapi National Park and surrounding areas in Rio Negro, Argentina, from September 1999 to February 2000. Edge weights represent the frequency of species interaction, and there is a common set of vertices across all eight webs[^icon]
[^icon]: Description obtained from the [ICON](https://icon.colorado.edu) project."""
tags = ['Biological', 'Food web', 'Weighted', 'Multilayer']
url = 'https://iwdb.nceas.ucsb.edu/html/vazquez_2002.html'
citation = [('D.P. Vazquez and D. Simberloff, "Changes in interaction biodiversity induced by an introduced ungulate." Ecology Letters 6, 1077-1083 (2003)', 'http://onlinelibrary.wiley.com/doi/10.1046/j.1461-0248.2003.00534.x/abstract')]
icon_hash = '56a85c8e26855e083a2f790f'
# NOTE(review): "ustream_license" looks like a typo for "upstream_license";
# the name is kept because the code that reads it is not visible here.
ustream_license = None
upstream_prefix = 'https://iwdb.nceas.ucsb.edu/data/plant_pollinator/excel'
# (upstream file name, alternative name == worksheet name, format). The third
# field is unpacked but never used by parse().
files = [('vazquez_2002.xls', 'Llao Llao', None),
         ('vazquez_2002.xls', 'Cerro Lopez', None),
         ('vazquez_2002.xls', 'Safariland', None),
         ('vazquez_2002.xls', 'Arroyo Goye', None),
         ('vazquez_2002.xls', 'Mascardi (nc)', None),
         ('vazquez_2002.xls', 'Mascardi (c)', None),
         ('vazquez_2002.xls', 'Quetrihue (nc)', None),
         ('vazquez_2002.xls', 'Quetrihue (c)', None),
         ('vazquez_2002.xls', 'All sites pooled', None)]


def fetch_upstream(force=False):
    """Fetch the upstream files listed in ``files``.

    Delegates to ``fetch_upstream_files`` with this entry's name (the last
    component of ``__name__``), ``upstream_prefix`` and ``files``; ``force``
    is passed through unchanged and the helper's result is returned.
    """
    return fetch_upstream_files(__name__.split(".")[-1], upstream_prefix, files,
                                force)


def _read_sheets(xls_file, alt):
    """Convert an ``.xls`` stream to ``.xlsx`` and read three of its sheets.

    ``xls_file`` is a binary file object holding the upstream workbook. It is
    copied into a temporary directory and converted with the external
    ``localc`` command (LibreOffice Calc's launcher), because openpyxl cannot
    read the legacy ``.xls`` format.

    Returns a tuple ``(web, plants, pollinators)`` of object arrays holding
    the cells of sheet ``alt``, ``"Plant sp"`` and ``"Pollinator sp"``.

    Raises ``subprocess.CalledProcessError`` if the conversion fails and
    ``KeyError`` if one of the sheets does not exist.
    """
    with tempfile.TemporaryDirectory(dir="./cache/temp") as tdir:
        with open(f"{tdir}/dump.xls", "wb") as fw:
            shutil.copyfileobj(xls_file, fw)
        # Run inside tdir so the converted file lands next to the input.
        subprocess.run(["localc", "--convert-to", "xlsx", "dump.xls"],
                       cwd=tdir, check=True)
        wb = load_workbook(f"{tdir}/dump.xlsx", read_only=True)
        try:
            # Read-only workbooks load lazily from the file on disk, so all
            # cells are materialised here, before the directory is removed.
            sheets = [pandas.DataFrame(wb[sheet].values).values
                      for sheet in (alt, "Plant sp", "Pollinator sp")]
        finally:
            # A read-only workbook keeps its file open until closed explicitly.
            wb.close()
    return tuple(sheets)


def _add_bip_edges(g, m):
    """Add the vertices and weighted edges of one interaction matrix to ``g``.

    ``m`` is a 2-D object array: row 0 holds the plant indices, column 0 the
    pollinator indices, and the remaining cells the interaction counts.
    A pollinator vertex is created for every data row; plant vertices are
    created the first time their column is visited. An edge with property
    ``count`` is added for every strictly positive cell; ``None`` and NaN
    cells are skipped.

    Returns ``(plts, pols)``, dictionaries mapping the plant / pollinator
    index found in the sheet to the corresponding vertex.
    """
    plts = {}
    pols = {}
    for i in range(1, m.shape[0]):
        v = g.add_vertex()
        g.vp.pollinator[v] = True
        pols[m[i, 0]] = v
        g.vp.idx[v] = m[i, 0]

        for j in range(1, m.shape[1]):
            u = plts.get(m[0, j])
            if u is None:
                u = g.add_vertex()
                g.vp.pollinator[u] = False
                plts[m[0, j]] = u
                g.vp.idx[u] = m[0, j]

            cell = m[i, j]
            if cell is None or numpy.isnan(float(cell)):
                continue
            x = int(cell)
            if x > 0:
                e = g.add_edge(v, u)
                g.ep.count[e] = x
    return plts, pols


@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
    """Yield ``(alt, graph)`` for every requested web of the dataset.

    ``alts`` is an optional collection of alternative names (worksheet names
    from ``files``); when it is ``None`` every entry of ``files`` is parsed.

    Each graph is undirected and carries the vertex properties ``idx`` (the
    index from the sheet) and ``pollinator`` (True for pollinators, False for
    plants), the edge property ``count``, plus one string vertex property per
    column of the ``"Plant sp"`` and ``"Pollinator sp"`` sheets.
    """
    name = __name__.split(".")[-1]
    for fnames, alt, _fmt in files:
        if alts is not None and alt not in alts:
            continue
        if isinstance(fnames, str):
            fnames = [fnames]
        with ExitStack() as stack:
            fs = [stack.enter_context(open_upstream_file(name, fn, "rb"))
                  for fn in fnames]
            web, plant_table, pollinator_table = _read_sheets(fs[0], alt)

        g = Graph(directed=False)
        g.vp.idx = g.new_vp("int")
        g.vp.pollinator = g.new_vp("bool")
        g.ep.count = g.new_ep("int")

        # Drop the first row, the last three rows and the last two columns;
        # presumably a title row and totals -- TODO confirm against the sheet.
        plts, pols = _add_bip_edges(g, web[1:-3, :-2])

        # Copy the species tables into string vertex properties named after
        # the column headers; column 0 holds the species index.
        for table, vs in ((plant_table, plts), (pollinator_table, pols)):
            for j in range(1, table.shape[1]):
                p = table[0, j]
                if p not in g.vp:
                    g.vp[p] = g.new_vp("string")
                for i in range(1, table.shape[0]):
                    idx = table[i, 0]
                    if idx is None:
                        continue
                    g.vp[p][vs[idx]] = table[i, j]

        yield alt, g