__init__.py 5.17 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
2
3
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
Tiago Peixoto's avatar
Tiago Peixoto committed
4
# Copyright (C) 2021 Tiago de Paula Peixoto <tiago@skewed.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

from openpyxl import load_workbook
import pandas
import shutil
import subprocess
import tempfile
from .. import *

# Descriptive metadata for this dataset (module-level names are kept exactly
# as they were; only the layout of the literals differs).
title = "UN migration stock (2015)"
description = (
    "A network of migration between countries, collected by the United "
    "Nations. A directed edge gives the flow of migration, and an edge "
    "property gives the number of migrants, for each given year and sex. "
    "Estimates are presented for 1990, 1995, 2000, 2005, 2010 and 2015 and "
    "are available for all countries and areas of the world. The estimates "
    "are based on official statistics on the foreign-born or the foreign "
    "population."
)
tags = ["Social", "Economic", "Travel", "Political", "Weighted", "Metadata"]
url = "https://www.un.org/en/development/desa/population/migration/data/estimates2/estimates15.asp"
# One (reference text, link) pair; the link is the same page as `url`.
citation = [
    (
        'United  Nations,  Department  of  Economic  and  Social  Affairs,  Population  Division  (2015). "Trends in International Migrant Stock: The 2015 Revision." (United Nations database, POP/DB/MIG/Stock/Rev.2015)',
        url,
    ),
]
icon_hash = None
# Prefix handed to fetch_upstream_files() together with `files`.
upstream_prefix = "https://www.un.org/en/development/desa/population/migration/data/estimates2/data"
# Each entry is unpacked by parse() as (file name, alt, fmt).
files = [("UN_MigrantStockByOriginAndDestination_2015.xlsx", None, None)]

def fetch_upstream(force=False):
    """Fetch this dataset's upstream files.

    Delegates to ``fetch_upstream_files`` with the last dotted component of
    this module's name, ``upstream_prefix``, ``files`` and ``force``, and
    returns whatever that helper returns.
    """
    dataset = __name__.rsplit(".", 1)[-1]
    return fetch_upstream_files(dataset, upstream_prefix, files, force)

@cache_network()
@coerce_props()
@annotate()
def parse(alts=None):
    """Build the migration graph from the upstream spreadsheet.

    For every ``(fname, alt, fmt)`` entry in ``files`` (skipping entries
    whose ``alt`` is not in ``alts`` when ``alts`` is given), the upstream
    file is copied into a temporary directory, run through
    ``localc --convert-to xlsx`` and loaded with openpyxl.

    The resulting graph has vertex properties ``name``, ``code``, ``region``
    and ``sub_region`` and one integer edge property
    ``migration_{year}_{sex}`` per year/sex combination.  An edge points
    from the vertex named by the column header to the vertex named by the
    row label.  Rows whose fourth retained column is empty are treated as
    region / sub-region headings; their vertices are removed at the end.

    Yields ``(alt, graph)`` pairs.
    """
    dataset = __name__.split(".")[-1]
    for fname, alt, fmt in files:
        if alts is not None and alt not in alts:
            continue

        # Round-trip the upstream file through LibreOffice before reading it.
        with open_upstream_file(dataset, fname, "rb") as f, \
             tempfile.TemporaryDirectory(dir="./cache/temp",
                                         suffix="un_migration") as tdir:
            with open(f"{tdir}/dump.xls", "wb") as fw:
                shutil.copyfileobj(f, fw)
            subprocess.run(["localc", "--convert-to", "xlsx", "dump.xls"],
                           cwd=tdir, check=True)
            wb = load_workbook(f"{tdir}/dump.xlsx")

        g = Graph()
        for prop, value_type in (("name", "string"), ("code", "int"),
                                 ("region", "string"),
                                 ("sub_region", "string")):
            g.vp[prop] = g.new_vp(value_type)
        # Scratch flag (not stored on the graph) marking heading rows.
        is_region = g.new_vp("bool")

        years = [1990, 1995, 2000, 2005, 2010, 2015]
        sexes = ["total", "male", "female"]
        regions = ["Africa", "Asia", "Europe", "Latin America and the Caribbean",
                   "Northern America", "Oceania"]

        for key in [f"migration_{y}_{s}" for y in years for s in sexes]:
            g.ep[key] = g.new_ep("int")

        vertices = {}

        def get_vertex(label):
            # Return the vertex registered under `label`, creating it on
            # first use.
            if label not in vertices:
                node = g.add_vertex()
                vertices[label] = node
                g.vp.name[node] = label
            return vertices[label]

        region_of = g.vp.region
        sub_region_of = g.vp.sub_region
        code_of = g.vp.code

        sheets = list(wb)
        for ti, year in enumerate(years):
            for si, sex in enumerate(sexes):
                # Sheet 0 is skipped; the rest are indexed year-major, with
                # three sheets (one per sex) for every year.
                sheet = sheets[ti * 3 + si + 1]
                table = numpy.array(pandas.DataFrame(sheet.values).values)
                # Row 15 carries the column headers and the data starts on
                # row 16; the first column is dropped from both.
                src_names = table[15][1:]
                body = table[16:, 1:]
                width = body.shape[1]
                flow = g.ep[f"migration_{year}_{sex}"]

                active_region = ""
                active_subregion = ""
                # NOTE(review): only the first 265 data rows are read --
                # presumably the extent of the table; confirm against the
                # spreadsheet.
                for row in range(265):
                    # `tgt` and `row_label` are set at column 0 and reused by
                    # the later columns of the same row.
                    for col in range(width):
                        cell = body[row, col]
                        if col > 6:
                            # Columns 7+ are the per-origin counts.
                            if cell is not None:
                                src = get_vertex(src_names[col])
                                flow[g.edge(src, tgt, add_missing=True)] = cell
                        elif col == 0:
                            row_label = cell
                            tgt = get_vertex(row_label)
                            region_of[tgt] = active_region
                            sub_region_of[tgt] = active_subregion
                        elif col == 2:
                            code_of[tgt] = cell
                        elif col == 3 and cell is None:
                            # Heading row: it becomes the current region or
                            # sub-region for the rows that follow.
                            is_region[tgt] = True
                            if row_label in regions:
                                active_region = row_label
                            else:
                                active_subregion = row_label

        # Drop the vertices that stood for heading rows.
        g.remove_vertex([v for v in g.vertices() if is_region[v]])

        yield alt, g