Commit 1814deec authored by Tiago Peixoto
Browse files

Add un_migrations entry

parent 8c55460f
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2020 Tiago de Paula Peixoto <>
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
from openpyxl import load_workbook
import pandas
import shutil
import subprocess
import tempfile
from .. import *
# Dataset metadata for the "UN migration stock" entry.

# Human-readable title and long description of the dataset.
title = "UN migration stock (2015)"
description = """A network of migration between countries, collected by the United Nations. A directed edge gives the flow of migration, and an edge property gives the number of migrants, for each given year and sex. Estimates are presented for 1990, 1995, 2000, 2005, 2010 and 2015 and are available for all countries and areas of the world. The estimates are based on official statistics on the foreign-born or the foreign population."""

# Free-form classification tags.
tags = ['Social', 'Economic', 'Travel', 'Political', 'Weighted', 'Metadata']

# NOTE(review): `url` is empty in this copy of the file; it looks like the
# address was stripped during extraction -- TODO restore from upstream.
url = ''

# List of (citation text, link) pairs.
# NOTE(review): the second element of the pair was not visible in this copy of
# the file (the literal was left unclosed); an empty string is used as a
# placeholder so the module parses -- TODO restore the original link.
citation = [('United Nations, Department of Economic and Social Affairs, Population Division (2015). "Trends in International Migrant Stock: The 2015 Revision." (United Nations database, POP/DB/MIG/Stock/Rev.2015)',
             '')]

icon_hash = None

# NOTE(review): empty in this copy as well; presumably the base address the
# entries of `files` are fetched from -- TODO restore from upstream.
upstream_prefix = ''

# Upstream files; each entry is unpacked as (fname, alt, fmt) by parse().
files = [('UN_MigrantStockByOriginAndDestination_2015.xlsx', None, None)]
def fetch_upstream(force=False):
    """Fetch this dataset's upstream files.

    Delegates to ``fetch_upstream_files`` using the last component of this
    module's dotted name as the dataset name, together with the module-level
    ``upstream_prefix`` and ``files``.

    NOTE(review): the original call was cut off after ``files,``; forwarding
    ``force`` as the final positional argument is the evident completion
    (the parameter is otherwise unused) -- confirm against the helper's
    signature.
    """
    return fetch_upstream_files(__name__.split(".")[-1], upstream_prefix, files,
                                force)
def parse(alts=None):
    """Build a migration graph from each upstream workbook.

    For every ``(fname, alt, fmt)`` entry in the module-level ``files`` whose
    ``alt`` is selected, the upstream file is copied into a temporary
    directory, converted to ``.xlsx`` by running the external ``localc``
    command, loaded with openpyxl, and turned into a ``Graph`` with one
    integer edge property per (year, sex) combination.

    Parameters
    ----------
    alts : container or None
        When not ``None``, only entries whose ``alt`` is contained in it are
        processed.

    Yields
    ------
    (alt, g)
        The entry's ``alt`` and the graph built from it.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``localc`` conversion exits with a non-zero status
        (``check=True``).

    NOTE(review): this block was repaired from a copy in which indentation
    and several short statements were lost; the lines marked
    "reconstructed" below should be confirmed against the upstream source.
    """
    name = __name__.split(".")[-1]
    for fname, alt, fmt in files:
        if alts is not None and alt not in alts:
            continue  # reconstructed: the `if` had no body

        with open_upstream_file(name, fname, "rb") as f:
            with tempfile.TemporaryDirectory(dir="./cache/temp",
                                             suffix="un_migration") as tdir:
                # The upstream file is written out as "dump.xls" and then
                # re-saved as "dump.xlsx" so that openpyxl can read it.
                with open(f"{tdir}/dump.xls", "wb") as fw:
                    shutil.copyfileobj(f, fw)
                # reconstructed: the call name was lost; `cwd=`/`check=`
                # and the imported `subprocess` module imply subprocess.run.
                subprocess.run(["localc", "--convert-to", "xlsx",
                                "dump.xls"], cwd=tdir, check=True)
                wb = load_workbook(f"{tdir}/dump.xlsx")

        g = Graph()
        # reconstructed: the property name was lost; get_vertex() below
        # stores the vertex label, so this is the "name" property.
        g.vp.name = g.new_vp("string")
        g.vp.code = g.new_vp("int")
        g.vp.region = g.new_vp("string")
        g.vp.sub_region = g.new_vp("string")
        # Internal (not attached to g): marks aggregate rows.
        is_region = g.new_vp("bool")

        years = [1990, 1995, 2000, 2005, 2010, 2015]
        sexes = ["total", "male", "female"]
        regions = ["Africa", "Asia", "Europe", "Latin America and the Caribbean",
                   "Northern America", "Oceania"]

        for year in years:
            for sex in sexes:
                g.ep[f"migration_{year}_{sex}"] = g.new_ep("int")

        vertices = {}

        def get_vertex(label):
            """Return the vertex labelled ``label``, creating it on first use."""
            v = vertices.get(label, None)
            if v is None:
                v = g.add_vertex()
                vertices[label] = v
                g.vp.name[v] = label  # reconstructed, see g.vp.name above
            return v

        wb = list(wb)
        for ti, year in enumerate(years):
            for si, sex in enumerate(sexes):
                # Sheets are indexed year-major / sex-minor; sheet 0 is skipped.
                df = pandas.DataFrame(wb[ti * 3 + si + 1].values)
                # `numpy` is not imported explicitly in this file; presumably
                # it is provided by `from .. import *` -- TODO confirm.
                m = numpy.array(df.values)
                # Row 15 supplies the source labels, data starts at row 16;
                # the first spreadsheet column is dropped from both so that
                # their column indices stay aligned.
                src_names = m[15][1:]
                m = m[16:, 1:]
                weight = g.ep[f"migration_{year}_{sex}"]

                active_region = ""
                active_subregion = ""
                # NOTE(review): 265 is a hard-coded row count, presumably
                # matching the layout of the 2015 workbook.
                for row in range(265):
                    for col in range(m.shape[1]):
                        if col == 0:
                            # Column 0 holds the label of the target vertex.
                            tgt = get_vertex(m[row, col])
                            g.vp.region[tgt] = active_region
                            g.vp.sub_region[tgt] = active_subregion
                        elif col == 2:
                            g.vp.code[tgt] = m[row, col]
                        elif col == 3:
                            # An empty column 3 flags the row as an aggregate
                            # (region / sub-region) rather than a country.
                            if m[row, col] is None:
                                is_region[tgt] = True
                                if m[row, 0] in regions:
                                    active_region = m[row, 0]
                                else:  # reconstructed: `else:` was lost
                                    active_subregion = m[row, 0]
                        elif col > 6:
                            if m[row, col] is None:
                                continue  # reconstructed: no value, no edge
                            src = get_vertex(src_names[col])
                            e = g.edge(src, tgt, add_missing=True)
                            weight[e] = m[row, col]

        # reconstructed: `vs` was computed but never used in the damaged
        # copy; dropping the aggregate vertices is the evident intent.
        # NOTE(review): confirm against the upstream source.
        vs = [v for v in g.vertices() if is_region[v]]
        g.remove_vertex(vs)

        yield alt, g
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment