Commit 05645a32 authored by Tiago Peixoto

Add entry 'human_brains'

parent 393b0f3d
@@ -426,7 +426,10 @@ def analyze_entries(entries, names=[], skip=[], force=[], cache_only=True,
             if entry.name in ["openstreetmap"]:
                 continue
             for alt, g in entry.parse(lazy=True, cache_only=True):
-                entry.analyses[alt][a] = ret[(entry.name, alt)]
+                try:
+                    entry.analyses[alt][a] = ret[(entry.name, alt)]
+                except KeyError:
+                    entry.analyses[alt][a] = None

 if __name__ == "__main__":
     if len(sys.argv) > 1:
[collapsed diff not shown]
@@ -22,6 +22,7 @@ import pickle
 from googleapiclient.http import MediaIoBaseDownload
 from google_auth_oauthlib.flow import InstalledAppFlow
 from googleapiclient.discovery import build
+from tempfile import TemporaryFile

 if os.path.exists("gdrive_token.pickle"):
     with open('gdrive_token.pickle', 'rb') as f:
@@ -35,15 +36,21 @@ else:
     gdrive_service = None

-def save_gdrive_file(file_id, fobj, check_health=False):
+def save_gdrive_file(file_id, fobj):
     global gdrive_service, creds
     if gdrive_service is None:
         gdrive_service = build('drive', 'v3', credentials=creds)
     request = gdrive_service.files().get_media(fileId=file_id)
-    downloader = MediaIoBaseDownload(fobj, request,
-                                     chunksize=104857600 if not check_health else 1)
-    done = False
-    while not done:
-        status, done = downloader.next_chunk()
-    if check_health:
-        return
+    check = False
+    with TemporaryFile(mode="w+b") as tmp:
+        if fobj is None:
+            check = True
+            fobj = tmp
+        downloader = MediaIoBaseDownload(fobj, request,
+                                         chunksize=104857600 if not check else 1)
+        done = False
+        while not done:
+            status, done = downloader.next_chunk()
+        if check:
+            return
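A hedged usage sketch of the new signature (the Drive file id and cache path below are placeholders, and creds is the credential object loaded from the token pickle above): with a writable file object the download proceeds in 100 MiB chunks into that file, while passing fobj=None routes the data into a throwaway TemporaryFile with a 1-byte chunk size, which is the mode the health-check path in fetch_upstream_files relies on.

# Sketch only; the file id and path are hypothetical.
with open("cache/upstream/human_brains/data.zip", "wb") as out:
    save_gdrive_file("1a2b3c4d5e", out)   # full download into the cache file
save_gdrive_file("1a2b3c4d5e", None)      # health-check mode; nothing is kept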
@@ -54,7 +54,7 @@ analyze.analyze_entries(entries.values(), skip=["pos", "knn_proj"],
 icon_ref = {entry.icon_hash : entry for entry in entries.values()}

-whales = ["openstreetmap"]
+whales = ["openstreetmap", "human_brains"]

 import markdown
@@ -213,7 +213,7 @@ def stats_page():
                            n_undirected=n_undirected, n_bip=n_bip,
                            n_nets_s=n_nets_s, n_directed_s=n_directed_s,
                            n_undirected_s=n_undirected_s, n_bip_s=n_bip_s,
-                           whales=",".join(whales), analyses=analyze.titles,
+                           whales=whales, analyses=analyze.titles,
                            scales=analyze.scales)

 @app.route("/draw/<net>")
@@ -288,7 +288,7 @@ def api_entries():
         tags = set(tags.split(","))
     exclude = request.args.get('exclude', None)
     if exclude is not None:
-        exclude = set(exclude.split(","))
+        exclude = set([x.strip() for x in exclude.split(",")])
     full = bool(request.args.get('full', False))
     fentries = [entry for entry in entries.values()
                 if ((tags is None or len(set(entry.tags) & tags) > 0) and
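A small illustration of the two changes above (the tag names are hypothetical): the exclude query parameter is now tolerant of stray whitespace in the comma-separated list, and whales reaches the stats template as a Python list rather than a pre-joined string.

# sketch only: tag names are made up
exclude = "Social, Biological ,Economic"
set([x.strip() for x in exclude.split(",")])   # {'Social', 'Biological', 'Economic'}
set(exclude.split(","))                        # old behaviour: {'Social', ' Biological ', 'Economic'}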
@@ -30,6 +30,8 @@ import pickle
 import base64
 import zipfile
+from openpyxl import load_workbook

 from graph_tool.all import *

 def parse_graph(fs, fmt, directed):
@@ -170,7 +172,8 @@ def parse_snap(f, directed, hashed=False, hash_type="int"):

 def parse_loadtxt(f, directed, params={}):
     g = Graph(directed=directed)
-    edges = numpy.loadtxt(f, dtype=str)
+    with io.TextIOWrapper(f, "utf8") as ft:
+        edges = numpy.loadtxt(ft, dtype=str)
     eprops = [g.new_ep("string") for i in range(edges.shape[1]-2)]
     g.vp.name = g.add_edge_list(edges, hashed=True, hash_type="string",
                                 eprops=eprops)
@@ -432,4 +435,59 @@ def parse_comunelab(f, directed):
             dp[k] = p.copy("double")
         except ValueError:
             pass
-    return g
\ No newline at end of file
+    return g
+
+def parse_excel_matrix(fobj, crange=(0, 10), rrange=(0, 10), clabel=None,
+                       rlabel=None, is_bip=False, sheet=None, directed=False):
+    with TemporaryDirectory(dir="./cache/temp") as tdir:
+        with open(f"{tdir}/dump.xls", "wb") as fw:
+            shutil.copyfileobj(fobj, fw)
+        subprocess.run(["localc", "--convert-to", "xlsx",
+                        f"dump.xls"], cwd=tdir, check=True)
+        wb = load_workbook(f"{tdir}/dump.xlsx", read_only=True)
+    if sheet is not None:
+        wb = wb[sheet]
+    g = Graph(directed=directed)
+    g.ep.count = g.new_ep("string")
+    if clabel is not None:
+        g.vp[clabel[1]] = g.new_vp("string")
+    if rlabel is not None:
+        g.vp[rlabel[1]] = g.new_vp("string")
+    if is_bip is not False:
+        g.vp[is_bip[1]] = g.new_vp("bool")
+    vs = {}
+    m = pandas.DataFrame(wb.active.values).values
+    N = crange[1] - crange[0]
+    g.add_vertex(N)
+    if is_bip is not False:
+        g.add_vertex(rrange[1] - rrange[0])
+    for i, c in enumerate(range(crange[0], crange[1])):
+        if clabel is not None:
+            g.vp[clabel[1]][i] = m[c, rrange[0] - 1]
+        for j, r in enumerate(range(rrange[0], rrange[1])):
+            if rlabel is not None:
+                g.vp[rlabel[1]][i] = m[crange[0] - 1, r]
+            x = m[c, r]
+            if str(x).strip() == "":
+                continue
+            try:
+                if float(x) == 0:
+                    continue
+            except ValueError:
+                pass
+            if is_bip is not False:
+                e = g.add_edge(i, j)
+            else:
+                e = g.add_edge(i, j + N)
+            g.ep.count[e] = x
+    if is_bip is not False and not directed:
+        remove_parallel_edges(g)
+    return g
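A hedged usage sketch for the new parser (the file name, ranges, and property names below are made up): parse_excel_matrix converts an .xls upload to .xlsx by shelling out to LibreOffice (localc --convert-to xlsx), loads the sheet with openpyxl, and turns a rectangular block of cells into a weighted graph; crange and rrange select the column and row index ranges, clabel and rlabel are pairs whose second element names the vertex property that receives the header labels, and a non-False is_bip pair asks for a bipartite graph with a boolean vertex property of that name.

# sketch only: file name, ranges and property names are hypothetical
with open("contact_matrix.xls", "rb") as f:
    g = parse_excel_matrix(f, crange=(1, 30), rrange=(1, 50),
                           clabel=(0, "col_label"), rlabel=(0, "row_label"),
                           is_bip=(None, "kind"), sheet="Sheet1",
                           directed=False)
print(g.num_vertices(), g.num_edges())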
@@ -207,39 +207,74 @@ def cache_network():
         return wrap
     return inner

+def get_base_upstream_fname(fname):
+    if ".zip:" in fname:
+        fname = fname.split(".zip:")[0] + ".zip"
+    elif ":zip:" in fname:
+        fname = fname.split(":zip:")[0]
+    elif ".tar.gz:" in fname:
+        fname = fname.split(".tar.gz:")[0] + ".tar.gz"
+    elif ":tar.gz:" in fname:
+        fname = fname.split(":tar.gz:")[0]
+    elif ".rar:" in fname:
+        fname = fname.split(".rar:")[0] + ".rar"
+    elif ":rar:" in fname:
+        fname = fname.split(":rar:")[0]
+    return fname
+
 @contextmanager
-def open_upstream_file(name, fname, mode="r", autocompress=True, **kwargs):
+def open_upstream_file(name, fname, mode="r", **kwargs):
     base = os.path.dirname(f"cache/upstream/{name}/")
     if not os.path.exists(base):
         os.makedirs(base)
-    if ("zip:" not in fname and "tar.gz:" not in fname and "rar:" not in fname) or not autocompress:
-        os.makedirs(os.path.dirname(f"cache/upstream/{name}/{fname}"),
+    if mode[0] == "w":
+        fname = get_base_upstream_fname(fname)
+        compress = False
+        if ".bz2" in fname or ".gz" in fname or ".zst" in fname:
+            fname = fname
+        else:
+            fname = fname + ".zst"
+            compress = True
+        os.makedirs(os.path.dirname(f"{base}/{fname}"),
                     exist_ok=True)
-        if fname.endswith(".gz") and autocompress:
-            with gzip.open(f"{base}/{fname}", mode, **kwargs) as f:
-                if "b" in mode:
-                    yield f
-                else:
-                    with io.TextIOWrapper(f, "utf8") as wf:
-                        yield wf
-        elif fname.endswith(".xz") and autocompress:
-            with lzma.open(f"{base}/{fname}", mode, **kwargs) as f:
-                if "b" in mode:
-                    yield f
-                else:
-                    with io.TextIOWrapper(f, "utf8") as wf:
-                        yield wf
+        if compress:
+            with zst_open(f"{base}/{fname}", mode, **kwargs) as f:
+                yield f
         else:
             with open(f"{base}/{fname}", mode, **kwargs) as f:
                 yield f
-    elif "tar.gz:" in fname:
+    else:
+        if "zip:" not in fname and "tar.gz:" not in fname and "rar:" not in fname:
+            if fname.endswith(".gz"):
+                with gzip.open(f"{base}/{fname}", mode, **kwargs) as f:
+                    if "b" in mode:
+                        yield f
+                    else:
+                        with io.TextIOWrapper(f, "utf8") as wf:
+                            yield wf
+            elif fname.endswith(".xz") and autocompress:
+                with lzma.open(f"{base}/{fname}", mode, **kwargs) as f:
+                    if "b" in mode:
+                        yield f
+                    else:
+                        with io.TextIOWrapper(f, "utf8") as wf:
+                            yield wf
+            elif ".zip" in fname or ".bz2" in fname or ".gz" in fname or ".zst" in fname:
+                with open(f"{base}/{fname}", mode, **kwargs) as f:
+                    yield f
+            else:
+                if not os.path.exists(f"{base}/{fname}.zst") and os.path.exists(f"{base}/{fname}"):
+                    print(f"compressing {base}/{fname}...")
+                    with open(f"{base}/{fname}", "rb", **kwargs) as fs:
+                        with zst_open(f"{base}/{fname}.zst", "wb", **kwargs) as ft:
+                            shutil.copyfileobj(fs, ft)
+                    os.remove(f"{base}/{fname}")
+                with zst_open(f"{base}/{fname}.zst", mode, **kwargs) as f:
+                    yield f
+        elif "tar.gz:" in fname:
             if ".tar.gz" in fname:
                 archive, fname = fname.split(".tar.gz:")
                 archive += ".tar.gz"
             else:
                 archive, fname = fname.split(":tar.gz:")
             os.makedirs(os.path.dirname(f"cache/upstream/{name}/{archive}"),
                         exist_ok=True)
             with tarfile.open(f"{base}/{archive}", "r:gz") as tf:
                 with tf.extractfile(fname) as bf:
                     if "b" in mode:
@@ -247,41 +282,38 @@ def open_upstream_file(name, fname, mode="r", autocompress=True, **kwargs):
                     else:
                         with io.TextIOWrapper(bf, "utf8") as wf:
                             yield wf
-    elif "zip:" in fname:
-        if ".zip" in fname:
-            archive, fname = fname.split(".zip:")
-            archive += ".zip"
-        else:
-            archive, fname = fname.split(":zip:")
-        os.makedirs(os.path.dirname(f"cache/upstream/{name}/{archive}"),
-                    exist_ok=True)
-        with zipfile.ZipFile(f"{base}/{archive}", mode[:1],
-                             compression=zipfile.ZIP_DEFLATED) as zf:
-            with zf.open(f"{fname}", mode[:1]) as bf:
-                if "b" in mode:
-                    yield bf
-                else:
-                    with io.TextIOWrapper(bf, "utf8") as wf:
-                        yield wf
-    else:
-        if ".rar" in fname:
-            archive, fname = fname.split(".rar:")
-            archive += ".rar"
+        elif "zip:" in fname:
+            if ".zip" in fname:
+                archive, fname = fname.split(".zip:")
+                archive += ".zip"
+            else:
+                archive, fname = fname.split(":zip:")
+            with zipfile.ZipFile(f"{base}/{archive}", mode[:1],
+                                 compression=zipfile.ZIP_DEFLATED) as zf:
+                with zf.open(f"{fname}", mode[:1]) as bf:
+                    if "b" in mode:
+                        yield bf
+                    else:
+                        with io.TextIOWrapper(bf, "utf8") as wf:
+                            yield wf
         else:
-            archive, fname = fname.split(":rar:")
-        os.makedirs(os.path.dirname(f"cache/upstream/{name}/{archive}"),
-                    exist_ok=True)
-        with rarfile.RarFile(f"{base}/{archive}", mode[:1]) as zf:
-            with zf.open(f"{fname}", mode[:1]) as bf:
-                if "b" in mode:
-                    yield bf
-                else:
-                    with io.TextIOWrapper(bf, "utf8") as wf:
-                        yield wf
+            if ".rar" in fname:
+                archive, fname = fname.split(".rar:")
+                archive += ".rar"
+            else:
+                archive, fname = fname.split(":rar:")
+            with rarfile.RarFile(f"{base}/{archive}", mode[:1]) as zf:
+                with zf.open(f"{fname}", mode[:1]) as bf:
+                    if "b" in mode:
+                        yield bf
+                    else:
+                        with io.TextIOWrapper(bf, "utf8") as wf:
+                            yield wf

 _check_health = False

-def fetch_upstream_files(name, url_prefix, files, force=False, bad_ssl=False):
+def fetch_upstream_files(name, url_prefix, files, force=False, check=True,
+                         bad_ssl=False):
     global _check_health
     processed = set()
@@ -289,28 +321,32 @@ def fetch_upstream_files(name, url_prefix, files, force=False, bad_ssl=False):
         if isinstance(fnames, str):
             fnames = [fnames]
         for fname in fnames:
-            if ".zip:" in fname:
-                fname = fname.split(".zip:")[0] + ".zip"
-            if ":zip:" in fname:
-                fname = fname.split(":zip:")[0]
-            if ".tar.gz:" in fname:
-                fname = fname.split(".tar.gz:")[0] + ".tar.gz"
-            if ":tar.gz:" in fname:
-                fname = fname.split(":tar.gz:")[0]
-            if ".rar:" in fname:
-                fname = fname.split(".rar:")[0] + ".rar"
-            if ":rar:" in fname:
-                fname = fname.split(":rar:")[0]
-            if fname in processed:
-                continue
-            if not force and os.path.exists(f"cache/upstream/{name}/{fname}"):
-                continue
+            bfname = get_base_upstream_fname(fname)
+            if bfname in processed:
+                continue
+            if check:
+                try:
+                    with open_upstream_file(name, fname, "rb") as output:
+                        if not force:
+                            continue
+                except (FileNotFoundError, IOError):
+                    pass
+            else:
+                if not force and (os.path.exists(f"cache/upstream/{name}/{bfname}") or
+                                  os.path.exists(f"cache/upstream/{name}/{bfname}.zst")):
+                    continue
             if url_prefix == "gdrive://":
-                with open_upstream_file(name, fname, "wb", autocompress=False) as output:
-                    save_gdrive_file(fname, output, _check_health)
+                if _check_health:
+                    save_gdrive_file(bfname, None)
+                else:
+                    with open_upstream_file(name, fname, "wb") as output:
+                        save_gdrive_file(bfname, output)
             else:
                 if url_prefix is not None:
-                    url = f"{url_prefix}/{fname}"
+                    url = f"{url_prefix}/{bfname}"
                 else:
                     url = fname
                 req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
@@ -320,10 +356,10 @@ def fetch_upstream_files(name, url_prefix, files, force=False, bad_ssl=False):
                         if _check_health:
                             f.read(1)
                         else:
-                            with open_upstream_file(name, fname, "wb",
-                                                    autocompress=False) as output:
+                            with open_upstream_file(name, fname, "wb") as output:
                                 shutil.copyfileobj(f, output)
                 except urllib.error.HTTPError:
                     print("error fetching:", f"{url_prefix}/{fname}")
                     raise
-            processed.add(fname)
+            processed.add(bfname)
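A hedged sketch of how the reworked cache layer fits together (the entry and file names below are placeholders, and zst_open is assumed to be the module's zstd helper behaving like gzip.open): in write mode open_upstream_file first reduces an archive-member spec to its base file via get_base_upstream_fname and stores uncompressed payloads with a .zst suffix, while fetch_upstream_files with the new check=True default decides whether to download by simply trying to open each file from the cache.

# sketch only: entry and file names are hypothetical
with open_upstream_file("my_entry", "edges.csv", "wb") as out:
    out.write(b"1 2\n2 3\n")    # stored as cache/upstream/my_entry/edges.csv.zst
with open_upstream_file("my_entry", "edges.csv", "rb") as f:
    head = f.read(100)          # re-opens the compressed copy, assuming zst_open decompresses on read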
[collapsed diff not shown]