Commit cc85872a authored by Tiago Peixoto

Initial commit

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os.path
import sys
import pickle
from collections import defaultdict
from functools import wraps
from graph_tool.all import *
import process_entry
from locks import acquire_lock, acquire_lock_file
from util import *
import contextlib
import shelve
import dbm
import numpy
import scipy.sparse.linalg
@contextlib.contextmanager
def open_cache(entry, flag="rf"):
    "Open a persistent cache for a given entry."
    base = f"cache/analysis/{entry.name}"
    os.makedirs(base, exist_ok=True)
    try:
        with shelve.open(f"{base}/cache_db", flag=flag) as cache:
            yield cache
    except dbm.error:
        if flag[0] == "r":
            yield {}
        else:
            raise

def cache_result(f):
    "Decorator that caches the result of the given function."
    name = f.__name__.split(".")[-1]

    @wraps(f)
    def wrap(entry, alt, g, cache, *args, cache_only=False, force=False,
             **kwargs):
        try:
            if force:
                raise KeyError()
            if alt is not None:
                try:
                    ret = cache[repr((alt, name))]
                except KeyError:
                    ret = cache[alt][name]
            else:
                ret = cache[name]
            return ret
        except KeyError:
            if cache_only:
                return None
            if g is None:
                return None
            print(f"\t\t{alt} {name}...")
            ret = f(g(), *args, **kwargs)
            if alt is not None:
                cache[repr((alt, name))] = ret
            else:
                cache[name] = ret
            return ret
    return wrap

def restrict(N, exclude=[]):
    """Decorator that restricts the function call to networks with at most N
    nodes that do not belong to the exclude list; returns None otherwise.
    """
    def rec(f):
        @wraps(f)
        def wrap(entry, alt, g, cache, *args, **kwargs):
            N_e = get_N(entry, alt, g, cache, *args, **kwargs)
            if N_e > N or entry.name in exclude:
                return None
            return f(entry, alt, g, cache, *args, **kwargs)
        return wrap
    return rec

def uses(names):
    """Decorator that runs the analyses given by their registered names and
    passes their results as extra positional arguments to the wrapped
    function."""
    def rec(f):
        @wraps(f)
        def wrap(entry, alt, g, cache, *args, **kwargs):
            global analyses
            x = [analyses[name](entry, alt, g, cache, *args, **kwargs)
                 for name in names]
            return f(entry, alt, g, cache, *x, *args, **kwargs)
        return wrap
    return rec

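# Global registries filled in by @register below: `analyses` maps an analysis
# name to the function that computes it, and `titles` maps it to a
# human-readable title.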
analyses = {}
titles = {}
def register(name=None, title=None):
    """Decorator that registers the function in the global analyses registry,
    under a given name and title."""
    global analyses
    global titles

    def reg(f):
        nonlocal name
        if name is None:
            name = f.__name__.split(".")[-1]
        titles[name] = title
        analyses[name] = f
        return f
    return reg

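# Each analysis below is registered under a short name and memoized by
# @cache_result in the per-entry shelve cache, so repeated runs only recompute
# what is missing or explicitly forced. A further analysis could be added in
# the same pattern (illustrative sketch only, not part of the original set):
#
#   @register("density", "Density")
#   @cache_result
#   def get_density(g):
#       N, E = g.num_vertices(), g.num_edges()
#       if N < 2:
#           return 0.
#       # directed graphs have at most N(N-1) edges, undirected half of that
#       return E / (N * (N - 1)) if g.is_directed() else 2 * E / (N * (N - 1))
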
@register("num_edges", "Number of edges")
@cache_result
def get_E(g):
    return g.num_edges()

@register("num_vertices", "Number of vertices")
@cache_result
def get_N(g):
    return g.num_vertices()

@register(title="Directed")
@cache_result
def is_directed(g):
    return g.is_directed()

@register("average_degree", "Average degree")
@cache_result
def get_ak(g):
    if g.is_directed():
        return g.num_edges() / g.num_vertices()
    else:
        return 2 * g.num_edges() / g.num_vertices()

@register("degree_std_dev", "Degree standard deviation")
@cache_result
def get_kdev(g):
    g = GraphView(g, directed=False)
    k = g.get_out_degrees(g.get_vertices())
    return k.std()

@register("is_bipartite", "Bipartite")
@cache_result
def is_bip(g):
    return is_bipartite(g)

@register("global_clustering", "Global clustering")
@cache_result
def get_clustering(g):
    if is_bipartite(g):
        return 0.
    return global_clustering(g)[0]

@register("degree_assortativity", "Degree assortativity")
@cache_result
def get_assortativity(g):
    g = GraphView(g, directed=False)
    return scalar_assortativity(g, "out")[0]

@register("largest_component_fraction", "Size of largest component")
@cache_result
def get_S(g):
    c = label_largest_component(g, directed=False)
    return c.fa.sum() / g.num_vertices()

@register("edge_reciprocity", "Edge reciprocity")
@cache_result
def get_reciprocity(g):
    if g.is_directed():
        return edge_reciprocity(g)
    return 1.

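# Spectral quantities: the "transition gap" is the second-largest eigenvalue of
# the random-walk transition matrix of the largest component (computed with a
# sparse eigensolver), which determines the mixing time reported next.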
@register("transition_gap", "Second eigenvalue of transition matrix")
@cache_result
def get_tgap(g):
    g = GraphView(g, directed=False)
    u = extract_largest_component(g)
    if u.num_vertices() != g.num_vertices():
        g = u
    if 2 >= g.num_vertices() - 1:
        return numpy.nan
    T = transition(g, operator=True)
    ew = scipy.sparse.linalg.eigs(T, k=2, which="LR", return_eigenvectors=False)
    return float(min(ew.real))

@register("mixing_time", "Random walk mixing time")
@uses(["transition_gap"])
@cache_result
def get_mixing(g, tgap):
    if tgap <= 0:
        return numpy.inf
    return -1/numpy.log(tgap)

@register("hashimoto_radius", "Largest eigenvalue of non-backtracking matrix")
@cache_result
def get_hgap(g):
    g = GraphView(g, directed=False)
    remove_parallel_edges(g)
    T = hashimoto(g, compact=True, operator=True)
    ew = scipy.sparse.linalg.eigs(T, k=1, which="LR", return_eigenvectors=False)
    g.clear()
    return float(ew.real[0])

@register("diameter", "(Pseudo-) diameter")
@cache_result
def get_diameter(g):
    g = GraphView(g, directed=False)
    u = extract_largest_component(g)
    if u.num_vertices() != g.num_vertices():
        g = u
    if g.num_vertices() > 10000:
        d = pseudo_diameter(g)[0]
    else:
        d = max([shortest_distance(g, source=v).a.max() for v in g.vertices()])
    return int(d)

@register("edge_properties")
@cache_result
def get_eprops(g):
    eprops = []
    for k, v in g.ep.items():
        eprops.append((k, v.value_type()))
    return eprops

@register("vertex_properties")
@cache_result
def get_vprops(g):
    vprops = []
    for k, v in g.vp.items():
        vprops.append((k, v.value_type()))
    return vprops

@register("pos")
@restrict(N=10000000, exclude=["openstreetmap"])
@cache_result
def get_pos(g):
    if g.num_vertices() < 1000:
        step = .99
    else:
        step = .95
    pos = sfdp_layout(g, multilevel=True, cooling_step=step)
    x, y = ungroup_vector_property(pos, [0, 1])
    return [x.a, y.a]

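# Run the registered analyses (optionally restricted to `names`, minus `skip`,
# with the ones in `force` recomputed) for every entry, storing results in the
# per-entry cache and, if requested, in a global pickled cache guarded by a
# lock file.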
def analyze_entries(entries, names=[], skip=[], force=[], cache_only=True,
                    global_cache=False):
    global analyses

    analyze_cache = {}
    if global_cache:
        with acquire_lock_file("./cache/analyze_cache.lock", block=True) as lock:
            try:
                analyze_cache = pickle.load(open("./cache/analyze_cache.pickle", "rb"))
            except FileNotFoundError:
                pass

    for entry in entries:
        if hasattr(entry, "analyses"):
            continue
        if entry.name in analyze_cache:
            entry.analyses, entry._analyses = analyze_cache[entry.name]
            continue
        flag = "rf" if cache_only else "c"
        with open_cache(entry, flag) as cache:
            entry.analyses = defaultdict(dict)
            max_alt = None
            Nmax = None
            for alt, g in entry.parse(lazy=True, cache_only=True):
                for a, f in analyses.items():
                    if a in skip:
                        continue
                    if len(names) > 0 and a not in names:
                        continue
                    v = f(entry, alt, g, cache, force=a in force,
                          cache_only=cache_only)
                    if isinstance(v, PropertyArray):
                        v = float(v)
                    entry.analyses[alt][a] = v
                N = entry.analyses[alt]["num_vertices"]
                if Nmax is None:
                    max_alt = alt
                    Nmax = N
                elif N > Nmax:
                    max_alt = alt
                    Nmax = N
                del g
            entry._analyses = entry.analyses[max_alt]
        if global_cache:
            analyze_cache[entry.name] = (entry.analyses, entry._analyses)

    if global_cache:
        with acquire_lock_file("./cache/analyze_cache.lock", block=True) as lock:
            pickle.dump(analyze_cache, open("./cache/analyze_cache.pickle", "wb"))

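# Command-line entry point: analyze the entries named on the command line (or
# all known entries if none are given), skipping entries currently locked by
# another process. Invoked as e.g. `python analyze.py <entry> [<entry> ...]`
# (script file name assumed).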
if __name__ == "__main__":
    if len(sys.argv) > 1:
        names = sys.argv[1:]
    else:
        names = None

    entries = process_entry.get_entries(names)

    for entry in entries.values():
        with acquire_lock(entry, block=False) as lock:
            if lock is None:
                continue
            print("analyzing:", entry.name)
            analyze_entries([entry], cache_only=False)

#!/usr/bin/env bash
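# Integrity check of the compressed network files in the cache: any zstd or
# zip archive that fails its self-test is removed, so it will be regenerated
# on the next conversion run.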
for f in `find cache/network -name "*zst"`; do
    echo $f;
    zstdmt --test $f;
    if [[ $? != 0 ]]; then
        rm $f;
    fi
done

for f in `find cache/network -name "*zip"`; do
    echo $f;
    unzip -t $f;
    if [[ $? != 0 ]]; then
        rm $f;
    fi
done

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import pkgutil
import sys
import os
import gc
import csv
import io
import zipfile
import graph_tool
import process_entry
from locks import acquire_lock
from util import *
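# Convert every alternative network of an entry to GraphML ("xml"), GML and
# CSV, writing zstd-compressed files for the first two and a zip archive for
# the CSV version, unless the target file already exists (or force=True).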
def convert_entry(entry, alts=None, force=False):
    for alt, g in entry.parse(lazy=True):
        if alts is not None and alt not in alts:
            continue
        name = alt
        if name is None:
            name = "network"
        for fmt in ["xml", "gml", "csv"]:
            if fmt != "csv":
                fname = f"cache/network/{entry.name}/{name}.{fmt}.zst"
            else:
                fname = f"cache/network/{entry.name}/{name}.{fmt}.zip"
            if not os.path.exists(fname) or force:
                print("\t", alt, fmt)
                if not isinstance(g, graph_tool.Graph):
                    g = g()
                if fmt != "csv":
                    with zst_open(fname, "wb") as f:
                        g.save(f, fmt=fmt)
                else:
                    save_graph_to_csv(g, fname)
            else:
                print("\t", alt, fmt, "skipped")
        del g
        gc.collect()

def save_graph_to_csv(g, filename):
    """Save the graph as a zip archive containing edges.csv, nodes.csv and
    gprops.csv, with one column per property."""
    dialect = dict(delimiter=',', quotechar='"', escapechar='\\',
                   doublequote=False, quoting=csv.QUOTE_MINIMAL)
    with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        with zf.open('edges.csv', "w", force_zip64=True) as fb:
            with io.TextIOWrapper(fb) as f:
                eprops = list(g.ep.keys())
                if len(eprops) == 0:
                    f.write("# source, target\n")
                else:
                    f.write("# source, target, " + ", ".join(eprops) + "\n")
                eprops = list(g.ep.values())
                writer = csv.writer(f, **dialect)
                for e in g.edges():
                    writer.writerow([int(e.source()), int(e.target())] +
                                    [p[e] for p in eprops])
        with zf.open('nodes.csv', "w", force_zip64=True) as fb:
            with io.TextIOWrapper(fb) as f:
                vprops = list(g.vp.keys())
                if len(vprops) == 0:
                    f.write("# index \n")
                else:
                    f.write("# index, " + ", ".join(vprops) + "\n")
                vprops = list(g.vp.values())
                writer = csv.writer(f, **dialect)
                for v in g.vertices():
                    writer.writerow([int(v)] + [p[v] for p in vprops])
        with zf.open('gprops.csv', "w") as fb:
            with io.TextIOWrapper(fb) as f:
                f.write("# prop_name, value\n")
                writer = csv.writer(f, **dialect)
                for k, p in g.gp.items():
                    writer.writerow([k, p[g]])

if __name__ == "__main__":
    if len(sys.argv) > 1:
        names = sys.argv[1:]
    else:
        names = None

    entries = process_entry.get_entries(names)

    for entry in entries.values():
        with acquire_lock(entry, block=False) as lock:
            if lock is None:
                continue
            print("entry:", entry.name)
            convert_entry(entry)

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import pkgutil
import sys
import os
import gc
import numpy.random
import graph_tool
from process_entry import *
from analyze import *
from update_props import *
from draw import *
from locks import acquire_lock
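# Top-level update pipeline: for each entry that is not locked by another
# process, fetch the upstream data, parse it, run the analyses, re-convert and
# re-analyze if any properties were updated, and finally draw and convert the
# networks. When no entry names are given, all entries are processed in order
# of increasing size.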
if len(sys.argv) > 1:
    names = sys.argv[1:]
else:
    names = None

entries = list(process_entry.get_entries(names).values())

if len(sys.argv) == 1:
    analyze_entries(entries, names=["num_vertices"])
    f = lambda x: x if x is not None else 0
    entries = sorted(entries,
                     key=lambda e: f(e._analyses.get("num_vertices", 0)))

for entry in entries:
    with acquire_lock(entry, block=False) as lock:
        if lock is None:
            continue
        print("entry:", entry.name)

        print("\tfetching...")
        entry.fetch_upstream()

        print("\tparsing...")
        for alt, g in entry.parse(lazy=True):
            print("\tparsed:", alt)

        print("\tanalysing...")
        analyze_entries([entry], cache_only=False)

        updated = False
        for alt, g in update_props(entry, ret_all=False):
            print("\tconverting (w/ updated props)...")
            convert_entry(entry, alts=[alt], force=True)
            updated = True
        if updated:
            analyze_entries([entry], cache_only=False,
                            force=["vertex_properties",
                                   "edge_properties"])

        print("drawing...")
        for alt, g in entry.parse(cache_only=True, lazy=True):
            print(f"\t{alt}...")
            draw_entry(entry, alt)

        convert_entry(entry)

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import io
import shutil
from functools import wraps
import graph_tool.all as gt
import process_entry
from analyze import analyze_entries
from locks import acquire_lock
from util import *
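# Decorator that caches the rendered drawing of an entry/alternative as a PNG
# file under cache/draw/ and returns it as an open file object; calls with
# extra keyword arguments bypass the cache.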
def cache_draw(f):
    @wraps(f)
    def wrap(entry, alt, **kwargs):
        if len(kwargs) > 0:
            return f(entry, alt, **kwargs)
        base = f"{process_entry.root}/cache/draw/{entry.name}"
        fname = f"{base}/{alt}.png"
        if not os.path.exists(fname):
            os.makedirs(base, exist_ok=True)
            buf = f(entry, alt)
            if buf is not None:
                with open(fname, "wb") as fo:
                    shutil.copyfileobj(buf, fo)
        if os.path.exists(fname):
            return open(fname, "rb")
        return None
    return wrap

@cache_draw
def draw_entry(entry, alt, svg=False, size=1000, bg_color="#cdcdcd",
               edge_color=None, full=False):
    if svg:
        fmt = "svg"
    else:
        fmt = "png"
    try:
        if alt is None:
            props = entry._analyses["vertex_properties"]
        else:
            props = entry.analyses[alt]["vertex_properties"]
        props = [p[0] for p in props]
        if "_pos" not in props:
            raise KeyError
        buf = io.BytesIO()
        for alt, g in entry.parse(alts=[alt], cache_only=True):