Commit d51f4857 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

analyze.py: add k-nearest-neighbour projection meta-analysis

parent 41f870ba
......@@ -33,10 +33,10 @@ import numpy
import scipy.sparse.linalg
@contextlib.contextmanager
def open_cache(entry, flag="rf"):
def open_cache(name, flag="rf"):
"Open a persistent cache for a given entry."
base = f"cache/analysis/{entry.name}"
base = f"cache/analysis/{name}"
os.makedirs(base, exist_ok=True)
try:
......@@ -96,21 +96,35 @@ def restrict(N, exclude=[]):
return rec
def uses(names):
    """Decorator factory: evaluate the analyses listed in *names* first and
    hand their results to the wrapped function as extra positional
    arguments (appended after the caller's own positionals)."""
    def rec(f):
        @wraps(f)
        def wrap(*args, **kwargs):
            global analyses
            # Run every prerequisite analysis with the same call arguments.
            deps = []
            for name in names:
                deps.append(analyses[name](*args, **kwargs))
            return f(*args, *deps, **kwargs)
        return wrap
    return rec
def meta_cache(f):
    """Decorator: memoize a meta-analysis result in the caller-supplied
    persistent cache, keyed by the wrapped function's unqualified name.
    The extra positional/keyword arguments are ignored for cache lookup."""
    key = f.__name__.split(".")[-1]
    @wraps(f)
    def wrap(entries, cache, *args, **kwargs):
        if key in cache:
            return cache[key]
        result = f(entries, *args, **kwargs)
        cache[key] = result
        return result
    return wrap
# Registry of all per-dataset analysis functions, keyed by analysis name.
analyses = {}
# Subset of analyses registered with meta=True: functions that operate on the
# whole collection of entries rather than a single graph.
meta_analyses = {}
# Human-readable title per analysis name — presumably filled in by register();
# the populating lines are outside this hunk, verify against the full file.
titles = {}
# Axis-scale hint ("linear", "log", ...) per analysis name; register() takes a
# scale= argument, presumably stored here — confirm against the full file.
scales = {}
def register(name=None, title=None, scale="linear"):
def register(name=None, title=None, scale="linear", meta=False):
"""Decorator that registers the function to the global analyses list, with a
given name and title."""
......@@ -122,9 +136,12 @@ def register(name=None, title=None, scale="linear"):
def reg(f):
nonlocal name
nonlocal meta
if name is None:
name = f.__name__.split(".")[-1]
analyses[name] = f
if meta:
meta_analyses[name] = f
return f
return reg
......@@ -268,7 +285,7 @@ def get_vprops(g):
return vprops
@register("pos")
@restrict(N=10000000, exclude=["openstreetmap"])
@restrict(N=1000000, exclude=["openstreetmap"])
@cache_result
def get_pos(g):
if g.num_vertices() < 1000:
......@@ -279,9 +296,64 @@ def get_pos(g):
x, y = ungroup_vector_property(pos, [0, 1])
return [x.a, y.a]
@register("knn_proj", meta=True)
@meta_cache
def get_knn_proj(entries):
    """Meta-analysis: project every (dataset, alternative) pair into 2D.

    Builds a feature vector per (entry, alt) from all scalar analysis values,
    normalizes each feature column, connects the points with a k-nearest-
    neighbour graph (k=5) and lays it out with sfdp_layout().

    Returns a dict mapping (entry.name, alt) -> [x, y] coordinates.
    """
    global analyses
    global meta_analyses
    # Non-scalar analyses (and meta-analyses themselves) cannot be used as
    # feature coordinates, so they are excluded from the matrix.
    skip = ["num_vertices", "num_edges", "pos", "vertex_properties",
            "edge_properties"]
    fs = []
    for entry in entries:
        if entry.name in ["openstreetmap"]:  # too large; excluded throughout
            continue
        for alt, g in entry.parse(lazy=True, cache_only=True):
            x = []
            for a, f in analyses.items():
                if a in skip or a in meta_analyses:
                    continue
                x.append(float(entry.analyses[alt][a]))
            fs.append(x)
    fs = numpy.array(fs)
    # Normalize each feature column: zero out non-finite values, center on the
    # mean and scale by the range.  Fix: a constant column has zero range, and
    # dividing by it would reintroduce the NaN/inf we just removed — skip the
    # scaling in that case (the centered column is already all zeros).
    for i in range(fs.shape[1]):
        col = fs[:, i]
        col[~numpy.isfinite(col)] = 0
        col -= col.mean()
        rng = col.max() - col.min()
        if rng > 0:
            col /= rng
    g, w = generate_knn(fs, k=5)
    pos = sfdp_layout(g, multilevel=True)
    # Walk the entries in the same order as when building `fs`, so row j of
    # the layout corresponds to the j-th (entry, alt) pair.
    res = {}
    j = 0
    for entry in entries:
        if entry.name in ["openstreetmap"]:
            continue
        for alt, _ in entry.parse(lazy=True, cache_only=True):
            res[(entry.name, alt)] = list(pos[j])
            j += 1
    return res
@register("knn_proj_1", "Projected coordinate 1", meta=True)
@uses(["knn_proj"])
@meta_cache
def get_knn_proj_1(entries, ret):
    """First (x) coordinate of the k-NN projection, per (entry name, alt)."""
    out = {}
    for key, coords in ret.items():
        out[key] = coords[0]
    return out
@register("knn_proj_2", "Projected coordinate 2", meta=True)
@uses(["knn_proj"])
@meta_cache
def get_knn_proj_2(entries, ret):
    """Second (y) coordinate of the k-NN projection, per (entry name, alt)."""
    return {key: coords[1] for key, coords in ret.items()}
def analyze_entries(entries, names=[], skip=[], force=[], cache_only=True,
global_cache=False):
global analyses
global meta_analyses
analyze_cache = {}
updated_global_cache = False
......@@ -304,13 +376,13 @@ def analyze_entries(entries, names=[], skip=[], force=[], cache_only=True,
flag = "rf" if cache_only else "c"
with open_cache(entry, flag) as cache:
with open_cache(entry.name, flag) as cache:
entry.analyses = defaultdict(dict)
max_alt = None
Nmax = None
for alt, g in entry.parse(lazy=True, cache_only=True):
for a, f in analyses.items():
if a in skip:
if a in skip or a in meta_analyses:
continue
if len(names) > 0 and a not in names:
continue
......@@ -341,6 +413,21 @@ def analyze_entries(entries, names=[], skip=[], force=[], cache_only=True,
with open("./cache/analyze_cache.pickle", "wb") as f:
pickle.dump(analyze_cache, f)
if cache_only:
with acquire_lock_file("./cache/meta.lock", block=False) as lock:
if lock is None:
return
with open_cache("meta", "rf") as cache:
for a, f in meta_analyses.items():
if a in skip:
continue
ret = f(entries, cache)
for entry in entries:
if entry.name in ["openstreetmap"]:
continue
for alt, g in entry.parse(lazy=True, cache_only=True):
entry.analyses[alt][a] = ret[(entry.name, alt)]
if __name__ == "__main__":
if len(sys.argv) > 1:
names = sys.argv[1:]
......@@ -356,4 +443,9 @@ if __name__ == "__main__":
continue
print("analyzing:", entry.name)
analyze_entries([entry], cache_only=False)
\ No newline at end of file
analyze_entries([entry], cache_only=False)
with acquire_lock_file("./cache/meta.lock", block=True) as lock:
with open_cache("meta", "c") as cache:
for a, f in meta_analyses.items():
ret = f(entries.values(), cache)
......@@ -49,7 +49,7 @@ app.config['JSON_SORT_KEYS'] = False
base = os.path.dirname(__file__)
entries = process_entry.get_entries()
analyze.analyze_entries(entries.values(), skip=["pos"],
analyze.analyze_entries(entries.values(), skip=["pos", "knn_proj"],
global_cache=True)
whales = ["openstreetmap"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment