Commit 402cd86f authored by Tiago Peixoto
Browse files

Revamp stats page plot using d3js

parent 8c2bd870
......@@ -108,8 +108,9 @@ def uses(names):
analyses = {}
titles = {}
scales = {}
def register(name=None, title=None):
def register(name=None, title=None, scale="linear"):
"""Decorator that registers the function to the global analyses list, with a
given name and title."""
......@@ -117,6 +118,7 @@ def register(name=None, title=None):
global titles
titles[name] = title
scales[name] = scale
def reg(f):
nonlocal name
......@@ -127,22 +129,22 @@ def register(name=None, title=None):
return reg
@register("num_edges", "Number of edges")
@register("num_edges", "Number of edges", "log")
@cache_result
def get_E(g):
return g.num_edges()
@register("num_vertices", "Number of vertices")
@register("num_vertices", "Number of vertices", "log")
@cache_result
def get_N(g):
return g.num_vertices()
@register(title="Directed")
@register("is_directed", title="Directed")
@cache_result
def is_directed(g):
return g.is_directed()
@register("average_degree", "Average degree")
@register("average_degree", "Average degree", "log")
@cache_result
def get_ak(g):
if g.is_directed():
......@@ -150,7 +152,7 @@ def get_ak(g):
else:
return 2 * g.num_edges() / g.num_vertices()
@register("degree_std_dev", "Degree standard deviation")
@register("degree_std_dev", "Degree standard deviation", "symlog")
@cache_result
def get_kdev(g):
g = GraphView(g, directed=False)
......@@ -201,7 +203,7 @@ def get_tgap(g):
ew = scipy.sparse.linalg.eigs(T, k=2, which="LR", return_eigenvectors=False)
return float(min(ew.real))
@register("mixing_time", "Random walk mixing time")
@register("mixing_time", "Random walk mixing time", "log")
@uses(["transition_gap"])
@cache_result
def get_mixing(g, tgap):
......@@ -209,7 +211,7 @@ def get_mixing(g, tgap):
return numpy.inf
return -1/numpy.log(tgap)
@register("hashimoto_radius", "Largest eigenvalue of non-backtracking matrix")
@register("hashimoto_radius", "Largest eigenvalue of non-backtracking matrix", "log")
@cache_result
def get_hgap(g):
g = GraphView(g, directed=False)
......
......@@ -26,17 +26,14 @@ from collections import defaultdict
import functools
import pickle
from flask import Flask, render_template, make_response, redirect, Markup, \
send_file, abort, request, jsonify
send_file, abort, request, jsonify, url_for
import process_entry
import analyze
import draw
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "Serif"
plt.switch_backend("svg")
import numpy
import math
app = Flask(__name__)
......@@ -55,6 +52,8 @@ entries = process_entry.get_entries()
analyze.analyze_entries(entries.values(), skip=["pos"],
global_cache=True)
whales = ["openstreetmap"]
@app.context_processor
def file_processor():
def file_size(filename):
......@@ -154,8 +153,8 @@ def network_download(net, filename):
@app.route("/stats")
def stats_page():
global entries
full = request.args.get('full', False)
if not request.script_root:
request.script_root = url_for('main_page', _external=True)
n_data = len(entries)
n_nets = sum(len(entry.files) for entry in entries.values())
......@@ -177,8 +176,7 @@ def stats_page():
tags = list(tags.items())
tags = sorted(tags, key=lambda x: -x[1])
big = ["openstreetmap"]
entries_s = {name: e for name, e in entries.items() if name not in big}
entries_s = {name: e for name, e in entries.items() if name not in whales}
n_nets_s = sum(len(entry.files) for entry in entries_s.values())
n_directed_s = 0
n_undirected_s = 0
......@@ -192,119 +190,13 @@ def stats_page():
if entry.analyses[alt]["is_bipartite"]:
n_bip_s += 1
plots = [[get_plot("num_vertices", "num_edges", x_scale="log", y_scale="log", full=full),
get_plot("num_vertices", "average_degree", x_scale="log", y_scale="log", full=full),
get_plot("num_vertices", "degree_std_dev", x_scale="log", y_scale="symlog", full=full)],
[get_plot("num_vertices", "global_clustering", x_scale="log", full=full),
get_plot("num_vertices", "degree_assortativity", x_scale="log", full=full),
get_plot("num_vertices", "largest_component_fraction", x_scale="log", full=full)],
[get_plot("average_degree", "global_clustering", x_scale="log", full=full),
get_plot("average_degree", "degree_assortativity", x_scale="log", full=full),
get_plot("degree_assortativity", "global_clustering", full=full)],
[get_plot("average_degree", "mixing_time", x_scale="log", y_scale="log", full=full),
get_plot("average_degree", "hashimoto_radius", x_scale="log", y_scale="log", full=full),
get_plot("hashimoto_radius", "mixing_time", x_scale="log", y_scale="log", full=full)],
[get_plot("global_clustering", "mixing_time", y_scale="log", full=full),
get_plot("global_clustering", "hashimoto_radius", y_scale="log", full=full),
get_plot("degree_assortativity", "mixing_time", y_scale="log", full=full)]]
return render_template('stats.html', n_data=n_data, n_nets=n_nets,
tags=tags, n_directed=n_directed,
n_undirected=n_undirected, n_bip=n_bip,
n_nets_s=n_nets_s, n_directed_s=n_directed_s,
n_undirected_s=n_undirected_s, n_bip_s=n_bip_s,
big=",".join(big), plots=plots, full=full)
@functools.lru_cache()
def get_plot(x, y, x_scale="linear", y_scale="linear", width=5, height=5, full=True):
    """Render a scatter plot of analysis ``y`` against analysis ``x`` as SVG text.

    Parameters:
        x, y: analysis names; used as keys into each entry's analyses and
            into ``analyze.titles`` for the axis labels.
        x_scale, y_scale: scale names handed to ``set_xscale``/``set_yscale``.
        width, height: figure size handed to ``plt.subplots``.
        full: if true, one point is plotted per network file (the
            "openstreetmap" entry is skipped); otherwise one point per entry.

    Returns:
        The SVG document as a string. Each marker links to ``/net/<name>``
        and carries an ``xlink:title`` attribute holding the entry name.

    The result is memoised by ``functools.lru_cache``, so every argument
    must be hashable.
    """
    xs = []
    ys = []
    is_bip = []
    is_dir = []
    urls = []
    for entry in entries.values():
        if full:
            if entry.name in ["openstreetmap"]:
                continue
            for f, alt, fmt in entry.files:
                xs.append(entry.analyses[alt][x])
                ys.append(entry.analyses[alt][y])
                urls.append(f"/net/{entry.name}")
                is_bip.append(bool(entry.analyses[alt]["is_bipartite"]))
                is_dir.append(bool(entry.analyses[alt]["is_directed"]))
        else:
            xs.append(entry._analyses[x])
            ys.append(entry._analyses[y])
            urls.append(f"/net/{entry.name}")
            is_bip.append(bool(entry._analyses["is_bipartite"]))
            is_dir.append(bool(entry._analyses["is_directed"]))

    xs = numpy.array(xs)
    ys = numpy.array(ys)
    is_bip = numpy.array(is_bip)
    is_dir = numpy.array(is_dir)
    urls = numpy.array(urls)

    style = dict(c="#2e3436", edgecolors="#eeeeec", linewidths=.5, alpha=.8)

    fig, ax = plt.subplots(1, 1, figsize=(width, height))
    try:
        # Circles: undirected, non-bipartite networks.
        idx = numpy.logical_and(numpy.logical_not(is_dir),
                                numpy.logical_not(is_bip))
        s = ax.scatter(xs[idx], ys[idx], marker="o", **style)
        s.set_urls(urls[idx])

        # Squares: directed, non-bipartite networks.
        idx = numpy.logical_and(is_dir,
                                numpy.logical_not(is_bip))
        s = ax.scatter(xs[idx], ys[idx], marker="s", **style)
        s.set_urls(urls[idx])

        # Triangles: bipartite networks (directed or not).
        idx = is_bip
        s = ax.scatter(xs[idx], ys[idx], marker="^", **style)
        s.set_urls(urls[idx])

        ax.set_xscale(x_scale)
        ax.set_yscale(y_scale)
        ax.set_xlabel(analyze.titles[x])
        ax.set_ylabel(analyze.titles[y])
        ax.autoscale()
        fig.tight_layout()

        buf = io.StringIO()
        fig.savefig(buf, format="svg")
    finally:
        # pyplot keeps a reference to every figure it creates until the
        # figure is closed; without this each rendered plot would stay in
        # memory for the lifetime of the server process.
        plt.close(fig)

    svg = buf.getvalue()
    # Add a title next to every dataset link so the entry name is attached
    # to the marker in the SVG output.
    p = re.compile('xlink:href="/net/([^"]*)"')
    return p.sub(r'xlink:href="/net/\1" xlink:title="\1"', svg)
@app.route("/plot")
def plot():
    """Serve one scatter plot of analysis ``y`` against ``x`` as an SVG image.

    Query parameters:
        x, y: analysis names (required; the request is answered with 404
            when either is missing).
        x_scale, y_scale: axis scales forwarded to ``get_plot``
            (default "linear").
        width, height: figure size forwarded to ``get_plot`` (default 5);
            values that do not parse as floats fall back to the default.
    """
    x = request.args.get('x', None)
    y = request.args.get('y', None)
    if x is None or y is None:
        abort(404)
    x_scale = request.args.get('x_scale', "linear")
    y_scale = request.args.get('y_scale', "linear")
    # ``type=float`` makes the lookup return the default instead of raising
    # ValueError when the client sends a non-numeric value.
    width = request.args.get('width', 5.0, type=float)
    height = request.args.get('height', 5.0, type=float)
    # get_plot takes x_scale/y_scale; the former log_x/log_y keywords were
    # undefined names and not part of its signature.
    svg = get_plot(x, y, x_scale=x_scale, y_scale=y_scale,
                   width=width, height=height)
    # send_file needs a binary file object; get_plot returns text.
    buf = io.BytesIO(svg.encode("utf-8"))
    return send_file(buf, mimetype="image/svg+xml")
whales=",".join(whales), analyses=analyze.titles,
scales=analyze.scales)
@app.route("/draw/<net>")
@app.route("/draw/<net>/<alt>")
......@@ -336,6 +228,20 @@ def net_draw(net, alt=None):
def api():
return render_template('api.html')
def clean_floats(d):
    """Return a copy of ``d`` with non-finite numbers replaced by ``None``.

    NaN and +/-infinity values are mapped to ``None``; nested dictionaries
    are processed recursively. Every other value (strings, ``None``, lists,
    finite numbers, ...) is copied over unchanged, and ``d`` itself is not
    modified.
    """
    new_d = {}
    for k, v in d.items():
        if isinstance(v, dict):
            v = clean_floats(v)
        else:
            try:
                if not math.isfinite(v):
                    v = None
            except (TypeError, OverflowError):
                # TypeError: v is not a real number (str, None, list, ...).
                # OverflowError: v is an int too large to convert to float;
                # such a value is finite, so it is kept as is.
                pass
        new_d[k] = v
    return new_d
def get_entry_json(entry):
return dict(title=entry.title,
description=entry.description,
......@@ -344,8 +250,8 @@ def get_entry_json(entry):
restricted=entry.restricted,
nets=[(alt if alt is not None else entry.name)
for f,alt,fmt in entry.files],
analyses=entry.analyses if len(entry.files) > 1
else entry.analyses[None])
analyses=clean_floats(entry.analyses if len(entry.files) > 1
else entry.analyses[None]))
@app.route("/api/nets")
def api_entries():
......@@ -353,9 +259,13 @@ def api_entries():
tags = request.args.get('tags', None)
if tags is not None:
tags = set(tags.split(","))
full = request.args.get('full', False)
exclude = request.args.get('exclude', None)
if exclude is not None:
exclude = set(exclude.split(","))
full = bool(request.args.get('full', False))
fentries = [entry for entry in entries.values()
if tags is None or len(set(entry.tags) & tags) > 0]
if ((tags is None or len(set(entry.tags) & tags) > 0) and
(exclude is None or entry.name not in exclude))]
if full:
return {entry.name: get_entry_json(entry) for entry in fentries}
else:
......@@ -383,4 +293,4 @@ def issues_page():
return redirect("//git.skewed.de/count0/netzschleuder/issues/")
if __name__ == "__main__":
app.run(debug=False, host='0.0.0.0')
app.run(debug=True, host='0.0.0.0')
......@@ -91,12 +91,7 @@
<nav class="ink-navigation xlarge-push-right large-push-right">
<ul class="menu horizontal black shadowed">
<li class="heading {% block net_active %}{% endblock %}"><a href="/">Networks</a></li>
<li class="{% block stats_active %}{% endblock %}"><a href="/stats">Stats</a>
<ul class="submenu" style="white-space: nowrap;">
<li><a href="/stats">Individual datasets</a></li>
<li><a href="/stats?full=True">All networks</a></li>
</ul>
</li>
<li class="{% block stats_active %}{% endblock %}"><a href="/stats">Stats</a></li>
<li class="{% block api_active %}{% endblock %}"><a href="/api">API</a></li>
<li class="{% block git_active %}{% endblock %}"><a href="/git">Git</a></li>
<li class="{% block issues_active %}{% endblock %}"><a href="/issues">Issues</a></li>
......
......@@ -7,11 +7,23 @@ active
{% block content %}
<!-- Load d3.js -->
<script src="https://d3js.org/d3.v4.js"></script>
<script src="https://d3js.org/d3-scale.v3.js"></script>
<script type=text/javascript>
$SCRIPT_ROOT = {{ request.script_root|tojson|safe }};
</script>
<style>
svg {
height: auto;
width: 100%;
}
div.tooltip {
position: absolute;
text-align: center;
padding: 4px;
font: 12px sans-serif;
background: lightsteelblue;
border: 0px;
}
</style>
<div class="column-group">
......@@ -30,7 +42,7 @@ active
{{fmt_int(n_bip) }} bipartite)
</span>
</dd>
<dt>Number of networks (excluding {{ big }})</dt>
<dt>Number of networks (excluding {{ whales }})</dt>
<dd>{{ fmt_int(n_nets_s) }}
<span style="margin-left: 2em">
({{fmt_int(n_undirected_s) }} undirected,
......@@ -50,22 +62,328 @@ active
</dd>
<br/>
{% if full %}
<dt>Network properties (one point per network) </dt>
{% else %}
<dt>Network properties (one point per dataset) </dt>
{% endif %}
<p>Clicking on the plot marker will lead you to the corresponding dataset page.</p>
<dt>Network properties</dt>
<p>Clicking on the plot marker will lead you to the corresponding dataset page. Scrolling zooms, and click pans.</p>
<dd>
<table>
{% for row in plots %}
<tr>
{% for plot in row %}
<td style="padding: 1em;">{{plot | safe}}</td>
<div style="text-align: center;">
<div style='display: inline-grid; grid-template-columns: min-content min-content; align-items: center; grid-column-gap: 2em;'>
<div>
<svg id="scatter_area" height=650 width=650> </svg>
</div>
<div id="selectors">
<div style='display: grid; width=20%; grid-template-columns: max-content max-content; grid-column-gap: 1em; grid-row-gap: 1em; align-items: center;'>
<div>
X axis:
</div>
<select id="x_axis">
{% for a, title in analyses.items() %}
{% if title is not none and a != "is_directed" and a != "is_bipartite" %}
{% if a == "num_vertices" %}
<option value="{{a}}" selected="selected">{{title}}</option>
{% else %}
<option value="{{a}}">{{title}}</option>
{% endif %}
{% endif %}
{% endfor %}
</select>
<div>
Y axis:
</div>
<select id="y_axis">
{# Offer every titled analysis except the boolean flags; the directedness flag is keyed "is_directed", the same key the X axis selector filters on. #}
{% for a, title in analyses.items() %}
{% if title is not none and a != "is_directed" and a != "is_bipartite" %}
{% if a == "num_edges" %}
<option value="{{a}}" selected="selected">{{title}}</option>
{% else %}
<option value="{{a}}">{{title}}</option>
{% endif %}
{% endif %}
{% endfor %}
</select>
<div>
Size :
</div>
<select id="c_size">
{% for a, title in analyses.items() %}
{% if title is not none %}
{% if a == "average_degree" %}
<option value="{{a}}" selected="selected">{{title}}</option>
{% else %}
<option value="{{a}}">{{title}}</option>
{% endif %}
{% endif %}
{% endfor %}
</select>
<div>
Color :
</div>
<select id="c_color">
{% for a, title in analyses.items() %}
{% if title is not none %}
{% if a == "global_clustering" %}
<option value="{{a}}" selected="selected">{{title}}</option>
{% else %}
<option value="{{a}}">{{title}}</option>
{% endif %}
{% endif %}
{% endfor %}
</select>
</div>
</div>
</div>
</div>
<script>
// append the svg object to the body of the page
var svg = d3.select("#scatter_area");
var tooltip = d3.select("body").append("div")
.attr("class", "tooltip")
.style("opacity", 0);
titles = {
{% for a, title in analyses.items() %}
"{{a}}" : "{{ title }}",
{% endfor %}
</tr>
{% endfor %}
</table>
}
scales = {
{% for a, scale in scales.items() %}
"{{a}}" : "{{ scale }}",
{% endfor %}
}
function draw(nets) {
var x_col = d3.select("#x_axis").property("value");
var y_col = d3.select("#y_axis").property("value");
var size_col = d3.select("#c_size").property("value");
var color_col = d3.select("#c_color").property("value");
// Create data
svg.selectAll("*").remove();
// set the dimensions and margins of the graph
var margin = {top: 10, right: 40, bottom: 60, left: 60},
width = Math.max(window.innerWidth - d3.select("#selectors").node().getBoundingClientRect().width - 300, 450),
height = 650;
svg.attr("width", width)
.attr("height", height)
.append("g");
var data = []
for (var n in nets) {
var net = nets[n]
if (net["nets"].length == 1) {
data.push( {x: net["analyses"][x_col],
y: net["analyses"][y_col],
size : net["analyses"][size_col],
color : net["analyses"][color_col],
name: n,
alt: ""});
}
else {
for (var alt of net["nets"]) {
data.push( {x: net["analyses"][alt][x_col],
y: net["analyses"][alt][y_col],
size: net["analyses"][alt][size_col],
color: net["analyses"][alt][color_col],
name: n,
alt: alt});
}
}
}
var superscript = "⁰¹²³⁴⁵⁶⁷⁸⁹";
formatPower = function(d) { return (d + "").split("").map(function(c) { return superscript[c]; }).join(""); };
// Tick formatter: returns "10" followed by the superscript exponent when `d`
// is (within floating-point tolerance) an exact power of ten, and null for
// every other value.
function tfmt(d) {
    const exponent = Math.log(d) / Math.LN10;
    const nearest = Math.round(exponent);
    // The tolerance absorbs rounding error in the computed logarithm.
    return Math.abs(exponent - nearest) < 1e-6 ? 10 + formatPower(nearest) : null;
}
// X scale and Axis
if (scales[x_col] == "log")
var x = d3.scaleLog();
else if (scales[x_col] == "symlog")
var x = d3.scaleSymlog();
else
var x = d3.scaleLinear();
x.domain([d3.min(data, function (d) { return d.x; }),
d3.max(data, function (d) { return d.x; })]).nice()
.range([margin.left, width - margin.right]);
g_x = svg.append('g')
.attr("transform", `translate(0,${height - margin.bottom})`)
if (scales[x_col] == "log")
x_axis = d3.axisBottom(x).ticks(Math.floor(Math.log10(x.domain()[1] - x.domain()[0])) * 10, tfmt);
else
x_axis = d3.axisBottom(x);
g_x.call(x_axis)
// Y scale and Axis
if (scales[y_col] == "log")
var y = d3.scaleLog();
else if (scales[y_col] == "symlog")
var y = d3.scaleSymlog();
else
var y = d3.scaleLinear();
y.domain([d3.min(data, function (d) { return d.y; }),
d3.max(data, function (d) { return d.y; })]).nice()
.range([height - margin.bottom, margin.top]);
g_y = svg.append('g')
.attr("transform", `translate(${margin.left},0)`)
if (scales[y_col] == "log")
y_axis = d3.axisLeft(y).ticks(Math.floor(Math.log10(y.domain()[1] - y.domain()[0])) * 10, tfmt);
else
y_axis = d3.axisLeft(y);
g_y.call(y_axis)
// text label for the x axis
svg.append("text")
.attr("transform",
"translate(" + (width/2) + " ," +
(height - 20) + ")")
.style("text-anchor", "middle")
.text(titles[x_col]);
// text label for the y axis
svg.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 0)
.attr("x", 0 - (height / 2))
.attr("dy", "1em")
.style("text-anchor", "middle")
.text(titles[y_col]);
// size
if (scales[size_col] == "log")
var size = d3.scaleLog();
else if (scales[size_col] == "symlog")
var size = d3.scaleSymlog();
else
var size = d3.scaleLinear();
size.domain([d3.min(data, function (d) { return d.size; }),
d3.max(data, function (d) { return d.size; })])
.range([4, 150]);
// color
if (scales[color_col] == "log")
var cr = d3.scaleLog();
else if (scales[color_col] == "symlog")
var cr = d3.scaleSymlog();
else
var cr = d3.scaleLinear();
cr.domain([d3.min(data, function (d) { return d.color; }),
d3.max(data, function (d) { return d.color; })]).
range([0, 1]);
var color = d3.scaleSequential().domain([0, 1])
.interpolator(d3.interpolateInferno);
svg.append('defs')
.append('clipPath')
.attr('id', 'clip')
.append('rect')
.attr('x', margin.left)
.attr('y', margin.top)
.attr('width', width - margin.right - margin.left)
.attr('height', height - margin.bottom - margin.top);
const main = svg.append('g')
.attr('class', 'main')