Commit 9c209bed authored by Tiago Peixoto
Browse files

Add upstream health check

parent 5f8f77b2
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import pkgutil
import sys
import os
import gc
import urllib.error
import googleapiclient.errors
import datetime
import time
import pickle
import process_entry
from locks import acquire_lock
# Upstream health check script.
#
# For every selected entry (all entries, or only those named on the command
# line) this probes the upstream source at most once every 24 hours and
# records the resulting HTTP status together with the time of the check in
# ``./cache/upstream/<entry name>/.health`` as a pickled ``(status, date)``
# tuple, where ``date`` is a ``time.time()`` timestamp.

# Entry names given on the command line restrict the check; ``None`` is
# passed through to ``get_entries`` when no names are given.
if len(sys.argv) > 1:
    names = sys.argv[1:]
else:
    names = None

entries = process_entry.get_entries(names).values()

# Module-level flag read by the fetch routines in ``process_entry``.
process_entry._check_health = True

# Minimum number of seconds between two checks of the same entry.
max_age = 24 * 60 * 60

for entry in entries:
    with acquire_lock(entry, block=False) as lock:
        # A ``None`` lock means the non-blocking acquire did not succeed;
        # skip this entry instead of waiting for it.
        if lock is None:
            continue

        print("check upstream health,", entry.name, end=": ")

        # Defaults used when no (readable) health record exists yet: a date
        # of 0 forces a fresh check below.
        status = 200
        date = 0
        hfile = f"./cache/upstream/{entry.name}/.health"
        try:
            with open(hfile, "rb") as f:
                status, date = pickle.load(f)
        except (FileNotFoundError, IOError, EOFError,
                pickle.UnpicklingError):
            # A missing, unreadable, or truncated/corrupt record is treated
            # like "never checked" rather than aborting the whole run.
            pass

        if time.time() - date > max_age:
            try:
                entry.fetch_upstream(force=True)
                status = 200
            except googleapiclient.errors.HttpError as e:
                status = e.resp.status
            except urllib.error.HTTPError as e:
                status = e.code
            # NOTE(review): failures that are not HTTP errors (e.g.
            # urllib.error.URLError on a DNS or connection failure) still
            # propagate and stop the remaining checks -- confirm whether
            # they should be recorded as a failed status instead.
            date = time.time()

            # The cache directory may not exist yet for an entry that has
            # never been downloaded; create it so the record can be saved.
            os.makedirs(os.path.dirname(hfile), exist_ok=True)
            with open(hfile, "wb") as f:
                pickle.dump((status, date), f)

        print(status, date)
\ No newline at end of file
......@@ -31,6 +31,7 @@ import ssl
import importlib
import gc
import shutil
from datetime import datetime
from graph_tool.all import *
......@@ -52,6 +53,16 @@ def get_entries(names=None):
entry.upstream_license = None
if not hasattr(entry, "restricted"):
entry.restricted = False
hfile = f"./cache/upstream/{entry.name}/.health"
try:
with open(hfile, "rb") as f:
status, date = pickle.load(f)
except (FileNotFoundError, IOError, EOFError):
status, date = (200, 0)
date = datetime.fromtimestamp(date)
date = date.ctime()
entry.upstream_status = (status, date)
return {entry.name : entry for entry in entries}
def _coerce_props(g, vforce={}, eforce={}):
......@@ -255,8 +266,11 @@ def open_upstream_file(name, fname, mode="r", autocompress=True, **kwargs):
with io.TextIOWrapper(bf, "utf8") as wf:
yield wf
_check_health = False
def fetch_upstream_files(name, url_prefix, files, force=False, bad_ssl=False):
global _check_health
processed = set()
for fnames, alt, fmt in files:
if isinstance(fnames, str):
......@@ -280,7 +294,7 @@ def fetch_upstream_files(name, url_prefix, files, force=False, bad_ssl=False):
continue
if url_prefix == "gdrive://":
with open_upstream_file(name, fname, "wb", autocompress=False) as output:
save_gdrive_file(fname, output)
save_gdrive_file(fname, output, _check_health)
else:
if url_prefix is not None:
url = f"{url_prefix}/{fname}"
......@@ -290,8 +304,12 @@ def fetch_upstream_files(name, url_prefix, files, force=False, bad_ssl=False):
gctx = ssl.SSLContext() if bad_ssl else None
try:
with urllib.request.urlopen(req, context=gctx) as f:
with open_upstream_file(name, fname, "wb", autocompress=False) as output:
shutil.copyfileobj(f, output)
if _check_health:
f.read(1)
else:
with open_upstream_file(name, fname, "wb",
autocompress=False) as output:
shutil.copyfileobj(f, output)
except urllib.error.HTTPError:
print("error fetching:", f"{url_prefix}/{fname}")
raise
......
......@@ -35,7 +35,16 @@ active
{% endfor %}
</ul>
</dd>
<dt>Upstream URL</dt>
<dt>Upstream URL
<span style="font-size: smaller; vertical-align: text-top; padding-left: 1em">
{% if entry.upstream_status[0] == 200 %}
<span class="ink-label green" title="Upstream health OK. Last checked on {{entry.upstream_status[1]}}">OK</span>
{% else %}
<span class="ink-label red" title="Upstream health NOT OK! Last checked on {{entry.upstream_status[1]}}">{{entry.upstream_status[0]}}</span>
{% endif %}
</span>
</dt>
<dd>
<a href="{{entry.url}}">{{entry.url}}</a>
</dd>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment