This repository has been archived by the owner on Jun 20, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinkcheck.py
52 lines (44 loc) · 1.53 KB
/
linkcheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import csv
import io
import logging
import httpx
import config
# Log to both a file and the console in a CSV-like format. The logging format
# string can only interpolate a single message value, so every message is
# pre-formatted as a quoted CSV row by quote() and appended verbatim after the
# quoted timestamp and level columns.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='"%(asctime)s","%(levelname)s",%(message)s',
    handlers=[
        # Pin the file encoding: logged titles and URLs can contain non-ASCII
        # characters, and the platform default encoding may not be able to
        # represent them (the record would then be dropped by logging).
        logging.FileHandler(config.log_filename, encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
# Root logger; its level is left untouched, and this script only emits
# warning and error records.
logger = logging.getLogger()
def quote(list):
    """Return the items of *list* as one CSV row with every field quoted.

    The row is rendered by ``csv.writer`` with ``QUOTE_ALL`` (so embedded
    double quotes are doubled) and the trailing line terminator is stripped,
    which makes the result usable as a single log message.
    """
    with io.StringIO() as buffer:
        csv.writer(buffer, quoting=csv.QUOTE_ALL).writerow(list)
        row = buffer.getvalue()
    return row.strip()
# Fetch the report listing the records to check; it is parsed below as a JSON
# array of bib rows.
report = httpx.get(config.report_url)
# Fail with a clear HTTP error rather than trying to parse an error page as JSON.
report.raise_for_status()

# Tally of outcomes: one counter per HTTP status code seen, plus "exception"
# for requests that never produced a response.
sums = {"exception": 0}

for bib in report.json():
    # bibs are arrays like [urls string, title, biblionumber]
    urls, title, biblionumber = bib
    # Link back to the record, included in every log row for this bib.
    opac_link = config.opac_url.format(id=biblionumber)

    # urls are separated by " | "
    for url in urls.split(' | '):
        try:
            # Keep the try body to the request itself so that only request
            # failures are counted as exceptions.
            status = httpx.get(url, follow_redirects=True).status_code
        except Exception:
            # Best-effort: any failure to fetch one URL is logged and counted,
            # then the run continues. Unlike a bare `except:`, this still lets
            # KeyboardInterrupt and SystemExit stop the script.
            logger.error(quote([title, opac_link, 'HTTP Exception', url]))
            sums["exception"] += 1
            continue

        sums[status] = sums.get(status, 0) + 1

        # distinguish between severity of 5XX & 4XX HTTP errors
        if status >= 500:
            logger.error(quote([title, opac_link, status, url]))
        elif status >= 400:
            logger.warning(quote([title, opac_link, status, url]))

print('Link check summary:')
print(sums)