This repository has been archived by the owner on Jun 20, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 01c5908
Showing
5 changed files
with
67 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
config.py | ||
*.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Koha report listing the bibs to check; the link checker fetches this URL and
# parses the response as JSON.
report_url = 'https://library.cca.edu/cgi-bin/koha/svc/report?id=345' | ||
# Template for the catalog link written to each log row; {id} is filled in
# with the record's biblionumber.
opac_url = 'https://library.cca.edu/cgi-bin/koha/opac-detail.pl?biblionumber={id}' | ||
# File the link checker writes its log rows to (passed to logging.FileHandler).
log_filename = 'linkcheck.csv' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import csv | ||
import io | ||
import logging | ||
import urllib3 | ||
|
||
import requests | ||
|
||
import config | ||
|
||
# Log to both a file and the console in a CSV-like format. The timestamp and
# level are quoted by the format string itself; %(message)s is left bare
# because logging formats a single message value rather than a list of fields,
# so each message must already be a CSV-quoted row when it is logged.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='"%(asctime)s","%(levelname)s",%(message)s',
    handlers=[
        # Explicit UTF-8 so titles containing non-ASCII characters are written
        # reliably instead of depending on the platform's locale encoding.
        logging.FileHandler(config.log_filename, encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
# Root logger; no level is configured, so the default WARNING threshold applies.
logger = logging.getLogger()
|
||
|
||
def quote(list):
    """Render one row of values as a single CSV line with every field quoted.

    The row is written through ``csv.writer`` (``QUOTE_ALL``) into an
    in-memory buffer; surrounding whitespace, including the line terminator
    the writer appends, is stripped from the returned string.
    """
    # NOTE: the parameter name shadows the builtin `list`; it is kept so the
    # function's signature stays unchanged.
    with io.StringIO() as buffer:
        csv.writer(buffer, quoting=csv.QUOTE_ALL).writerow(list)
        row_text = buffer.getvalue()
    return row_text.strip()
|
||
# Our Koha cert isn't recognized but it's fine, so certificate verification is
# skipped for this request and the warning it would trigger is silenced.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# A timeout keeps the script from hanging forever if Koha stops responding
# (requests never times out by default).
report = requests.get(config.report_url, verify=False, timeout=60)
# Fail here with a clear HTTP error rather than later with a confusing JSON
# decoding error if Koha answered with an error page instead of the report.
report.raise_for_status()
|
||
# Seconds to wait on each link before giving up. Without a timeout a single
# unresponsive server would stall the whole run (requests never times out by
# default); a timed-out link is logged as an 'HTTP Exception' row.
link_timeout = 30

for bib in report.json():
    # bibs are arrays like [urls string, title, biblionumber]
    urls, title, biblionumber = bib
    # link back to the catalog record, shared by every log row for this bib
    opac_link = config.opac_url.format(id=biblionumber)
    # urls are separated by " | "
    for url in urls.split(' | '):
        try:
            r = requests.get(url, timeout=link_timeout)
        except Exception:
            # Best effort: one bad link (DNS failure, malformed URL, timeout,
            # ...) must not abort the run. Unlike a bare `except:`, this still
            # lets KeyboardInterrupt and SystemExit stop the script.
            logger.error(quote([title, opac_link, 'HTTP Exception', url]))
            continue
        # distinguish between severity of 5XX & 4XX HTTP errors
        if r.status_code >= 500:
            logger.error(quote([title, opac_link, r.status_code, url]))
        elif r.status_code >= 400:
            logger.warning(quote([title, opac_link, r.status_code, url]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Check links in MARC records | ||
|
||
Takes a public Koha report and checks each URL (`856$u`) to see whether it resolves successfully. | ||
|
||
## Notes | ||
|
||
Use the included `report.sql` to create a SQL report in Koha. Be sure to set "Public" to "Yes" so the report JSON can be publicly accessed. | ||
|
||
The app logs URLs whose HTTP response status is 400 or higher (4XX as warnings, 5XX as errors). It also catches exceptions raised within the requests library, which can occur when a domain is unavailable. | ||
|
||
Some websites have poor server hygiene and serve working pages with error (non-200) status codes. There is not a lot we can do about that. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
SELECT
    bi.url,           /* may hold several URLs; the link checker splits them on " | " */
    b.title,
    b.biblionumber    /* used to build the catalog link in each log row */
FROM biblio AS b
INNER JOIN biblioitems AS bi
    ON bi.biblionumber = b.biblionumber
WHERE bi.url <> ''