Skip to content

Commit

Permalink
External link checking with refcache
Browse files Browse the repository at this point in the history
  • Loading branch information
chalin committed Nov 7, 2024
1 parent 694c91b commit dbcae4a
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 9 deletions.
14 changes: 12 additions & 2 deletions .htmltest.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
DirectoryPath: public
CheckDoctype: false # Sadly, this is false only because of `static/google*.html`
TestFilesConcurrently: true
IgnoreAltMissing: true # FIXME
IgnoreDirectoryMissingTrailingSlash: true # FIXME
IgnoreDirs: [_print] # FIXME
IgnoreDirs:
- ^blog/(\d+/)?page/\d+
IgnoreEmptyHref: true # FIXME
IgnoreInternalEmptyHash: true # FIXME
IgnoreInternalURLs: # list of paths
IgnoreURLs: # list of regexes matching paths or URLs to be ignored
- ^https://twitter.com/docsydocs$
- ^https?://localhost\b
# Ignore Docsy-generated GitHub links for now
- ^https?://github\.com/.*?/.*?/(new|edit)/ # view-page, edit-source etc
# Ignore links to GH repo content for now.
# - ^https?://github\.com/.*?/.*?/(blob|tree)/
# Sites that deny access, always yielding 401, 403 Forbidden, 406, or other:
- ^https://(www\.)?linkedin\.com\b # 999 Request Denied
- ^https://twitter.com
- ^https://x.com
38 changes: 36 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Set REFCACHE to another value to disable htmltest refcache-file manipulation
REFCACHE?=refcache
HTMLTEST_DIR=tmp
HTMLTEST?=htmltest # Specify as make arg if different
HTMLTEST_ARGS?=--skip-external
HTMLTEST_ARGS?=--log-level 1
LINK_CACHE_FILE?=refcache.json
LINK_CACHE_FILE_DEST_DIR?=static
LINK_CACHE_FILE_SRC_DIR?=$(HTMLTEST_DIR)/.htmltest

# Use $(HTMLTEST) in PATH, if available; otherwise, we'll get a copy
ifeq (, $(shell which $(HTMLTEST)))
Expand All @@ -10,7 +15,36 @@ GET_LINK_CHECKER_IF_NEEDED=get-link-checker
endif
endif

check-links: $(GET_LINK_CHECKER_IF_NEEDED)
# default: print a sorted list of the targets whose names start with a
# lowercase letter, extracted from make's own rule database (-p).
#
# Notes:
# - The literal `make` (rather than $(MAKE)) is deliberate: a recipe line that
#   references $(MAKE) is executed even under -n, so the child's dry run of
#   this very target would spawn another child, recursing without end.
# - --no-print-directory keeps the child's "make[1]: Entering directory" /
#   "Leaving directory" lines out of the output; they would otherwise match
#   the grep pattern and show up as bogus targets.
# - printf is used instead of echo because whether echo expands "\n" depends
#   on the shell.
.PHONY: default
default:
	@printf 'Make what? Target list:\n\n'
	@make --no-print-directory -rpn | grep '^[a-z]\S*:' | sed 's/://' | sort

# Create the directory that refcache-restore copies the cache file into
# (and that refcache-save reads it back from).
$(LINK_CACHE_FILE_SRC_DIR):
	mkdir -p $@

# Seed the saved cache file with an empty JSON object when it does not exist
# yet, creating its directory first. Having no prerequisites, this rule only
# runs while the file is missing.
$(LINK_CACHE_FILE_DEST_DIR)/$(LINK_CACHE_FILE):
	mkdir -p $(LINK_CACHE_FILE_DEST_DIR)
	echo '{}' > $@

# refcache-restore: copy the saved cache file from $(LINK_CACHE_FILE_DEST_DIR)
# into $(LINK_CACHE_FILE_SRC_DIR). When REFCACHE is set to anything other than
# "refcache", the copy is skipped and a notice is printed instead.
#
# The target is a command, not a file, hence .PHONY. The destination directory
# only has to exist, so it is an order-only prerequisite (after the `|`).
.PHONY: refcache-restore
refcache-restore: $(LINK_CACHE_FILE_DEST_DIR)/$(LINK_CACHE_FILE) | $(LINK_CACHE_FILE_SRC_DIR)
ifeq (refcache, $(REFCACHE))
	cp $< $(LINK_CACHE_FILE_SRC_DIR)/
else
	@echo "SKIPPING refcache-restore"
endif

# refcache-save: copy the cache file from $(LINK_CACHE_FILE_SRC_DIR) back into
# $(LINK_CACHE_FILE_DEST_DIR), then reformat the copy with prettier. When
# REFCACHE is set to anything other than "refcache", nothing is copied and a
# notice is printed instead.
#
# Declared .PHONY so that a stray file named `refcache-save` cannot make the
# target look up to date and silently skip the recipe.
.PHONY: refcache-save
refcache-save:
ifeq (refcache, $(REFCACHE))
	cp $(LINK_CACHE_FILE_SRC_DIR)/$(LINK_CACHE_FILE) $(LINK_CACHE_FILE_DEST_DIR)/
	npx prettier --prose-wrap=always --write $(LINK_CACHE_FILE_DEST_DIR)/$(LINK_CACHE_FILE)
else
	@echo "SKIPPING refcache-save"
endif

# check-links: full link-check pipeline. Optionally fetch the link checker,
# restore the cache file, run the checker, then save the cache file.
#
# NOTE(review): the restore -> check -> save sequence is expressed only through
# the left-to-right order of the prerequisites, which make honours when running
# serially. Under `make -j` these steps may run concurrently or out of order;
# run this target without -j.
.PHONY: check-links
check-links: $(GET_LINK_CHECKER_IF_NEEDED) \
  refcache-restore check-links-only refcache-save

# check-links-only: run the link checker with $(HTMLTEST_ARGS), without
# touching the cache file. Declared .PHONY because it names a command, not a
# file.
.PHONY: check-links-only
check-links-only:
	$(HTMLTEST) $(HTMLTEST_ARGS)

clean:
Expand Down
9 changes: 4 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
{
"name": "docsy-user-guide",
"scripts": {
"__check:links": "make --keep-going check-links",
"_build": "npm run _hugo-dev --",
"_check:format": "npx prettier --check .",
"_check:links--warn": "npm run _check:links || (echo; echo 'WARNING: see link-checker output for issues.'; echo)",
"_check:links": "HTMLTEST_ARGS='--log-level 1 --skip-external' npm run __check:links",
"_check:links": "make --keep-going check-links",
"_diff:check": "git diff --name-only --exit-code",
"_filename-error": "echo 'ERROR: the following files violate naming conventions; fix using: `npm run fix:filenames`'; echo; npm run -s _ls-bad-filenames; exit 1",
"_filenames-to-kebab-case": "find assets content -name '*_*' ! -name '[_.]*' -exec sh -c 'mv \"$1\" \"${1//_/-}\"' _ {} \\;",
Expand All @@ -21,14 +20,14 @@
"build": "npm run _build --",
"check:filenames": "test -z \"$(npm run -s _ls-bad-filenames)\" || npm run -s _filename-error",
"check:format": "npm run _check:format || (echo '[help] Run: npm run fix:format'; exit 1)",
"check:links:all": "HTMLTEST_ARGS= npm run _check:links",
"check:links:internal": "HTMLTEST_ARGS='--skip-external' npm run _check:links",
"check:links": "npm run _check:links",
"clean": "rm -Rf public",
"clean": "rm -Rf public/",
"diff:check": "npm run _diff:check || (echo; echo 'WARNING: the files above have not been committed'; echo)",
"fix:filenames": "npm run _filenames-to-kebab-case",
"fix:format": "npm run _check:format -- --write",
"get:submodule": "npm run _get:${GET:-submodule}",
"make:public": "git init -b main public",
"make:public": "git init public",
"postbuild:preview": "npm run _check:links--warn",
"postbuild:production": "npm run _check:links--warn",
"precheck:links:all": "npm run build",
Expand Down
102 changes: 102 additions & 0 deletions static/refcache.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"https://code.jquery.com/jquery-3.7.1.min.js": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:03:42.391465-05:00"
},
"https://creativecommons.org/licenses/by/4.0": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:04:08.637825-05:00"
},
"https://git-scm.com/book/en/v2/Git-Tools-Submodules": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:04:08.590491-05:00"
},
"https://github.com/chalin/docsy-starter/commit/f3351ea626bd9de188987d2d43c2bd6f61bf837b": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:52.937468-05:00"
},
"https://github.com/chalin/docsy-starter/commit/f613f31ef0a75a96aef5ff04143f22600e0a14f4": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:53.002786-05:00"
},
"https://github.com/chalin/docsy-starter/issues/new": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:47.658611-05:00"
},
"https://github.com/chalin/docsy-starter/tree/main/content/en/blog/2024/hello.md": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:07:16.877967-05:00"
},
"https://github.com/chalin/docsy-starter/tree/main/content/en/blog/_index.md": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:07:16.789144-05:00"
},
"https://github.com/chalin/docsy-starter/tree/main/content/en/docs/_index.md": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:07:16.784901-05:00"
},
"https://github.com/chalin/docsy-starter/tree/main/content/en/docs/examples.md": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:07:16.790634-05:00"
},
"https://github.com/chalin/docsy-starter/tree/main/content/en/docs/get-started/_index.md": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:07:16.694206-05:00"
},
"https://github.com/cncf/foundation/blob/main/code-of-conduct.md": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:07:16.62173-05:00"
},
"https://github.com/google/docsy": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:04:03.076715-05:00"
},
"https://github.com/google/docsy-example": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:04:03.458186-05:00"
},
"https://github.com/google/docsy/discussions": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:52.769766-05:00"
},
"https://github.com/google/docsy/pulls": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:53.679368-05:00"
},
"https://gohugo.io": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:03:52.782709-05:00"
},
"https://gohugo.io/hugo-modules/": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:03:57.930006-05:00"
},
"https://groups.google.com/forum/#!forum/docsy-users": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:58.085171-05:00"
},
"https://netlify.com": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:04:08.5774-05:00"
},
"https://stackoverflow.com/questions/tagged/docsy": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:03:58.306202-05:00"
},
"https://www.docsy.dev//docs/updating/convert-site-to-module/": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:04:13.894846-05:00"
},
"https://www.linuxfoundation.org/legal/privacy-policy": {
"StatusCode": 200,
"LastSeen": "2024-11-06T11:04:13.748872-05:00"
},
"https://www.netlify.com": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:03:47.50766-05:00"
},
"https://www.netlify.com/img/global/badges/netlify-color-accent.svg": {
"StatusCode": 206,
"LastSeen": "2024-11-06T11:03:52.62081-05:00"
}
}

0 comments on commit dbcae4a

Please sign in to comment.