diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
index 5d97b115a..04353aa37 100644
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -116,6 +116,8 @@ jobs:
           echo "Retrieve bibliography"
           cd scripts
           chmod +x ./update_bibliography.sh
+          chmod +x ./bibSplit.pl
+
           ./update_bibliography.sh
           sudo cp --recursive ${GITHUB_WORKSPACE}/static/data ~/data
         fi
diff --git a/.gitignore b/.gitignore
index b41f2aa34..5a668c005 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ _gen/
 .hugo_build.lock
 .idea
 data/bibliography.json
+/docs
diff --git a/layouts/shortcodes/bibItem.html b/layouts/shortcodes/bibItem.html
new file mode 100644
index 000000000..82085b925
--- /dev/null
+++ b/layouts/shortcodes/bibItem.html
@@ -0,0 +1,294 @@
+
+
+{{- $notesIcon := "\u24C3" -}}
+{{- $nonBreakingHyphen := "\u2011" -}}
+
+
+{{ $item := dict -}}
+{{- $jsonFile := printf "data/bibItems/%s.json" (.Get "key") -}}
+{{- $bib := resources.Get $jsonFile -}}
+{{- with $bib -}}
+  {{- with . | transform.Unmarshal -}}
+    {{- $item = . -}}
+  {{- end -}}
+{{- else -}}
+  {{- $jsonFile | errorf "Unable to get item resource '%s'" -}}
+{{- end -}}
+{{- with $item -}}
+  {{- if or .author .editor -}}
+
+    {{- if not .author -}}Edited by: {{- end -}}
+    {{- range $naE, $auEd := or .author .editor -}}
+      {{- if $naE -}}; {{ end -}}
+      {{- if $auEd.literal -}}
+        {{- $auEd.literal -}}
+      {{- else if and $auEd.family $auEd.given -}}
+        {{- $auEd.family -}}, {{- $auEd.given -}}
+      {{- else -}}
+        {{- $auEd.family -}}{{- $auEd.given -}}
+      {{- end -}}
+    {{- end -}}
+
+  {{- end -}}
+  {{- if .url }}
+    Access document
+  {{ end -}}
+  {{- if eq .itemType "journalArticle" -}}
+    {{- $journalTitle := or .publicationTitle .journalAbbreviation -}}
+    {{- if $journalTitle }}
+      Publication: {{ $journalTitle -}}
+    {{- end -}}
+    {{- if .volume }}
+      Volume: {{ .volume -}}
+    {{- end -}}
+    {{- if .issue }}
+      Issue: {{ .issue -}}
+    {{- end -}}
+    {{- $date := "" -}}
+    {{- if .issuedDateParts -}}
+      {{- $dp := .issuedDateParts -}}
+      {{- $dpl := len $dp -}}
+      {{- $dpf := "January 2, 2006" -}}
+      {{- $y := 0 -}}
+      {{- $m := 10 -}}
+      {{- $d := 18 -}}
+      {{- if ge $dpl 1 -}}
+        {{- $y = strings.TrimLeft "0" (string (index $dp 0)) | int -}}
+        {{- $dpf = "2006" -}}
+      {{- end -}}
+      {{- if ge $dpl 2 -}}
+        {{- $m = strings.TrimLeft "0" (string (index $dp 1)) | int -}}
+        {{- $dpf = "January 2006" -}}
+      {{- end -}}
+      {{- if ge $dpl 3 -}}
+        {{- $d = strings.TrimLeft "0" (string (index $dp 2)) | int -}}
+        {{- $dpf = "January 2, 2006" -}}
+      {{- end -}}
+      {{- if $y -}}
+        {{- $t := printf "%04d-%02d-%02d" $y $m $d -}}
+        {{- $parsed := time.AsTime $t -}}
+        {{- $date = $parsed.Format $dpf -}}
+      {{- end -}}
+    {{- end -}}
+    {{- if and (not $date) .date -}}
+      {{- $date = .date -}}
+    {{- end -}}
+    {{- if $date }}
+      Date: {{ $date -}}
+    {{- end -}}
+    {{- $pages := or .pages .page -}}
+    {{- if $pages }}
+      Pages: {{ $pages -}}
+    {{- end -}}
+  {{- end -}}
+  {{- if .DOI }}
+    DOI: {{ .DOI -}}
+  {{- end -}}
+  {{- if .ISSN }}
+    ISSN: {{ .ISSN -}}
+  {{- end -}}
+  {{- if .abstract }}
+    Abstract:
+    {{- range $na, $abstract := split (plainify .abstract) "\n" -}}
+      {{- if $na -}}
+      {{- end -}}
+      {{if strings.ContainsNonSpace $abstract -}}
+        {{- $trimmed := strings.TrimLeft " " $abstract -}}{{- $indent := (sub ($abstract | len) ($trimmed | len)) -}}
+        {{- print (safeJS (htmlUnescape (printf "%s" $trimmed | printf "%s%s" (strings.Repeat $indent " ") | printf "%s"))) -}}
+      {{- end -}}
+    {{- end -}}
+
+    {{ end -}}
+    {{- if and false .note -}}
+      {{- $itemID := (cond (gt (len .id) 0) (replace .id `/` `_`) `_0`) -}}
+      {{- $notes := split (plainify .note) "\n" -}}
+      Notes
+
+      {{- range $nn, $note := $notes -}}
+        {{if strings.ContainsNonSpace $note -}}
+          {{- if $nn -}}
+          {{- end -}}
+          {{- print (safeJS (replace (replace (htmlUnescape $note) "-" $nonBreakingHyphen) " " " ")) -}}
+        {{- end -}}
+      {{- end -}}
+
+
+    {{- end -}}
+{{- end -}}
+
+
+
diff --git a/scripts/bib-fns.jq b/scripts/bib-fns.jq
index ae61d919b..668a0a3ac 100644
--- a/scripts/bib-fns.jq
+++ b/scripts/bib-fns.jq
@@ -12,6 +12,9 @@ def unwrapDiv:
 def moveURL_to_url:
   select(nonBlankKey("URL")) | (setpath(["url"]; .URL) | del(.URL)) // .;
 
+def raise_issued_date_parts:
+  if nonBlankKey("issued") and (.issued | nonBlankKey("date-parts")) then setpath(["issuedDateParts"]; .issued."date-parts"[0]) else . end;
+
 def make_DOI_to_url($doi):
   if ($doi | startswith("https:")) then $doi else "https://doi.org/" + ($doi | ltrimstr("/")) end ;
 
@@ -90,3 +93,7 @@ def semiflatten: # assumes that only one item is the input
     + ($innerKeys | map(. as $iKey | {"key": $iKey, "value": ($inner | getpath([$iKey]))}))
   ) | from_entries;
 
+def bibItem: # assumes that only one item is the input
+  . as $item
+  | (keys - ["key","title","target"]) as $tailKeys
+  | {"key": .key, "title": .title, "target": .target } + ($tailKeys | map(. as $tKey | {"key": $tKey, "value": ($item | getpath([$tKey]))}) | from_entries);
diff --git a/scripts/bibSplit.pl b/scripts/bibSplit.pl
new file mode 100644
index 000000000..b84819c69
--- /dev/null
+++ b/scripts/bibSplit.pl
@@ -0,0 +1,35 @@
+#! /usr/bin/perl -n
+BEGIN
+{
+    $bibDir = $ENV{'BIBLIOGRAPHY_DIR'};
+    $bibItemsDir = $ENV{'BIBITEMS_DIR'};
+}
+my $item = $_;
+/"key"\:"([A-Za-z0-9]{8})"/ || print "Cannot find key in line: $_\n";
+my $key = $1;
+/"target"\:"([A-Za-z0-9]{8})"/ || print "Cannot find target for key \"$key\" in line: $_\n";
+my $target = $1;
+if ($key eq $target) {    # only top-level entries
+    # Write the raw one-line JSON for this item where the bibItem shortcode can read it.
+    my $handle = undef;
+    my $itemjson = "$bibItemsDir/$key.json";
+    open($handle, ">", $itemjson) || die "$0: cannot open $itemjson in write-open mode: $!";
+    print $handle $item;
+    close $handle || die "$0: close of file $itemjson failed: $!";
+
+    /"title"\:("(?:[^"\\]++|\\.)*+")/ || print "Cannot find title for key \"$key\" in line: $_\n";
+    my $itemTitle = $1;
+    $handle = undef;
+    my $itemmd = "$bibDir/$key.md";
+    open($handle, ">", $itemmd) || die "$0: cannot open $itemmd in write-open mode: $!";
+    # Write a Markdown stub whose front matter carries the item title and which invokes the bibItem shortcode.
+    print $handle <<ENDITEM;
+---
+title: $itemTitle
+---
+
+{{< bibItem key="$key" >}}
+ENDITEM
+    close $handle || die "$0: close of file $itemmd failed: $!";
+
+}
diff --git a/scripts/update_bibliography.sh b/scripts/update_bibliography.sh
index 6b99a0055..327ff1c0d 100755
--- a/scripts/update_bibliography.sh
+++ b/scripts/update_bibliography.sh
@@ -3,12 +3,36 @@
 set -e
 
 rawItemsFile=false
-debugFiles=false
+debugFiles=true
 tagFiles=false
-typeFiles=false
+typeFiles=true
+curlFiles=false
 
 # The collection files will be created only if directly querying Zotero API.
 collectionFiles=false
+# removeChildrenFromFinalFile=false
+
+showInfoLevel=2
+# 0: NO showInfo messages.
+# 1: high-level general info messages.
+# 2: warning-type messages.
+# 3:
+# 4:
+# 5: detailed general info messages.
+# 6:
+# 7:
+# 8: individual processing steps (most of which may generate a debug file -- see $debugFiles)
+# 9: very detailed general info messages.
+# 10: extremely detailed messages (e.g., calls to curl, etc.)
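+#
+# Example: with showInfoLevel=2 as set above, `showInfo 1 ...` and `showInfo 2 ...`
+# print their message, while `showInfo 5 ...` and higher-numbered calls stay silent.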
+
+function showInfo() {
+    # The $1 is the level for this message ($showInfoLevel must be >= this)
+    # The $2 is the message string
+    if [[ $showInfoLevel -ge $1 ]] ; then
+        echo "$2"
+    fi
+}
+
 dfn="00"
 function debugFileName() {
     # The $1 is the file NAME string (without the number prefix or extension)
@@ -27,45 +51,76 @@ collections=""
 
 function add_items_from_collection () {
     local collection_key="$1"
-    echo "Getting collection $collection_key"
+    local collection_item=$(jq -r "map(select(.key==\"$collection_key\"))|.[0]" <<< "$collections")
+    showInfo 5 "====================================="
+    showInfo 5 "collection_item: $collection_item"
+    local collection_name=$(jq -r '.name' <<< "$collection_item")
+    local deleted=$(jq -r '.deleted // false' <<< "$collection_item")
+    showInfo 5 "collection.deleted: $deleted"
+    local numItems=$(jq -r '.numItems' <<< "$collection_item")
+    if [[ $deleted = "true" ]] ; then
+        showInfo 2 "Skipping deleted collection $collection_key: $collection_name"
+        return
+    elif [[ numItems -eq 0 ]] ; then
+        showInfo 2 "Empty collection $collection_key: $collection_name"
+    else
+        showInfo 1 "Getting collection $collection_key: $collection_name"
     local start=0
     local limit=100
     while :; do
+        showInfo 10 "curl of $collection_key at start $start"
        local this_page=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/items?include=data,csljson&start=$start&limit=$limit&v=3")
+        local item_count=$(jq '. | length' <<< "$this_page")
+        showInfo 10 "curl returned $item_count items"
+        if [[ $item_count > 0 ]] ; then
+            if $curlFiles ; then
+                local pageFile=$( printf "curl/%s-%02d.json" $collection_key $(( $start / $limit )) )
+                echo "$this_page" > "$pageFile"
+            fi
        items=$(jq -s 'add' <<< "$this_page$items")
-        start=$(($start + $limit))
-
-        # Break when we don't get any more items
-        [[ $(jq '. | length' <<< "$this_page") > 0 ]] || break
+            showInfo 10 "items updated"
+            start=$(($start + $item_count))
+        fi
+        # Break when we don't get the full $limit of items
+        # (it's still possible to do one redundant curl if collection has exact multiple of $limit items)
+        [[ $item_count -ge $limit ]] || break
     done
+    fi
 
-    local subcollections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.')
-    collections=$(jq -s 'add' <<< "$subcollections$collections")
+    local numCollections=$(jq -r '.numCollections' <<< "$collection_item")
+    if [[ $numCollections -gt 0 ]] ; then
+        showInfo 5 "Getting subcollections of $collection_key: $collection_name"
+        local subcollections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r 'map(.data+.meta)')
+        collections=$(jq -s 'add' <<< "$collections$subcollections")
 
    # Recurse into subcollections
    while read subcollection_key; do
        add_items_from_collection $subcollection_key
    done < <(jq -r '.[].key' <<< "$subcollections")
+    fi
 }
 
 root_collection="FSK5IX4F"
 
 if [[ $# -eq 0 ]] ; then
     # Initialize with the root collection.
-    collections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/top" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.' | jq "map(select(.key==\"$root_collection\"))")
+    collections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$root_collection" | jq -s -r 'map(.data+.meta)')
     add_items_from_collection $root_collection
 
     if $collectionFiles ; then
         echo "$collections" > collections.json
     fi
-    echo "$(jq '. | length' <<< "$collections") collections"
+    showInfo 1 "$(jq '. | length' <<< "$collections") collections"
 
     while read neededUrl; do
+        showInfo 10 "curl of $neededUrl"
        item=$(curl -s "$neededUrl?include=data,csljson&v=3")
+        showInfo 10 "curl returned"
        items=$(jq -s 'add' <<< "[$item]$items")
+        showInfo 10 "items updated"
     done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")
+
+    if $collectionFiles ; then
    grouped_collections=$(jq 'group_by(.parentCollection)' <<< "$collections")
    # This is not yet fully hierarchically nested.
-    if $collectionFiles ; then
        echo "$grouped_collections" > grouped_collections.json
    fi
 
 # Only if we got the raw items from the Zotero API
@@ -76,33 +131,52 @@ else
    items=$(<$1)
 fi
 
-echo "Got $(jq '. | length' <<< "$items") items"
+showInfo 1 "Got $(jq '. | length' <<< "$items") items"
 
 # Piece-wise processing for debugging:
 items=$(jq 'include "./bib-fns";map(semiflatten)' <<< "$items")
+showInfo 8 "Elevate fields in .csljson and .data to the item level."
 if $debugFiles ; then
    dfn=$(debugFileName "semiflattened" $dfn)
    echo "$items" > "$dfn"
 fi
 
 items=$(jq 'include "./bib-fns";map(if has("note") then .note |= unwrapDiv else . end)' <<< "$items")
+showInfo 8 "Clean up note field (extract from HTML div)."
 if $debugFiles ; then
    dfn=$(debugFileName "cleanDiv" $dfn)
    echo "$items" > "$dfn"
 fi
 
-#find and eliminate duplicate .key entries
+
+showInfo 8 "Find duplicate .key entries"
 groupedByKey=$(jq 'group_by(.key)' <<< "$items")
 if $debugFiles ; then
    dupIDs=$(jq 'map(select(length>1) | {key: .[0].key, items: .})' <<< "$groupedByKey")
+    numDupKeys=$(jq '. | length' <<< "$dupIDs")
+    if [[ "$numDupKeys" -ne 0 ]] ; then
+        numDupEntries=$(jq 'map(.items | length) | add' <<< "$dupIDs")
+        showInfo 2 "$numDupKeys keys have multiple occurrences totalling $numDupEntries entries"
+    fi
    dfn=$(debugFileName "dupIDs" $dfn)
    echo "$dupIDs" > "$dfn"
 fi
 
 #remove duplicates
 items=$(jq 'map(.[0]) | sort_by(.date)' <<< "$groupedByKey")
+showInfo 8 "Remove Duplicates"
 if $debugFiles ; then
    dfn=$(debugFileName "noDupSorted" $dfn)
    echo "$items" > "$dfn"
 fi
+
+# Remove library, links, and meta objects; and some fields from entries
+# (These are not used any further on, so simplify.)
+items=$(jq 'map(del(.["library", "links", "meta", "accessed", "accessDate", "dateAdded", "dateModified"]))' <<< "$items")
+showInfo 8 "Remove library, links, and meta objects and some unnecessary fields"
+if $debugFiles ; then
+    dfn=$(debugFileName "noLibraryLinksMeta" $dfn)
+    echo "$items" > "$dfn"
+fi
+
 if $typeFiles ; then
    types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
    echo "$types" > titleKeyTypeInfo.json
@@ -115,50 +189,66 @@ if $tagFiles ; then
 fi
 
 # consolidate URL into url field
 items=$(jq 'include "./bib-fns";map(if nonBlankKey("URL") then setpath(["url"]; .URL) | del(.URL) else . end)' <<< "$items")
+showInfo 8 "Consolidate URL to url (upper vs. lower case)"
 if $debugFiles ; then
-    dfn=$(debugFileName "consolidated" $dfn)
+    dfn=$(debugFileName "urlConsolidated" $dfn)
    echo "$items" > "$dfn"
 fi
 
 # Use DOI to create url where needed and available
 items=$(jq 'include "./bib-fns";map(if (blankKey("url") and nonBlankKey("DOI")) then setpath(["url"]; make_DOI_to_url(.DOI)) else . end)' <<< "$items")
+showInfo 8 "Use DOI to construct URL if needed"
 if $debugFiles ; then
    dfn=$(debugFileName "DOI-url" $dfn)
    echo "$items" > "$dfn"
 fi
 
 items=$(jq 'include "./bib-fns";map(getTargetInfo)' <<< "$items")
-if $debugFiles ; then
-    dfn=$(debugFileName "withTargetInfo" $dfn)
-    echo "$items" > "$dfn"
-fi
 
 # now use children items to amend the info for the parentItem
-allFixupInfo=$(jq 'group_by(.target)|map(sort_by(.parentItem))' <<< "$items")
+childrenGroupedByParent=$(jq 'group_by(.target)|map(sort_by(.parentItem))' <<< "$items")
+showInfo 8 "Collect children grouped by parent"
 if $debugFiles ; then
-    dfn=$(debugFileName "allFixupInfo" $dfn)
-    echo "$allFixupInfo" > "$dfn"
+    dfn=$(debugFileName "childrenGroupedByParent" $dfn)
+    echo "$childrenGroupedByParent" > "$dfn"
 fi
 
-# embed children into their parentItem (children field). Then delete items that Zotero indicated as such.
-items=$(jq 'map(if has(1) then (first + {children: .[1:]}) else first end)|map(select(.deleted|not))' <<< "$allFixupInfo")
+
+showInfo 8 "Embed Children into their parentItem (children field)."
+items=$(jq 'map(if has(1) then (first + {children: .[1:]}) else first end)' <<< "$childrenGroupedByParent")
 if $debugFiles ; then
-    dfn=$(debugFileName "itemsNestedChildren" $dfn)
+    dfn=$(debugFileName "itemsWithNestedChildren" $dfn)
    echo "$items" > "$dfn"
 fi
 
-echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
+showInfo 5 "Delete items that Zotero indicated as such."
+beforeCount=$(jq '. | length' <<< "$items")
+items=$(jq 'map(select(.deleted|not))' <<< "$items")
+afterCount=$(jq '. | length' <<< "$items")
+showInfo 5 "Removed $(( $beforeCount - $afterCount )) items marked as deleted."
+
+showInfo 1 "Got $(jq '. | length' <<< "$items") clean, top-level items"
 
 items=$(jq 'include "./bib-fns";map(applyChildrenAmendments)' <<< "$items")
+showInfo 8 "Update Parent items from their Children"
 if $debugFiles ; then
-    dfn=$(debugFileName "updatedItems" $dfn)
+    dfn=$(debugFileName "updatedItemsFromChildren" $dfn)
    echo "$items" > "$dfn"
 fi
 
 absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
+showInfo 8 "Check abstract vs. abstractNote mismatch"
 if $debugFiles ; then
    dfn=$(debugFileName "absNoteMismatch" $dfn)
    echo "$absNote" > "$dfn"
 fi
+absNoteMismatchCount=$(jq '. | length' <<< "$absNote")
+if [[ "$absNoteMismatchCount" -eq 1 ]] ; then
+    showInfo 2 "Warn: 1 entry with mismatch between abstract and abstractNote fields"
+elif [[ "$absNoteMismatchCount" -ne 0 ]] ; then
+    showInfo 2 "Warn: $absNoteMismatchCount entries with mismatch between abstract and abstractNote fields"
+fi
+
 # remove redundant .abstractNote fields
 items=$(jq 'include "./bib-fns";map(cleanAbstracts)' <<< "$items")
+showInfo 8 "Remove redundant abstractNote fields"
 if $debugFiles ; then
    dfn=$(debugFileName "abstractsCleaned" $dfn)
    echo "$items" > "$dfn"
@@ -167,51 +257,52 @@ if $typeFiles ; then
    types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
    echo "$types" > finalTitleKeyTypeInfo.json
 fi
-noURL=$(jq 'include "./bib-fns";map(select(blankKey("url")))' <<< "$items")
 if $debugFiles ; then
+    noURL=$(jq 'include "./bib-fns";map(select(blankKey("url")))' <<< "$items")
    dfn=$(debugFileName "noURL" $dfn)
    echo "$noURL" > "$dfn"
 fi
 
 finalCount=$(jq '. | length' <<< "$items")
-# # Remove .children arrays, if any. Save space.
-# items=$(jq 'map(del(.children))' <<< "$items")
-# if $debugFiles ; then
-    # dfn=$(debugFileName "withoutChildrenArray")
-    # echo "$items" > "$dfn"
+
+items=$(jq 'include "./bib-fns";map(raise_issued_date_parts)' <<< "$items")
+# if $removeChildrenFromFinalFile; then
+#     # Remove .children arrays, if any. Save space.
+#     items=$(jq 'map(del(.children))' <<< "$items")
+#     showInfo 8 "Remove .children arrays from final items"
+#     if $debugFiles ; then
+#         dfn=$(debugFileName "withoutChildrenArray" $dfn)
+#         echo "$items" > "$dfn"
+#     fi
 # fi
+
+# Do this here, instead of keeping a copy of the current value of $items, just to do this later.
+jq -c 'include "./bib-fns";.[] | bibItem' <<< "$items" > bibliography-items-by-line.json
+# jq 'include "./bib-fns";map(bibItem)' <<< "$items" > 000-bibliography-items-by-line.json
+
 # Group by year
 items=$(jq 'group_by(.issued."date-parts"[0][0])' <<< "$items")
-if $debugFiles ; then
-    dfn=$(debugFileName "grouped" $dfn)
-    echo "$items" > "$dfn"
-fi
 items=$(jq 'map({ (.[0].issued."date-parts"[0][0] // "Undated" | tostring): . })' <<< "$items")
+showInfo 8 "Group by year"
 if $debugFiles ; then
-    dfn=$(debugFileName "Undated" $dfn)
-    echo "$items" > "$dfn"
-fi
-items=$(jq 'map(to_entries)' <<< "$items")
-if $debugFiles ; then
-    dfn=$(debugFileName "entries" $dfn)
-    echo "$items" > "$dfn"
-fi
-items=$(jq 'flatten(1)' <<< "$items")
-if $debugFiles ; then
-    dfn=$(debugFileName "flattened" $dfn)
+    dfn=$(debugFileName "groupedByYear" $dfn)
    echo "$items" > "$dfn"
 fi
-items=$(jq 'group_by(.key)' <<< "$items")
-if $debugFiles ; then
-    dfn=$(debugFileName "groupedByKey" $dfn)
-    echo "$items" > "$dfn"
-fi
-items=$(jq 'include "./bib-fns";map(reconstituteGroupedEntries) | from_entries' <<< "$items")
+
+items=$(jq 'include "./bib-fns";map(to_entries) | flatten(1) | group_by(.key) | map(reconstituteGroupedEntries) | from_entries' <<< "$items")
 if $debugFiles ; then
    dfn=$(debugFileName "final" $dfn)
    echo "$items" > "$dfn"
 fi
+showInfo 1 "Generating individual Bibliography entries' .md files"
+BIBLIOGRAPHY_DIR="$(dirname "$0")/../content/en/history/bibliography"
+export BIBLIOGRAPHY_DIR
+BIBITEMS_DIR="$(dirname "$0")/../static/data/bibItems"
+export BIBITEMS_DIR
+./bibSplit.pl bibliography-items-by-line.json
+# Cleanup (uncomment once working)
+# rm bibliography-items-by-line.json
 
-echo "Outputting CSL JSON"
-echo "$finalCount entries"
+showInfo 1 "Outputting CSL JSON"
+showInfo 1 "$finalCount entries"
 echo "$items" > "$(dirname "$0")/../static/data/bibliography.json"
diff --git a/static/data/bibItems/.gitkeep b/static/data/bibItems/.gitkeep
new file mode 100644
index 000000000..e69de29bb