Skip to content

Commit 3b3096c

Browse files
authored
doi: add new doi backend
Add a new backend to support mounting datasets published with a digital object identifier (DOI).
1 parent 51fd697 commit 3b3096c

13 files changed

Lines changed: 1652 additions & 0 deletions

File tree

backend/all/all.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
_ "github.com/rclone/rclone/backend/combine"
1515
_ "github.com/rclone/rclone/backend/compress"
1616
_ "github.com/rclone/rclone/backend/crypt"
17+
_ "github.com/rclone/rclone/backend/doi"
1718
_ "github.com/rclone/rclone/backend/drive"
1819
_ "github.com/rclone/rclone/backend/dropbox"
1920
_ "github.com/rclone/rclone/backend/fichier"

backend/doi/api/dataversetypes.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Type definitions specific to Dataverse
2+
3+
package api
4+
5+
// DataverseDatasetResponse is returned by the Dataverse dataset API.
//
// It is the top-level envelope of the response: Status is decoded from the
// "status" key and Data from the "data" key.
type DataverseDatasetResponse struct {
	Status string           `json:"status"`
	Data   DataverseDataset `json:"data"`
}

// DataverseDataset is the representation of a dataset.
//
// Only the "latestVersion" member of the payload is decoded.
type DataverseDataset struct {
	LatestVersion DataverseDatasetVersion `json:"latestVersion"`
}

// DataverseDatasetVersion is the representation of a dataset version.
type DataverseDatasetVersion struct {
	// LastUpdateTime is kept as the raw string from the payload; parsing
	// it into a time value is left to the caller.
	LastUpdateTime string `json:"lastUpdateTime"`
	// Files lists the files attached to this version.
	Files []DataverseFile `json:"files"`
}

// DataverseFile is the representation of a file found in a dataset.
type DataverseFile struct {
	// DirectoryLabel is decoded from "directoryLabel"; it stays empty when
	// the key is absent from the payload.
	DirectoryLabel string `json:"directoryLabel"`
	// DataFile holds the per-file metadata.
	DataFile DataverseDataFile `json:"dataFile"`
}

// DataverseDataFile represents file metadata details.
//
// The Original* fields are decoded from the "originalFile*" keys and stay at
// their zero values when those keys are absent.
type DataverseDataFile struct {
	ID                 int64  `json:"id"`
	Filename           string `json:"filename"`
	ContentType        string `json:"contentType"`
	FileSize           int64  `json:"filesize"`
	OriginalFileFormat string `json:"originalFileFormat"`
	OriginalFileSize   int64  `json:"originalFileSize"`
	OriginalFileName   string `json:"originalFileName"`
	MD5                string `json:"md5"`
}

backend/doi/api/inveniotypes.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Type definitions specific to InvenioRDM
2+
3+
package api
4+
5+
// InvenioRecordResponse is the representation of a record stored in InvenioRDM.
//
// Only the "links" member of the payload is decoded.
type InvenioRecordResponse struct {
	Links InvenioRecordResponseLinks `json:"links"`
}

// InvenioRecordResponseLinks represents a record's links.
type InvenioRecordResponseLinks struct {
	// Self is decoded from the "self" key.
	Self string `json:"self"`
}

// InvenioFilesResponse is the representation of a record's files.
type InvenioFilesResponse struct {
	// Entries holds one element per item of the "entries" array.
	Entries []InvenioFilesResponseEntry `json:"entries"`
}

// InvenioFilesResponseEntry is the representation of a file entry.
//
// Checksum and Updated are kept as the raw strings found in the payload;
// interpreting them is left to the caller.
type InvenioFilesResponseEntry struct {
	Key      string                         `json:"key"`
	Checksum string                         `json:"checksum"`
	Size     int64                          `json:"size"`
	Updated  string                         `json:"updated"`
	MimeType string                         `json:"mimetype"`
	Links    InvenioFilesResponseEntryLinks `json:"links"`
}

// InvenioFilesResponseEntryLinks represents file links details.
type InvenioFilesResponseEntryLinks struct {
	// Content is decoded from the "content" key.
	Content string `json:"content"`
}

backend/doi/api/types.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Package api has general type definitions for doi
2+
package api
3+
4+
// DoiResolverResponse is returned by the DOI resolver API.
//
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
type DoiResolverResponse struct {
	ResponseCode int                        `json:"responseCode"`
	Handle       string                     `json:"handle"`
	Values       []DoiResolverResponseValue `json:"values"`
}

// DoiResolverResponseValue is a single handle record value.
//
// Timestamp is kept as the raw string found in the payload.
type DoiResolverResponseValue struct {
	Index     int                          `json:"index"`
	Type      string                       `json:"type"`
	Data      DoiResolverResponseValueData `json:"data"`
	TTL       int                          `json:"ttl"`
	Timestamp string                       `json:"timestamp"`
}

// DoiResolverResponseValueData is the data held in a handle value.
type DoiResolverResponseValueData struct {
	Format string `json:"format"`
	// Value is left untyped, so its dynamic type after decoding depends on
	// the JSON found under "value" (for example a string or an object).
	Value any `json:"value"`
}

backend/doi/dataverse.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Implementation for Dataverse
2+
3+
package doi
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"net/http"
9+
"net/url"
10+
"path"
11+
"strings"
12+
"time"
13+
14+
"github.com/rclone/rclone/backend/doi/api"
15+
"github.com/rclone/rclone/fs"
16+
"github.com/rclone/rclone/lib/rest"
17+
)
18+
19+
// activateDataverse reports whether resolvedURL is likely a DOI hosted on a
// Dataverse installation.
//
// The check is purely syntactic: the result is true when resolvedURL carries
// a non-empty "persistentId" query parameter. No request is made.
func activateDataverse(resolvedURL *url.URL) (isActive bool) {
	return resolvedURL.Query().Get("persistentId") != ""
}
25+
26+
// Resolve the main API endpoint for a DOI hosted on a Dataverse installation
27+
func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) {
28+
queryValues := resolvedURL.Query()
29+
persistentID := queryValues.Get("persistentId")
30+
31+
query := url.Values{}
32+
query.Add("persistentId", persistentID)
33+
endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/datasets/:persistentId/", RawQuery: query.Encode()})
34+
35+
return Dataverse, endpointURL, nil
36+
}
37+
38+
// dataverseProvider implements the doiProvider interface for Dataverse installations
39+
type dataverseProvider struct {
40+
f *Fs
41+
}
42+
43+
// ListEntries returns the full list of entries found at the remote, regardless of root
44+
func (dp *dataverseProvider) ListEntries(ctx context.Context) (entries []*Object, err error) {
45+
// Use the cache if populated
46+
cachedEntries, found := dp.f.cache.GetMaybe("files")
47+
if found {
48+
parsedEntries, ok := cachedEntries.([]Object)
49+
if ok {
50+
for _, entry := range parsedEntries {
51+
newEntry := entry
52+
entries = append(entries, &newEntry)
53+
}
54+
return entries, nil
55+
}
56+
}
57+
58+
filesURL := dp.f.endpoint
59+
var res *http.Response
60+
var result api.DataverseDatasetResponse
61+
opts := rest.Opts{
62+
Method: "GET",
63+
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
64+
Parameters: filesURL.Query(),
65+
}
66+
err = dp.f.pacer.Call(func() (bool, error) {
67+
res, err = dp.f.srv.CallJSON(ctx, &opts, nil, &result)
68+
return shouldRetry(ctx, res, err)
69+
})
70+
if err != nil {
71+
return nil, fmt.Errorf("readDir failed: %w", err)
72+
}
73+
modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime)
74+
if modTimeErr != nil {
75+
fs.Logf(dp.f, "error: could not parse last update time %v", modTimeErr)
76+
modTime = timeUnset
77+
}
78+
for _, file := range result.Data.LatestVersion.Files {
79+
contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID)
80+
query := url.Values{}
81+
query.Add("format", "original")
82+
contentURL := dp.f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
83+
entry := &Object{
84+
fs: dp.f,
85+
remote: path.Join(file.DirectoryLabel, file.DataFile.Filename),
86+
contentURL: contentURL.String(),
87+
size: file.DataFile.FileSize,
88+
modTime: modTime,
89+
md5: file.DataFile.MD5,
90+
contentType: file.DataFile.ContentType,
91+
}
92+
if file.DataFile.OriginalFileName != "" {
93+
entry.remote = path.Join(file.DirectoryLabel, file.DataFile.OriginalFileName)
94+
entry.size = file.DataFile.OriginalFileSize
95+
entry.contentType = file.DataFile.OriginalFileFormat
96+
}
97+
entries = append(entries, entry)
98+
}
99+
// Populate the cache
100+
cacheEntries := []Object{}
101+
for _, entry := range entries {
102+
cacheEntries = append(cacheEntries, *entry)
103+
}
104+
dp.f.cache.Put("files", cacheEntries)
105+
return entries, nil
106+
}
107+
108+
func newDataverseProvider(f *Fs) doiProvider {
109+
return &dataverseProvider{
110+
f: f,
111+
}
112+
}

0 commit comments

Comments
 (0)