Skip to content

Commit 641d65c

Browse files
authored
GH-3654 JSON-LD 1.1 security and caching (#4957)
* GH-3654 add caching in document loader * GH-3654 update javadocs * remove problematic test * fix copyright * try to fix junit issues * try to fix junit issues
1 parent 26be6a5 commit 641d65c

File tree

10 files changed

+449
-47
lines changed

10 files changed

+449
-47
lines changed

compliance/rio/src/test/java/org/eclipse/rdf4j/rio/n3/N3ParserTest.java

Lines changed: 0 additions & 35 deletions
This file was deleted.

core/rio/api/src/main/java/org/eclipse/rdf4j/rio/helpers/JSONLDSettings.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.rio.helpers;
1212

13+
import java.util.List;
14+
import java.util.Set;
15+
1316
import org.eclipse.rdf4j.rio.RioSetting;
1417

1518
import com.github.jsonldjava.core.DocumentLoader;
@@ -153,6 +156,66 @@ public class JSONLDSettings {
153156
public static final RioSetting<Boolean> HIERARCHICAL_VIEW = new BooleanRioSetting(
154157
"org.eclipse.rdf4j.rio.jsonld.hierarchical_view", "Hierarchical representation of the JSON", Boolean.FALSE);
155158

159+
/**
160+
* Whitelist of remote/local resources that the JSON-LD parser can retrieve. Set of URIs as strings.
161+
* <p>
162+
* Default:
163+
* {@code Set.of("http://www.w3.org/ns/anno.jsonld", "http://www.w3.org/ns/activitystreams.jsonld", "http://www.w3.org/ns/ldp.jsonld", "http://www.w3.org/ns/oa.jsonld", "http://www.w3.org/ns/hydra/context.jsonld", "http://schema.org/", "https://w3id.org/security/v1", "https://w3c.github.io/json-ld-rc/context.jsonld", "https://www.w3.org/2018/credentials/v1", "https://health-lifesci.schema.org/", "https://auto.schema.org/", "https://bib.schema.org/", "http://xmlns.com/foaf/spec/index.jsonld", "https://pending.schema.org/", "https://schema.org/", "https://schema.org/docs/jsonldcontext.jsonld", "https://schema.org/version/latest/schemaorg-current-https.jsonld", "https://schema.org/version/latest/schemaorg-all-http.jsonld", "https://schema.org/version/latest/schemaorg-all-https.jsonld", "https://schema.org/version/latest/schemaorg-current-http.jsonld", "https://schema.org/version/latest/schemaorg-all.jsonld", "https://schema.org/version/latest/schemaorg-current.jsonld", "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld", "https://geojson.org/geojson-ld/geojson-context.jsonld", "https://www.w3.org/2019/wot/td/v1")}
164+
*
165+
*/
166+
public static final RioSetting<Set<String>> WHITELIST = new RioSettingImpl<>(
167+
"org.eclipse.rdf4j.rio.jsonld_whitelist",
168+
"Whitelist of remote/local resources that the JSON-LD parser can retrieve. Set of URIs as strings.",
169+
Set.of(
170+
"http://www.w3.org/ns/anno.jsonld",
171+
"http://www.w3.org/ns/activitystreams.jsonld",
172+
"http://www.w3.org/ns/ldp.jsonld",
173+
"http://www.w3.org/ns/oa.jsonld",
174+
"http://www.w3.org/ns/hydra/context.jsonld",
175+
"http://schema.org/",
176+
"https://w3id.org/security/v1",
177+
"https://w3c.github.io/json-ld-rc/context.jsonld",
178+
"https://www.w3.org/2018/credentials/v1",
179+
"https://health-lifesci.schema.org/",
180+
"https://auto.schema.org/",
181+
"https://bib.schema.org/",
182+
"http://xmlns.com/foaf/spec/index.jsonld",
183+
"https://pending.schema.org/",
184+
"https://schema.org/",
185+
"https://schema.org/docs/jsonldcontext.jsonld",
186+
"https://schema.org/version/latest/schemaorg-current-https.jsonld",
187+
"https://schema.org/version/latest/schemaorg-all-http.jsonld",
188+
"https://schema.org/version/latest/schemaorg-all-https.jsonld",
189+
"https://schema.org/version/latest/schemaorg-current-http.jsonld",
190+
"https://schema.org/version/latest/schemaorg-all.jsonld",
191+
"https://schema.org/version/latest/schemaorg-current.jsonld",
192+
"https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld",
193+
"https://geojson.org/geojson-ld/geojson-context.jsonld",
194+
"https://www.w3.org/2019/wot/td/v1"
195+
));
196+
197+
/**
198+
* Secure mode only allows loading remote/local resources (ex. context from url) that are whitelisted.
199+
* <p>
200+
* Default: true
201+
*/
202+
public static final RioSetting<Boolean> SECURE_MODE = new RioSettingImpl<>(
203+
"org.eclipse.rdf4j.rio.jsonld_secure_mode",
204+
"Secure mode only allows loading remote/local resources (ex. context from url) that are whitelisted.",
205+
Boolean.TRUE);
206+
207+
/**
208+
* The document loader cache is enabled by default. All loaded documents, such as remote contexts, are cached for 1
209+
* hour, or until the cache is full. The cache holds up to 1000 documents. The cache is shared between all
210+
* JSONLDParsers. The cache can be disabled by setting this value to false.
211+
* <p>
212+
* Default: true
213+
*/
214+
public static final RioSetting<Boolean> DOCUMENT_LOADER_CACHE = new RioSettingImpl<>(
215+
"org.eclipse.rdf4j.rio.jsonld_document_loader_cache",
216+
"The document loader cache is enabled by default. All loaded documents, such as remote contexts, are cached for 1 hour, or until the cache is full. The cache holds up to 1000 documents. The cache is shared between all JSONLDParsers. The cache can be disabled by setting this value to false.",
217+
Boolean.TRUE);
218+
156219
/**
157220
* Private default constructor.
158221
*/

core/rio/jsonld/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@
7474
<groupId>commons-io</groupId>
7575
<artifactId>commons-io</artifactId>
7676
</dependency>
77+
<dependency>
78+
<groupId>com.google.guava</groupId>
79+
<artifactId>guava</artifactId>
80+
</dependency>
7781
<dependency>
7882
<groupId>${project.groupId}</groupId>
7983
<artifactId>rdf4j-rio-api</artifactId>
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2024 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
******************************************************************************/
11+
12+
package org.eclipse.rdf4j.rio.jsonld;
13+
14+
import java.net.URI;
15+
import java.util.Set;
16+
import java.util.concurrent.ExecutionException;
17+
import java.util.concurrent.TimeUnit;
18+
19+
import org.eclipse.rdf4j.rio.RDFParseException;
20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
23+
import com.google.common.cache.CacheBuilder;
24+
import com.google.common.cache.CacheLoader;
25+
import com.google.common.cache.LoadingCache;
26+
27+
import no.hasmac.jsonld.JsonLdError;
28+
import no.hasmac.jsonld.document.Document;
29+
import no.hasmac.jsonld.loader.DocumentLoader;
30+
import no.hasmac.jsonld.loader.DocumentLoaderOptions;
31+
import no.hasmac.jsonld.loader.SchemeRouter;
32+
33+
public class CachingDocumentLoader implements DocumentLoader {
34+
private static final DocumentLoader defaultLoader = SchemeRouter.defaultInstance();
35+
private static final Logger logger = LoggerFactory.getLogger(CachingDocumentLoader.class);
36+
37+
private static final LoadingCache<URI, Document> cache = CacheBuilder.newBuilder()
38+
.maximumSize(1000) // Maximum 1000 documents in cache
39+
.expireAfterWrite(1, TimeUnit.HOURS) // Expire after 1 hour
40+
.concurrencyLevel(8) // Optimize for 8 concurrent threads
41+
.build(new CacheLoader<>() {
42+
@Override
43+
public Document load(URI url) throws Exception {
44+
return defaultLoader.loadDocument(url, new DocumentLoaderOptions());
45+
}
46+
});
47+
48+
private final boolean secureMode;
49+
private final Set<String> whitelist;
50+
private final boolean documentLoaderCache;
51+
52+
public CachingDocumentLoader(boolean secureMode, Set<String> whitelist, boolean documentLoaderCache) {
53+
this.secureMode = secureMode;
54+
this.whitelist = whitelist;
55+
this.documentLoaderCache = documentLoaderCache;
56+
}
57+
58+
@Override
59+
public Document loadDocument(URI uri, DocumentLoaderOptions options) {
60+
61+
try {
62+
if (!secureMode || whitelist.contains(uri.toString())) {
63+
if (documentLoaderCache) {
64+
try {
65+
return cache.get(uri);
66+
} catch (ExecutionException e) {
67+
if (e.getCause() != null) {
68+
throw new RDFParseException("Could not load document from " + uri, e.getCause());
69+
}
70+
throw new RDFParseException("Could not load document from " + uri, e);
71+
}
72+
} else {
73+
try {
74+
return defaultLoader.loadDocument(uri, options);
75+
} catch (JsonLdError e) {
76+
throw new RDFParseException("Could not load document from " + uri, e);
77+
}
78+
}
79+
} else {
80+
throw new RDFParseException("Could not load document from " + uri
81+
+ " because it is not whitelisted. See: JSONLDSettings.WHITELIST and JSONLDSettings.SECURE_MODE");
82+
}
83+
} catch (RDFParseException e) {
84+
logger.error(e.getMessage(), e);
85+
throw e;
86+
}
87+
}
88+
}

core/rio/jsonld/src/main/java/org/eclipse/rdf4j/rio/jsonld/JSONLDParser.java

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,18 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.rio.jsonld;
1212

13+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.DOCUMENT_LOADER_CACHE;
14+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.SECURE_MODE;
15+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.WHITELIST;
16+
1317
import java.io.IOException;
1418
import java.io.InputStream;
1519
import java.io.Reader;
1620
import java.net.URI;
1721
import java.net.URISyntaxException;
1822
import java.util.Collection;
1923
import java.util.Optional;
24+
import java.util.Set;
2025
import java.util.function.BiConsumer;
2126

2227
import org.eclipse.rdf4j.model.IRI;
@@ -48,8 +53,6 @@
4853
import no.hasmac.jsonld.document.JsonDocument;
4954
import no.hasmac.jsonld.lang.Keywords;
5055
import no.hasmac.jsonld.loader.DocumentLoader;
51-
import no.hasmac.jsonld.loader.DocumentLoaderOptions;
52-
import no.hasmac.jsonld.loader.SchemeRouter;
5356
import no.hasmac.rdf.RdfConsumer;
5457
import no.hasmac.rdf.RdfValueFactory;
5558

@@ -126,12 +129,21 @@ private void parse(InputStream in, Reader reader, String baseURI)
126129
BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES);
127130
}
128131

132+
boolean secureMode = getParserConfig().get(SECURE_MODE);
133+
boolean documentLoaderCache = getParserConfig().get(DOCUMENT_LOADER_CACHE);
134+
135+
Set<String> whitelist = getParserConfig().get(WHITELIST);
136+
129137
JsonLdOptions opts = new JsonLdOptions();
130138
opts.setUriValidation(false);
131139
opts.setExceptionOnWarning(getParserConfig().get(JSONLDSettings.EXCEPTION_ON_WARNING));
132140

133141
Document context = getParserConfig().get(JSONLDSettings.EXPAND_CONTEXT);
134142

143+
DocumentLoader defaultDocumentLoader = opts.getDocumentLoader();
144+
CachingDocumentLoader cachingDocumentLoader = new CachingDocumentLoader(secureMode, whitelist,
145+
documentLoaderCache);
146+
135147
if (context != null) {
136148

137149
opts.setExpandContext(context);
@@ -142,22 +154,21 @@ private void parse(InputStream in, Reader reader, String baseURI)
142154
throw new RDFParseException("Expand context is not a valid JSON document");
143155
}
144156
opts.getContextCache().put(context.getDocumentUrl().toString(), jsonContent.get());
145-
opts.setDocumentLoader(new DocumentLoader() {
146-
147-
private final DocumentLoader defaultDocumentLoader = SchemeRouter.defaultInstance();
148-
149-
@Override
150-
public Document loadDocument(URI url, DocumentLoaderOptions options) throws JsonLdError {
151-
if (url.equals(context.getDocumentUrl())) {
152-
return context;
153-
}
154-
return defaultDocumentLoader.loadDocument(url, options);
157+
opts.setDocumentLoader((uri, options) -> {
158+
if (uri.equals(context.getDocumentUrl())) {
159+
return context;
155160
}
161+
162+
return cachingDocumentLoader.loadDocument(uri, options);
156163
});
157164
}
158165

159166
}
160167

168+
if (secureMode && opts.getDocumentLoader() == defaultDocumentLoader) {
169+
opts.setDocumentLoader(cachingDocumentLoader);
170+
}
171+
161172
if (baseURI != null && !baseURI.isEmpty()) {
162173
URI uri = new URI(baseURI);
163174
opts.setBase(uri);

0 commit comments

Comments
 (0)