Skip to content

Commit a2b44d9

Browse files
committed
GH-3654 add caching in document loader
1 parent 26be6a5 commit a2b44d9

File tree

9 files changed

+408
-10
lines changed

9 files changed

+408
-10
lines changed

core/rio/api/src/main/java/org/eclipse/rdf4j/rio/helpers/JSONLDSettings.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.rio.helpers;
1212

13+
import java.util.List;
14+
import java.util.Set;
15+
1316
import org.eclipse.rdf4j.rio.RioSetting;
1417

1518
import com.github.jsonldjava.core.DocumentLoader;
@@ -153,6 +156,33 @@ public class JSONLDSettings {
153156
public static final RioSetting<Boolean> HIERARCHICAL_VIEW = new BooleanRioSetting(
154157
"org.eclipse.rdf4j.rio.jsonld.hierarchical_view", "Hierarchical representation of the JSON", Boolean.FALSE);
155158

159+
/**
160+
* Set of URIs (as strings) identifying the remote/local resources that the JSON-LD parser is allowed to retrieve.
161+
* Defaults to an empty set; see {@link #SECURE_MODE} for the setting that makes the parser honour this whitelist.
162+
*/
163+
public static final RioSetting<Set<String>> WHITELIST = new RioSettingImpl<>(
164+
"org.eclipse.rdf4j.rio.jsonld_whitelist",
165+
"Whitelist of remote/local resources that the JSON-LD parser can retrieve. Set of URIs as strings.",
166+
Set.of());
167+
168+
/**
169+
*
170+
*
171+
*/
172+
public static final RioSetting<Boolean> SECURE_MODE = new RioSettingImpl<>(
173+
"org.eclipse.rdf4j.rio.jsonld_secure_mode",
174+
"Secure mode only allows loading remote/local resources (ex. context from url) that are whitelisted.",
175+
Boolean.TRUE);
176+
177+
/**
178+
*
179+
*
180+
*/
181+
public static final RioSetting<Boolean> DOCUMENT_LOADER_CACHE = new RioSettingImpl<>(
182+
"org.eclipse.rdf4j.rio.jsonld_document_loader_cache",
183+
"The document loader cache is enabled by default. All loaded documents, such as remote contexts, are cached for 1 hour, or until the cache is full. The cache holds up to 1000 documents. The cache is shared between all JSONLDParsers. The cache can be disabled by setting this value to false.",
184+
Boolean.TRUE);
185+
156186
/**
157187
* Private default constructor.
158188
*/

core/rio/jsonld/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@
7474
<groupId>commons-io</groupId>
7575
<artifactId>commons-io</artifactId>
7676
</dependency>
77+
<dependency>
78+
<groupId>com.google.guava</groupId>
79+
<artifactId>guava</artifactId>
80+
</dependency>
7781
<dependency>
7882
<groupId>${project.groupId}</groupId>
7983
<artifactId>rdf4j-rio-api</artifactId>
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package org.eclipse.rdf4j.rio.jsonld;
2+
3+
import java.net.URI;
4+
import java.util.Set;
5+
import java.util.concurrent.ExecutionException;
6+
import java.util.concurrent.TimeUnit;
7+
8+
import org.eclipse.rdf4j.rio.RDFParseException;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
12+
import com.google.common.cache.CacheBuilder;
13+
import com.google.common.cache.CacheLoader;
14+
import com.google.common.cache.LoadingCache;
15+
16+
import no.hasmac.jsonld.JsonLdError;
17+
import no.hasmac.jsonld.document.Document;
18+
import no.hasmac.jsonld.loader.DocumentLoader;
19+
import no.hasmac.jsonld.loader.DocumentLoaderOptions;
20+
import no.hasmac.jsonld.loader.SchemeRouter;
21+
22+
public class CachingDocumentLoader implements DocumentLoader {
23+
private static final DocumentLoader defaultLoader = SchemeRouter.defaultInstance();
24+
private static final Logger logger = LoggerFactory.getLogger(CachingDocumentLoader.class);
25+
26+
private static final LoadingCache<URI, Document> cache = CacheBuilder.newBuilder()
27+
.maximumSize(1000) // Maximum 1000 documents in cache
28+
.expireAfterWrite(1, TimeUnit.HOURS) // Expire after 1 hour
29+
.concurrencyLevel(8) // Optimize for 8 concurrent threads
30+
.build(new CacheLoader<>() {
31+
@Override
32+
public Document load(URI url) throws Exception {
33+
return defaultLoader.loadDocument(url, new DocumentLoaderOptions());
34+
}
35+
});
36+
37+
private final boolean secureMode;
38+
private final Set<String> whitelist;
39+
private final boolean documentLoaderCache;
40+
41+
public CachingDocumentLoader(boolean secureMode, Set<String> whitelist, boolean documentLoaderCache) {
42+
this.secureMode = secureMode;
43+
this.whitelist = whitelist;
44+
this.documentLoaderCache = documentLoaderCache;
45+
}
46+
47+
@Override
48+
public Document loadDocument(URI uri, DocumentLoaderOptions options) {
49+
50+
try {
51+
if (!secureMode || whitelist.contains(uri.toString())) {
52+
if (documentLoaderCache) {
53+
try {
54+
return cache.get(uri);
55+
} catch (ExecutionException e) {
56+
if (e.getCause() != null) {
57+
throw new RDFParseException("Could not load document from " + uri, e.getCause());
58+
}
59+
throw new RDFParseException("Could not load document from " + uri, e);
60+
}
61+
} else {
62+
try {
63+
return defaultLoader.loadDocument(uri, options);
64+
} catch (JsonLdError e) {
65+
throw new RDFParseException("Could not load document from " + uri, e);
66+
}
67+
}
68+
} else {
69+
throw new RDFParseException("Could not load document from " + uri
70+
+ " because it is not whitelisted. See: JSONLDSettings.WHITELIST and JSONLDSettings.SECURE_MODE");
71+
}
72+
} catch (RDFParseException e) {
73+
logger.error(e.getMessage(), e);
74+
throw e;
75+
}
76+
}
77+
}

core/rio/jsonld/src/main/java/org/eclipse/rdf4j/rio/jsonld/JSONLDParser.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,19 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.rio.jsonld;
1212

13+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.DOCUMENT_LOADER_CACHE;
14+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.SECURE_MODE;
15+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.WHITELIST;
16+
1317
import java.io.IOException;
1418
import java.io.InputStream;
1519
import java.io.Reader;
1620
import java.net.URI;
1721
import java.net.URISyntaxException;
1822
import java.util.Collection;
23+
import java.util.List;
1924
import java.util.Optional;
25+
import java.util.Set;
2026
import java.util.function.BiConsumer;
2127

2228
import org.eclipse.rdf4j.model.IRI;
@@ -126,12 +132,21 @@ private void parse(InputStream in, Reader reader, String baseURI)
126132
BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES);
127133
}
128134

135+
boolean secureMode = getParserConfig().get(SECURE_MODE);
136+
boolean documentLoaderCache = getParserConfig().get(DOCUMENT_LOADER_CACHE);
137+
138+
Set<String> whitelist = getParserConfig().get(WHITELIST);
139+
129140
JsonLdOptions opts = new JsonLdOptions();
130141
opts.setUriValidation(false);
131142
opts.setExceptionOnWarning(getParserConfig().get(JSONLDSettings.EXCEPTION_ON_WARNING));
132143

133144
Document context = getParserConfig().get(JSONLDSettings.EXPAND_CONTEXT);
134145

146+
DocumentLoader defaultDocumentLoader = opts.getDocumentLoader();
147+
CachingDocumentLoader cachingDocumentLoader = new CachingDocumentLoader(secureMode, whitelist,
148+
documentLoaderCache);
149+
135150
if (context != null) {
136151

137152
opts.setExpandContext(context);
@@ -142,22 +157,21 @@ private void parse(InputStream in, Reader reader, String baseURI)
142157
throw new RDFParseException("Expand context is not a valid JSON document");
143158
}
144159
opts.getContextCache().put(context.getDocumentUrl().toString(), jsonContent.get());
145-
opts.setDocumentLoader(new DocumentLoader() {
146-
147-
private final DocumentLoader defaultDocumentLoader = SchemeRouter.defaultInstance();
148-
149-
@Override
150-
public Document loadDocument(URI url, DocumentLoaderOptions options) throws JsonLdError {
151-
if (url.equals(context.getDocumentUrl())) {
152-
return context;
153-
}
154-
return defaultDocumentLoader.loadDocument(url, options);
160+
opts.setDocumentLoader((uri, options) -> {
161+
if (uri.equals(context.getDocumentUrl())) {
162+
return context;
155163
}
164+
165+
return cachingDocumentLoader.loadDocument(uri, options);
156166
});
157167
}
158168

159169
}
160170

171+
if (secureMode && opts.getDocumentLoader() == defaultDocumentLoader) {
172+
opts.setDocumentLoader(cachingDocumentLoader);
173+
}
174+
161175
if (baseURI != null && !baseURI.isEmpty()) {
162176
URI uri = new URI(baseURI);
163177
opts.setBase(uri);

core/rio/jsonld/src/test/java/org/eclipse/rdf4j/rio/jsonld/JSONLDParserCustomTest.java

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,39 @@
1111
package org.eclipse.rdf4j.rio.jsonld;
1212

1313
import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
14+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.SECURE_MODE;
15+
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.WHITELIST;
1416
import static org.junit.jupiter.api.Assertions.assertEquals;
1517
import static org.junit.jupiter.api.Assertions.assertTrue;
1618

19+
import java.io.File;
1720
import java.io.StringReader;
1821
import java.net.URI;
22+
import java.nio.charset.StandardCharsets;
23+
import java.util.Set;
1924

25+
import org.apache.commons.io.FileUtils;
26+
import org.apache.commons.lang3.StringUtils;
2027
import org.eclipse.rdf4j.model.IRI;
2128
import org.eclipse.rdf4j.model.Literal;
2229
import org.eclipse.rdf4j.model.Model;
2330
import org.eclipse.rdf4j.model.Resource;
2431
import org.eclipse.rdf4j.model.Value;
2532
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
2633
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
34+
import org.eclipse.rdf4j.model.vocabulary.FOAF;
2735
import org.eclipse.rdf4j.model.vocabulary.XSD;
36+
import org.eclipse.rdf4j.rio.ParserConfig;
2837
import org.eclipse.rdf4j.rio.RDFFormat;
2938
import org.eclipse.rdf4j.rio.RDFParseException;
3039
import org.eclipse.rdf4j.rio.RDFParser;
3140
import org.eclipse.rdf4j.rio.Rio;
3241
import org.eclipse.rdf4j.rio.helpers.ContextStatementCollector;
3342
import org.eclipse.rdf4j.rio.helpers.JSONLDSettings;
3443
import org.eclipse.rdf4j.rio.helpers.ParseErrorCollector;
44+
import org.junit.jupiter.api.Assertions;
3545
import org.junit.jupiter.api.BeforeEach;
46+
import org.junit.jupiter.api.RepeatedTest;
3647
import org.junit.jupiter.api.Test;
3748

3849
import no.hasmac.jsonld.document.Document;
@@ -228,4 +239,85 @@ public void testContext() throws Exception {
228239
parser.parse(new StringReader(LOADER_JSONLD), "");
229240
assertTrue(model.predicates().contains(testPredicate));
230241
}
242+
243+
@Test
244+
public void testLocalFileSecurity() throws Exception {
245+
246+
String contextUri = JSONLDParserCustomTest.class.getClassLoader()
247+
.getResource("testcases/jsonld/localFileContext/context.jsonld")
248+
.toString();
249+
250+
String jsonld = FileUtils
251+
.readFileToString(new File(JSONLDParserCustomTest.class.getClassLoader()
252+
.getResource("testcases/jsonld/localFileContext/data.jsonld")
253+
.getFile()), StandardCharsets.UTF_8)
254+
.replace("file:./context.jsonld", contextUri);
255+
256+
// expect exception
257+
RDFParseException rdfParseException = Assertions.assertThrowsExactly(RDFParseException.class, () -> {
258+
parser.parse(new StringReader(jsonld), "");
259+
});
260+
261+
Assertions.assertEquals("Could not load document from " + contextUri
262+
+ " because it is not whitelisted. See: JSONLDSettings.WHITELIST and JSONLDSettings.SECURE_MODE",
263+
rdfParseException.getMessage());
264+
}
265+
266+
@Test
267+
public void testLocalFileSecurityWhiteList() throws Exception {
268+
String jsonld = FileUtils.readFileToString(new File(JSONLDParserCustomTest.class.getClassLoader()
269+
.getResource("testcases/jsonld/localFileContext/data.jsonld")
270+
.getFile()), StandardCharsets.UTF_8);
271+
String contextUri = JSONLDParserCustomTest.class.getClassLoader()
272+
.getResource("testcases/jsonld/localFileContext/context.jsonld")
273+
.toString();
274+
jsonld = jsonld.replace("file:./context.jsonld", contextUri);
275+
276+
parser.getParserConfig().set(WHITELIST, Set.of(contextUri));
277+
278+
parser.parse(new StringReader(jsonld), "");
279+
assertTrue(model.objects().contains(FOAF.PERSON));
280+
}
281+
282+
@Test
283+
public void testLocalFileSecurityDisableSecurity() throws Exception {
284+
String jsonld = FileUtils.readFileToString(new File(JSONLDParserCustomTest.class.getClassLoader()
285+
.getResource("testcases/jsonld/localFileContext/data.jsonld")
286+
.getFile()), StandardCharsets.UTF_8);
287+
jsonld = jsonld.replace("file:./context.jsonld",
288+
JSONLDParserCustomTest.class.getClassLoader()
289+
.getResource("testcases/jsonld/localFileContext/context.jsonld")
290+
.toString());
291+
292+
parser.getParserConfig().set(SECURE_MODE, false);
293+
294+
parser.parse(new StringReader(jsonld), "");
295+
assertTrue(model.objects().contains(FOAF.PERSON));
296+
}
297+
298+
@RepeatedTest(10)
299+
public void testRemoteContext() throws Exception {
300+
String jsonld = FileUtils.readFileToString(new File(JSONLDParserCustomTest.class.getClassLoader()
301+
.getResource("testcases/jsonld/remoteContext/data.jsonld")
302+
.getFile()), StandardCharsets.UTF_8);
303+
304+
parser.getParserConfig().set(WHITELIST, Set.of("https://schema.org"));
305+
parser.parse(new StringReader(jsonld), "");
306+
assertEquals(59, model.size());
307+
}
308+
309+
@Test
310+
public void testRemoteContextException() throws Exception {
311+
String jsonld = FileUtils.readFileToString(new File(JSONLDParserCustomTest.class.getClassLoader()
312+
.getResource("testcases/jsonld/remoteContextException/data.jsonld")
313+
.getFile()), StandardCharsets.UTF_8);
314+
315+
parser.getParserConfig().set(WHITELIST, Set.of("https://example.org/context.jsonld"));
316+
RDFParseException rdfParseException = Assertions.assertThrowsExactly(RDFParseException.class, () -> {
317+
parser.parse(new StringReader(jsonld), "");
318+
});
319+
320+
assertEquals("Could not load document from https://example.org/context.jsonld", rdfParseException.getMessage());
321+
}
322+
231323
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"@context":
3+
{
4+
"Person": "http://xmlns.com/foaf/0.1/Person"
5+
}
6+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"@context": "file:./context.jsonld",
3+
4+
"@id":"http://example/peter",
5+
"@type":"Person"
6+
7+
8+
9+
}

0 commit comments

Comments
 (0)