Skip to content

GH-3654 JSON-LD 1.1 security and caching #4957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.helpers;

import java.util.List;
import java.util.Set;

import org.eclipse.rdf4j.rio.RioSetting;

import com.github.jsonldjava.core.DocumentLoader;
Expand Down Expand Up @@ -153,6 +156,66 @@ public class JSONLDSettings {
public static final RioSetting<Boolean> HIERARCHICAL_VIEW = new BooleanRioSetting(
"org.eclipse.rdf4j.rio.jsonld.hierarchical_view", "Hierarchical representation of the JSON", Boolean.FALSE);

/**
* Whitelist of remote/local resources that the JSON-LD parser can retrieve. Set of URIs as strings.
* <p>
* The whitelist is only enforced while {@link #SECURE_MODE} is enabled. A resource is accepted when the complete
* string form of its URI is an element of this set.
* <p>
* Default:
* {@code Set.of("http://www.w3.org/ns/anno.jsonld", "http://www.w3.org/ns/activitystreams.jsonld",
* "http://www.w3.org/ns/ldp.jsonld", "http://www.w3.org/ns/oa.jsonld", "http://www.w3.org/ns/hydra/context.jsonld",
* "http://schema.org/", "https://w3id.org/security/v1", "https://w3c.github.io/json-ld-rc/context.jsonld",
* "https://www.w3.org/2018/credentials/v1", "https://health-lifesci.schema.org/", "https://auto.schema.org/",
* "https://bib.schema.org/", "http://xmlns.com/foaf/spec/index.jsonld", "https://pending.schema.org/",
* "https://schema.org/", "https://schema.org/docs/jsonldcontext.jsonld",
* "https://schema.org/version/latest/schemaorg-current-https.jsonld",
* "https://schema.org/version/latest/schemaorg-all-http.jsonld",
* "https://schema.org/version/latest/schemaorg-all-https.jsonld",
* "https://schema.org/version/latest/schemaorg-current-http.jsonld",
* "https://schema.org/version/latest/schemaorg-all.jsonld",
* "https://schema.org/version/latest/schemaorg-current.jsonld",
* "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld",
* "https://geojson.org/geojson-ld/geojson-context.jsonld", "https://www.w3.org/2019/wot/td/v1")}
*/
public static final RioSetting<Set<String>> WHITELIST = new RioSettingImpl<>(
"org.eclipse.rdf4j.rio.jsonld_whitelist",
"Whitelist of remote/local resources that the JSON-LD parser can retrieve. Set of URIs as strings.",
Set.of(
"http://www.w3.org/ns/anno.jsonld",
"http://www.w3.org/ns/activitystreams.jsonld",
"http://www.w3.org/ns/ldp.jsonld",
"http://www.w3.org/ns/oa.jsonld",
"http://www.w3.org/ns/hydra/context.jsonld",
"http://schema.org/",
"https://w3id.org/security/v1",
"https://w3c.github.io/json-ld-rc/context.jsonld",
"https://www.w3.org/2018/credentials/v1",
"https://health-lifesci.schema.org/",
"https://auto.schema.org/",
"https://bib.schema.org/",
"http://xmlns.com/foaf/spec/index.jsonld",
"https://pending.schema.org/",
"https://schema.org/",
"https://schema.org/docs/jsonldcontext.jsonld",
"https://schema.org/version/latest/schemaorg-current-https.jsonld",
"https://schema.org/version/latest/schemaorg-all-http.jsonld",
"https://schema.org/version/latest/schemaorg-all-https.jsonld",
"https://schema.org/version/latest/schemaorg-current-http.jsonld",
"https://schema.org/version/latest/schemaorg-all.jsonld",
"https://schema.org/version/latest/schemaorg-current.jsonld",
"https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld",
"https://geojson.org/geojson-ld/geojson-context.jsonld",
"https://www.w3.org/2019/wot/td/v1"
));

/**
 * Secure mode only allows loading remote/local resources (ex. context from url) that are whitelisted.
 * <p>
 * The set of permitted resources is configured through {@link #WHITELIST}.
 * <p>
 * Default: true
 */
public static final RioSetting<Boolean> SECURE_MODE = new BooleanRioSetting(
		"org.eclipse.rdf4j.rio.jsonld_secure_mode",
		"Secure mode only allows loading remote/local resources (ex. context from url) that are whitelisted.",
		Boolean.TRUE);

/**
 * The document loader cache is enabled by default. All loaded documents, such as remote contexts, are cached for 1
 * hour, or until the cache is full. The cache holds up to 1000 documents. The cache is shared between all
 * JSONLDParsers. The cache can be disabled by setting this value to false.
 * <p>
 * Default: true
 */
public static final RioSetting<Boolean> DOCUMENT_LOADER_CACHE = new BooleanRioSetting(
		"org.eclipse.rdf4j.rio.jsonld_document_loader_cache",
		"The document loader cache is enabled by default. All loaded documents, such as remote contexts, are cached for 1 hour, or until the cache is full. The cache holds up to 1000 documents. The cache is shared between all JSONLDParsers. The cache can be disabled by setting this value to false.",
		Boolean.TRUE);

/**
* Private default constructor.
*/
Expand Down
4 changes: 4 additions & 0 deletions core/rio/jsonld/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>rdf4j-rio-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

package org.eclipse.rdf4j.rio.jsonld;

import java.net.URI;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import org.eclipse.rdf4j.rio.RDFParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

import no.hasmac.jsonld.JsonLdError;
import no.hasmac.jsonld.document.Document;
import no.hasmac.jsonld.loader.DocumentLoader;
import no.hasmac.jsonld.loader.DocumentLoaderOptions;
import no.hasmac.jsonld.loader.SchemeRouter;

/**
 * {@link DocumentLoader} used by the JSON-LD parser to fetch referenced documents such as remote contexts.
 * <p>
 * Two independent switches control its behaviour:
 * <ul>
 * <li><b>secure mode</b> - when enabled, only URIs whose complete string form is contained in the whitelist may be
 * loaded; every other URI is rejected with an {@link RDFParseException}.</li>
 * <li><b>document loader cache</b> - when enabled, documents are served from a static cache shared by all instances
 * of this class (at most 1000 entries, each expiring one hour after it was written).</li>
 * </ul>
 */
public class CachingDocumentLoader implements DocumentLoader {
	private static final DocumentLoader defaultLoader = SchemeRouter.defaultInstance();
	private static final Logger logger = LoggerFactory.getLogger(CachingDocumentLoader.class);

	// Static on purpose: one cache for every loader instance. Entries are keyed by URI only, and the cache loader
	// always uses a freshly constructed DocumentLoaderOptions rather than the options of the triggering call.
	private static final LoadingCache<URI, Document> cache = CacheBuilder.newBuilder()
			.maximumSize(1000) // Maximum 1000 documents in cache
			.expireAfterWrite(1, TimeUnit.HOURS) // Expire after 1 hour
			.concurrencyLevel(8) // Optimize for 8 concurrent threads
			.build(new CacheLoader<>() {
				@Override
				public Document load(URI url) throws Exception {
					return defaultLoader.loadDocument(url, new DocumentLoaderOptions());
				}
			});

	private final boolean secureMode;
	private final Set<String> whitelist;
	private final boolean documentLoaderCache;

	/**
	 * Creates a new loader.
	 *
	 * @param secureMode          if {@code true}, only whitelisted URIs may be loaded
	 * @param whitelist           URIs (as strings) that may be loaded in secure mode; {@code null} is treated as an
	 *                            empty whitelist, so nothing is permitted while secure mode is on
	 * @param documentLoaderCache if {@code true}, documents are loaded through the shared cache
	 */
	public CachingDocumentLoader(boolean secureMode, Set<String> whitelist, boolean documentLoaderCache) {
		this.secureMode = secureMode;
		// Fail closed: a missing whitelist must not surface as a NullPointerException during the security check.
		this.whitelist = whitelist == null ? Set.of() : whitelist;
		this.documentLoaderCache = documentLoaderCache;
	}

	/**
	 * Loads the document identified by {@code uri}, enforcing the whitelist in secure mode and using the shared cache
	 * when it is enabled.
	 *
	 * @param uri     the location of the document to load
	 * @param options loader options; only used when the cache is disabled, because cached loads always use a freshly
	 *                constructed {@link DocumentLoaderOptions}
	 * @return the loaded document
	 * @throws RDFParseException if the URI is not whitelisted in secure mode, or if the underlying loader fails; the
	 *                           exception is logged at error level before being rethrown
	 */
	@Override
	public Document loadDocument(URI uri, DocumentLoaderOptions options) {
		try {
			if (!isPermitted(uri)) {
				throw new RDFParseException("Could not load document from " + uri
						+ " because it is not whitelisted. See: JSONLDSettings.WHITELIST and JSONLDSettings.SECURE_MODE");
			}
			return documentLoaderCache ? loadThroughCache(uri) : loadDirectly(uri, options);
		} catch (RDFParseException e) {
			logger.error(e.getMessage(), e);
			throw e;
		}
	}

	/** Returns {@code true} when secure mode is off or the URI's string form is on the whitelist. */
	private boolean isPermitted(URI uri) {
		return !secureMode || whitelist.contains(uri.toString());
	}

	/** Loads via the shared cache, reporting the loader's own failure (when available) as the cause. */
	private Document loadThroughCache(URI uri) {
		try {
			return cache.get(uri);
		} catch (ExecutionException e) {
			// Prefer the wrapped cause so callers see the original failure rather than the cache's wrapper.
			Throwable cause = e.getCause() != null ? e.getCause() : e;
			throw new RDFParseException("Could not load document from " + uri, cause);
		}
	}

	/** Loads with the default loader, bypassing the cache and honouring the caller's options. */
	private Document loadDirectly(URI uri, DocumentLoaderOptions options) {
		try {
			return defaultLoader.loadDocument(uri, options);
		} catch (JsonLdError e) {
			throw new RDFParseException("Could not load document from " + uri, e);
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.jsonld;

import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.DOCUMENT_LOADER_CACHE;
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.SECURE_MODE;
import static org.eclipse.rdf4j.rio.helpers.JSONLDSettings.WHITELIST;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;

import org.eclipse.rdf4j.model.IRI;
Expand Down Expand Up @@ -48,8 +53,6 @@
import no.hasmac.jsonld.document.JsonDocument;
import no.hasmac.jsonld.lang.Keywords;
import no.hasmac.jsonld.loader.DocumentLoader;
import no.hasmac.jsonld.loader.DocumentLoaderOptions;
import no.hasmac.jsonld.loader.SchemeRouter;
import no.hasmac.rdf.RdfConsumer;
import no.hasmac.rdf.RdfValueFactory;

Expand Down Expand Up @@ -126,12 +129,21 @@ private void parse(InputStream in, Reader reader, String baseURI)
BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES);
}

boolean secureMode = getParserConfig().get(SECURE_MODE);
boolean documentLoaderCache = getParserConfig().get(DOCUMENT_LOADER_CACHE);

Set<String> whitelist = getParserConfig().get(WHITELIST);

JsonLdOptions opts = new JsonLdOptions();
opts.setUriValidation(false);
opts.setExceptionOnWarning(getParserConfig().get(JSONLDSettings.EXCEPTION_ON_WARNING));

Document context = getParserConfig().get(JSONLDSettings.EXPAND_CONTEXT);

DocumentLoader defaultDocumentLoader = opts.getDocumentLoader();
CachingDocumentLoader cachingDocumentLoader = new CachingDocumentLoader(secureMode, whitelist,
documentLoaderCache);

if (context != null) {

opts.setExpandContext(context);
Expand All @@ -142,22 +154,21 @@ private void parse(InputStream in, Reader reader, String baseURI)
throw new RDFParseException("Expand context is not a valid JSON document");
}
opts.getContextCache().put(context.getDocumentUrl().toString(), jsonContent.get());
opts.setDocumentLoader(new DocumentLoader() {

private final DocumentLoader defaultDocumentLoader = SchemeRouter.defaultInstance();

@Override
public Document loadDocument(URI url, DocumentLoaderOptions options) throws JsonLdError {
if (url.equals(context.getDocumentUrl())) {
return context;
}
return defaultDocumentLoader.loadDocument(url, options);
opts.setDocumentLoader((uri, options) -> {
if (uri.equals(context.getDocumentUrl())) {
return context;
}

return cachingDocumentLoader.loadDocument(uri, options);
});
}

}

if (secureMode && opts.getDocumentLoader() == defaultDocumentLoader) {
opts.setDocumentLoader(cachingDocumentLoader);
}

if (baseURI != null && !baseURI.isEmpty()) {
URI uri = new URI(baseURI);
opts.setBase(uri);
Expand Down
Loading
Loading