Skip to content
This repository has been archived by the owner on Aug 8, 2020. It is now read-only.

Commit

Permalink
Add caching ability for SAX EntityResolver.
Browse files Browse the repository at this point in the history
  • Loading branch information
furfurylic committed Mar 3, 2016
1 parent 08f8a89 commit 5306114
Show file tree
Hide file tree
Showing 15 changed files with 289 additions and 104 deletions.
241 changes: 241 additions & 0 deletions src/net/furfurylic/chionographis/CachingResolver.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
/*
* Chionographis
*
* These codes are licensed under CC0.
* https://creativecommons.org/publicdomain/zero/1.0/deed
*/

package net.furfurylic.chionographis;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.WeakHashMap;
import java.util.function.Consumer;
import java.util.function.Function;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.URIResolver;
import javax.xml.transform.dom.DOMSource;

import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * A resolver for both SAX external entities and XSLT URIs which remembers what it has
 * already loaded in static (process-wide) caches, so that the same resource is fetched
 * and parsed at most once.
 *
 * <p>Failures are remembered too: a resource which could not be loaded once is reported as
 * "not resolved" ({@code null}) on every later request without retrying.</p>
 */
final class CachingResolver implements EntityResolver, URIResolver {

    /** Guards the lazy creation of the static cache maps. */
    private static final Object LOCK = new Object();

    private static Map<URI, URI> canonicalizedURIsForBytes_;
    private static Map<URI, Optional<byte[]>> bytes_;

    private static Map<URI, URI> canonicalizedURIsForSources_;
    private static Map<URI, Optional<Source>> sources_;

    private final Consumer<URI> listenStored_;
    private final Consumer<URI> listenHit_;

    /**
     * Creates a resolver which reports its cache activity to the given listeners.
     *
     * @param listenStored
     *      called with the URI of a resource which has just been loaded and cached.
     * @param listenHit
     *      called with the URI of a resource which has been served from the cache.
     */
    public CachingResolver(Consumer<URI> listenStored, Consumer<URI> listenHit) {
        listenStored_ = listenStored;
        listenHit_ = listenHit;
    }

    /**
     * Resolves an external entity to its cached raw bytes.
     *
     * @param publicId
     *      the public identifier, which is only copied to the result.
     * @param systemId
     *      the system identifier, which is expected to be an absolute URI.
     *
     * @return
     *      an input source backed by the cached bytes, or {@code null} to make the parser
     *      resolve the entity by itself (no/malformed/relative system identifier,
     *      or a resource which could not be read).
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        if (systemId == null) {
            return null;
        }
        URI uri;
        try {
            uri = URI.create(systemId);
        } catch (IllegalArgumentException e) {
            // Not a well-formed URI: leave it to the parser's default resolution
            return null;
        }
        uri = uniquifyURI(uri);
        if (uri == null) {
            return null;
        }

        synchronized (LOCK) {
            if (bytes_ == null) {
                canonicalizedURIsForBytes_ =
                    Collections.synchronizedMap(new WeakHashMap<URI, URI>());
                bytes_ = Collections.synchronizedMap(new IdentityHashMap<>());
            }
        }

        Optional<byte[]> cached = accessCache(
            uri, canonicalizedURIsForBytes_, bytes_, CachingResolver::readBytes);
        if (!cached.isPresent()) {
            return null;
        }

        InputSource inputSource = new InputSource(new ByteArrayInputStream(cached.get()));
        inputSource.setSystemId(systemId);
        inputSource.setPublicId(publicId);
        return inputSource;
    }

    /**
     * Resolves a URI referred to by a stylesheet to a cached DOM source.
     *
     * @param href
     *      the possibly relative URI reference.
     * @param base
     *      the base URI against which <i>href</i> is resolved, or {@code null}.
     *
     * @return
     *      the cached source, or {@code null} to make the processor resolve the URI by itself
     *      (malformed/relative URI, or a resource which could not be parsed).
     */
    @Override
    public Source resolve(String href, String base) throws TransformerException {
        URI uri;
        try {
            if (base == null) {
                uri = URI.create(href);
            } else {
                uri = URI.create(base).resolve(href);
            }
        } catch (IllegalArgumentException e) {
            // Not a well-formed URI: leave it to the processor's default resolution
            return null;
        }
        uri = uniquifyURI(uri);
        if (uri == null) {
            return null;
        }

        synchronized (LOCK) {
            if (sources_ == null) {
                canonicalizedURIsForSources_ =
                    Collections.synchronizedMap(new WeakHashMap<URI, URI>());
                sources_ = Collections.synchronizedMap(new IdentityHashMap<>());
            }
        }

        Optional<Source> cached = accessCache(
            uri, canonicalizedURIsForSources_, sources_, this::parseSource);
        return cached.orElse(null);
    }

    /**
     * Reads the whole content of the resource a URI points to.
     *
     * @param u
     *      an absolute URI.
     *
     * @return
     *      the content, or {@code null} if it could not be read.
     */
    private static byte[] readBytes(URI u) {
        try {
            if ("file".equalsIgnoreCase(u.getScheme())) {
                // Local files of known, array-sized length are read in one go
                File file = new File(u);
                long length = file.length();
                if (length <= Integer.MAX_VALUE) {
                    byte[] content = new byte[(int) length];
                    try (DataInputStream in =
                            new DataInputStream(new FileInputStream(file))) {
                        in.readFully(content);
                    }
                    return content;
                }
            }
            byte[] buffer = new byte[4096];
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            try (InputStream in = u.toURL().openStream()) {
                int length;
                while ((length = in.read(buffer)) != -1) {
                    bytes.write(buffer, 0, length);
                }
            }
            return bytes.toByteArray();
        } catch (IOException e) {
            // Reported as "no content"; accessCache remembers the failure
            return null;
        }
    }

    /**
     * Parses the document a URI points to into a DOM source,
     * resolving its external entities through this resolver.
     *
     * @param u
     *      an absolute URI.
     *
     * @return
     *      the parsed source, or {@code null} if it could not be parsed.
     */
    private Source parseSource(URI u) {
        DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
        dbfac.setNamespaceAware(true);
        try {
            DocumentBuilder builder = dbfac.newDocumentBuilder();
            builder.setEntityResolver(this);
            Document document = builder.parse(u.toString());
            return new DOMSource(document, u.toString());
        } catch (ParserConfigurationException | SAXException | IOException e) {
            // Reported as "no source"; accessCache remembers the failure
            return null;
        }
    }

    /**
     * Normalizes a URI in terms of its logical content.
     *
     * @param uri
     *      a URI to normalize.
     *
     * @return
     *      the normalized URI, or {@code null} if the URI is not absolute
     *      or is a file URI which cannot be turned into a real path.
     */
    private static URI uniquifyURI(URI uri) {
        if (!uri.isAbsolute()) {
            return null;
        }
        if ("file".equalsIgnoreCase(uri.getScheme())) {
            // Afraid that omission of "xx/../" may break path meanings for symbolic links
            try {
                return Paths.get(uri).toRealPath().toUri();
            } catch (IOException | IllegalArgumentException e) {
                // Nonexistent file, or a file URI which Paths.get does not accept
                return null;
            }
        }
        return uri.normalize();
    }

    /**
     * Looks a resource up in a cache, creating and storing it on the first request.
     *
     * @param uri
     *      a URI.
     * @param canonicalURIs
     *      a canonicalization mapping for URIs, which yields the one instance
     *      used both as the key of <i>cache</i> and as the per-resource lock.
     * @param cache
     *      a possibly identity-based synchronized map.
     * @param factory
     *      a factory function which make an object from a URI,
     *      or returns {@code null} on failure.
     *
     * @return
     *      a possibly-empty resolved object; never {@code null}.
     *      It is empty if the factory failed now or in a previous request.
     */
    private <T> Optional<T> accessCache(URI uri, Map<URI, URI> canonicalURIs,
            Map<URI, Optional<T>> cache, Function<URI, ? extends T> factory) {
        // Get the canonicalized form
        URI canonicalizedURI = canonicalizeURI(uri, canonicalURIs);

        // From here uri shall not be in the canonicalized form,
        // so that the instance used as the lock is never handed to the factory or listeners
        if (canonicalizedURI == uri) {
            uri = URI.create(uri.toString());
        }

        // Lock with privately-canonicalized form
        synchronized (canonicalizedURI) {
            Optional<T> cached = cache.get(canonicalizedURI);
            if (cached == null) {
                // First request for this resource
                cached = Optional.<T>ofNullable(factory.apply(uri));
                if (cached.isPresent()) {
                    listenStored_.accept(uri);
                }
                cache.put(canonicalizedURI, cached);
            } else if (cached.isPresent()) {
                // Cache hit.
                listenHit_.accept(uri);
            }
            // An empty cached value means that an error occurred in a previous try;
            // it is returned as it is (callers dereference the result, so never null).
            return cached;
        }
    }

    /**
     * Canonicalizes a URI so that URIs which have the same logical content are one same object.
     *
     * @param uri
     *      a URI to canonicalize.
     * @param canonicalURIs
     *      a canonicalization mapping for URIs.
     *
     * @return
     *      the canonicalized form of the URI,
     *      which is different object from the parameter <i>uri</i>
     *      if it did not come from this method.
     */
    private static URI canonicalizeURI(URI uri, Map<URI, URI> canonicalURIs) {
        assert uri != null;
        URI existing = canonicalURIs.putIfAbsent(uri, uri);
        return (existing == null) ? uri : existing;
    }
}
12 changes: 12 additions & 0 deletions src/net/furfurylic/chionographis/Chionographis.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,17 @@ public final class Chionographis extends MatchingTask implements SinkDriver {

private Path srcDir_;
private Path baseDir_;
private boolean usesCache_;
private int prefixCount_;

private Map<String, String> prefixMap_;
private Sinks sinks_;

/**
 * Sole constructor. A newly created task has caching turned off.
 */
public Chionographis() {
    this.usesCache_ = false;
}

/**
Expand Down Expand Up @@ -100,6 +103,10 @@ public void setSrcDir(String srcDir) {
srcDir_ = Paths.get(srcDir);
}

/**
 * Sets whether this task uses the caching entity resolver.
 * When it is {@code true}, a {@code CachingResolver} is set to the XML reader
 * as its entity resolver on execution.
 *
 * @param cache
 *      {@code true} to turn caching on, {@code false} to turn it off.
 */
public void setCache(boolean cache) {
    this.usesCache_ = cache;
}

/**
* Adds a prefix-namespace URI mapping entry to this task.
*
Expand Down Expand Up @@ -280,6 +287,11 @@ public void execute() {
} else {
sinks_.log(this, " PI search not required", LogLevel.DEBUG);
}
if (usesCache_) {
reader.setEntityResolver(new CachingResolver(
u -> sinks_.log(this, "Caching " + u, LogLevel.DEBUG),
u -> sinks_.log(this, "Reusing " + u, LogLevel.DEBUG)));
}

// Do processing.
identity.transform(new SAXSource(reader, input), result);
Expand Down
75 changes: 0 additions & 75 deletions src/net/furfurylic/chionographis/Transform.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,8 @@
package net.furfurylic.chionographis;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
Expand All @@ -21,25 +18,17 @@
import java.util.function.Consumer;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.URIResolver;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamSource;

import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.types.LogLevel;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
Expand All @@ -51,9 +40,6 @@
*/
public final class Transform extends Sink implements SinkDriver {

private static final Object LOCK = new Object();
private static Map<URI, Source> sources_;

private Sinks sinks_;
private String style_;
private boolean usesCache_;
Expand Down Expand Up @@ -347,65 +333,4 @@ public void skippedEntity(String name) throws SAXException {
handler_.skippedEntity(name);
}
}

/**
 * A URI resolver which parses referred documents into DOM sources and keeps them
 * in the static map {@code sources_} of the enclosing class.
 * A document which failed to be parsed is kept as a {@code null} entry,
 * so that it is not tried again.
 */
private static class CachingResolver implements URIResolver {

    Consumer<URI> listenStored_;
    Consumer<URI> listenHit_;

    /**
     * @param listenStored
     *      called with the URI of a document which has just been parsed successfully.
     * @param listenHit
     *      called with the URI of a document which has been served from the cache.
     */
    public CachingResolver(Consumer<URI> listenStored, Consumer<URI> listenHit) {
        listenStored_ = listenStored;
        listenHit_ = listenHit;
    }

    /**
     * Resolves <i>href</i> against <i>base</i> and returns the cached source for it.
     *
     * @return
     *      the source, or {@code null} if the URI is relative, is a file URI
     *      without a real path, or points to a document which could not be parsed.
     */
    @Override
    public Source resolve(String href, String base) throws TransformerException {
        URI target = (base == null) ? URI.create(href) : URI.create(base).resolve(href);
        if (!target.isAbsolute()) {
            return null;
        }

        boolean isLocalFile = target.getScheme().toLowerCase().equals("file");
        if (!isLocalFile) {
            target = target.normalize();
        } else {
            // Afraid that omission of "xx/../" is bad for symbolic links
            try {
                target = Paths.get(target).toRealPath().toUri();
            } catch (IOException e) {
                return null;
            }
        }

        // Create the shared cache on the first use
        synchronized (LOCK) {
            if (sources_ == null) {
                sources_ = new HashMap<>();
            }
        }

        synchronized (LOCK) {
            Source known = sources_.get(target);
            if (known != null) {
                listenHit_.accept(target);
                return known;
            }
            if (sources_.containsKey(target)) {
                // A null entry: parsing failed in a previous request
                return null;
            }

            Source parsed = null;
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            try {
                DocumentBuilder builder = factory.newDocumentBuilder();
                Document document = builder.parse(target.toString());
                parsed = new DOMSource(document, target.toString());
                listenStored_.accept(target);
            } catch (ParserConfigurationException | SAXException | IOException e) {
                // Leave parsed null; the failure is remembered by the put below
            }
            sources_.put(target, parsed);
            return parsed;
        }
    }

}
}
1 change: 1 addition & 0 deletions test/crossing/input-cache/X.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
X
File renamed without changes.
Loading

0 comments on commit 5306114

Please sign in to comment.