Skip to content
This repository has been archived by the owner on Aug 8, 2020. It is now read-only.

Commit

Permalink
Merge pull request #4 from furfurylic/transform-cache
Browse files Browse the repository at this point in the history
Add caching features into Transform filter
  • Loading branch information
furfurylic committed Mar 4, 2016
2 parents 30dafa9 + 8e47d2f commit bfadb81
Show file tree
Hide file tree
Showing 13 changed files with 324 additions and 4 deletions.
241 changes: 241 additions & 0 deletions src/net/furfurylic/chionographis/CachingResolver.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
/*
* Chionographis
*
* These codes are licensed under CC0.
* https://creativecommons.org/publicdomain/zero/1.0/deed
*/

package net.furfurylic.chionographis;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.WeakHashMap;
import java.util.function.Consumer;
import java.util.function.Function;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.URIResolver;
import javax.xml.transform.dom.DOMSource;

import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * An entity resolver and URI resolver which keeps what it has loaded in memory
 * so that the same resource is read (or parsed) only once.
 *
 * <p>The caches are held in static fields and are therefore shared by every instance
 * of this class; the listeners, on the other hand, belong to each instance.</p>
 *
 * <p>Whenever a resource cannot be identified or loaded, the resolving methods return
 * {@code null}, which lets the calling parser or transformer fall back to
 * its own default resolution.</p>
 */
final class CachingResolver implements EntityResolver, URIResolver {

    /** Raw contents of external entities, shared by all instances. */
    private static final NetResourceCache<byte[]> BYTES = new NetResourceCache<>();

    /** Parsed documents requested through {@link #resolve(String, String)}, shared by all instances. */
    private static final NetResourceCache<Source> TREES = new NetResourceCache<>();

    /** Notified with the URI of a resource which has just been loaded and stored. */
    private final Consumer<URI> listenStored_;

    /** Notified with the URI of a resource which has been served from the cache. */
    private final Consumer<URI> listenHit_;

    /**
     * Creates a resolver which reports its cache activity to the given listeners.
     *
     * @param listenStored
     *      a listener invoked when a resource is newly stored into the cache.
     * @param listenHit
     *      a listener invoked when a resource is served from the cache.
     */
    public CachingResolver(Consumer<URI> listenStored, Consumer<URI> listenHit) {
        listenStored_ = listenStored;
        listenHit_ = listenHit;
    }

    /**
     * Resolves an external entity to its (possibly cached) byte content.
     *
     * @param publicId
     *      the public identifier, only copied into the result.
     * @param systemId
     *      the system identifier, which must be an absolute URI to be handled here.
     *
     * @return
     *      an input source backed by the cached bytes,
     *      or {@code null} if the system identifier is absent, malformed, not absolute,
     *      or the content could not be read.
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        URI uri = toUniqueURI(null, systemId);
        if (uri == null) {
            return null;
        }

        Optional<byte[]> cached =
            BYTES.accessCache(uri, listenStored_, listenHit_, CachingResolver::readBytes);
        if (!cached.isPresent()) {
            return null;
        }

        // The cached array is only ever read, so every caller can get its own stream over it.
        InputSource inputSource = new InputSource(new ByteArrayInputStream(cached.get()));
        inputSource.setSystemId(systemId);
        inputSource.setPublicId(publicId);
        return inputSource;
    }

    /**
     * Resolves a document reference to a (possibly cached) parsed DOM tree.
     *
     * @param href
     *      the reference, possibly relative to <i>base</i>.
     * @param base
     *      the base URI, or {@code null} if <i>href</i> is to be used as is.
     *
     * @return
     *      a source wrapping the parsed document,
     *      or {@code null} if the reference is absent, malformed, not absolute after resolution,
     *      or the document could not be parsed.
     */
    @Override
    public Source resolve(String href, String base) throws TransformerException {
        URI uri = toUniqueURI(base, href);
        if (uri == null) {
            return null;
        }
        return TREES.accessCache(uri, listenStored_, listenHit_, this::parseTree).orElse(null);
    }

    /**
     * Reads the whole content addressed by a URI.
     *
     * @param u
     *      an absolute URI.
     *
     * @return
     *      the content, or {@code null} on an I/O error
     *      (the callers treat that as "leave it to the default resolution").
     */
    private static byte[] readBytes(URI u) {
        try {
            // equalsIgnoreCase is locale-independent, unlike toLowerCase().equals(...)
            if ("file".equalsIgnoreCase(u.getScheme())) {
                File file = new File(u);
                long length = file.length();
                if (length <= Integer.MAX_VALUE) {
                    // The size is known in advance: read straight into the final array.
                    byte[] content = new byte[(int) length];
                    try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
                        in.readFully(content);
                    }
                    return content;
                }
            }
            byte[] buffer = new byte[4096];
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            try (InputStream in = u.toURL().openStream()) {
                int length;
                while ((length = in.read(buffer)) != -1) {
                    bytes.write(buffer, 0, length);
                }
            }
            return bytes.toByteArray();
        } catch (IOException e) {
            return null;
        }
    }

    /**
     * Parses the document addressed by a URI into a DOM tree,
     * resolving its external entities through this object.
     *
     * @param u
     *      an absolute URI.
     *
     * @return
     *      the parsed document as a source whose system ID is the URI,
     *      or {@code null} if the parser could not be set up or the document could not be parsed.
     */
    private Source parseTree(URI u) {
        DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
        dbfac.setNamespaceAware(true);
        try {
            DocumentBuilder builder = dbfac.newDocumentBuilder();
            builder.setEntityResolver(this);
            String systemId = u.toString();
            Document document = builder.parse(systemId);
            return new DOMSource(document, systemId);
        } catch (ParserConfigurationException | SAXException | IOException e) {
            return null;
        }
    }

    /**
     * Turns a textual reference into a normalized absolute URI.
     *
     * @param base
     *      the base URI against which <i>reference</i> is resolved, or {@code null} for none.
     * @param reference
     *      the reference, possibly {@code null}.
     *
     * @return
     *      the normalized URI, or {@code null} if the reference is absent,
     *      either string is not a valid URI, or the result cannot be normalized.
     */
    private static URI toUniqueURI(String base, String reference) {
        if (reference == null) {
            return null;
        }
        URI uri;
        try {
            uri = (base == null) ? URI.create(reference) : URI.create(base).resolve(reference);
        } catch (IllegalArgumentException e) {
            // Not a URI at all: let the caller's default resolution report it properly.
            return null;
        }
        return uniquifyURI(uri);
    }

    /**
     * Normalizes a URI in terms of its logical content.
     *
     * @param uri
     *      a URI to normalize.
     *
     * @return
     *      the normalized URI, or {@code null} if the URI is not absolute
     *      or is a file URI which cannot be mapped to an existing real path.
     */
    private static URI uniquifyURI(URI uri) {
        assert uri != null;
        if (!uri.isAbsolute()) {
            return null;
        }
        if ("file".equalsIgnoreCase(uri.getScheme())) {
            // Afraid that omission of "xx/../" may break path meanings for symbolic links
            try {
                return Paths.get(uri).toRealPath().toUri();
            } catch (IOException | IllegalArgumentException e) {
                return null;
            }
        }
        return uri.normalize();
    }

    /**
     * A cache of objects made from URIs, in which the object for one URI is made at most once
     * while objects for different URIs can be made concurrently.
     *
     * @param <T>
     *      the type of the cached objects.
     */
    private static class NetResourceCache<T> {

        /**
         * A synchronized canonicalization mapping for URIs.
         * NOTE(review): each value is its own key, so the weak keys stay strongly reachable
         * and entries are presumably never reclaimed - confirm whether that is intended.
         */
        private final Map<URI, URI> canonURIs_ =
            Collections.synchronizedMap(new WeakHashMap<URI, URI>());

        /** A synchronized identity-based map keyed by the canonicalized URI objects. */
        private final Map<URI, Optional<T>> cache_ =
            Collections.synchronizedMap(new IdentityHashMap<URI, Optional<T>>());

        /**
         * Returns the object cached for a URI, making and storing it first if needed.
         * A failed attempt (the factory returning {@code null}) is remembered as well,
         * so the factory is not invoked again for that URI.
         *
         * @param uri
         *      an absolute URI.
         * @param listenStored
         *      a listener invoked when an object has been newly made and stored.
         * @param listenHit
         *      a listener invoked when a previously stored object is reused.
         * @param factory
         *      a factory function which make an object from a URI,
         *      returning {@code null} on failure.
         *
         * @return
         *      a possibly-empty resolved object, never {@code null}.
         */
        public Optional<T> accessCache(URI uri,
                Consumer<URI> listenStored, Consumer<URI> listenHit,
                Function<URI, ? extends T> factory) {
            assert uri != null;
            assert uri.isAbsolute();

            // Get the canonicalized form
            URI canonicalizedURI = canonicalizeURI(uri);

            // Hand an equal but distinct object to the factory and the listeners, so that
            // the canonicalized object (lock and identity key) is not passed outside this class.
            URI handedOut = (canonicalizedURI == uri) ? URI.create(uri.toString()) : uri;

            // Lock with privately-canonicalized form
            synchronized (canonicalizedURI) {
                Optional<T> cached = cache_.get(canonicalizedURI);
                if (cached == null) {
                    // First request for this URI.
                    cached = Optional.<T>ofNullable(factory.apply(handedOut));
                    if (cached.isPresent()) {
                        listenStored.accept(handedOut);
                    }
                    cache_.put(canonicalizedURI, cached);
                } else if (cached.isPresent()) {
                    // Cache hit.
                    listenHit.accept(handedOut);
                }
                // An empty value means that an error occurred in a previous try;
                // it is returned as is (not as null) so that callers can test it safely.
                return cached;
            }
        }

        /**
         * Canonicalizes a URI so that equal URIs are represented by one and the same object.
         *
         * @param uri
         *      a URI to canonicalize.
         *
         * @return
         *      the canonicalized form of the URI: the object registered earlier for an equal URI,
         *      or the parameter <i>uri</i> itself if it is the first one.
         */
        private URI canonicalizeURI(URI uri) {
            assert uri != null;
            URI existing = canonURIs_.putIfAbsent(uri, uri);
            return (existing != null) ? existing : uri;
        }
    }
}
12 changes: 12 additions & 0 deletions src/net/furfurylic/chionographis/Chionographis.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,17 @@ public final class Chionographis extends MatchingTask implements SinkDriver {

private Path srcDir_;
private Path baseDir_;
private boolean usesCache_;
private int prefixCount_;

private Map<String, String> prefixMap_;
private Sinks sinks_;

/**
 * Sole constructor. A task created here starts with caching switched off.
 */
public Chionographis() {
    this.usesCache_ = false;
}

/**
Expand Down Expand Up @@ -100,6 +103,10 @@ public void setSrcDir(String srcDir) {
srcDir_ = Paths.get(srcDir);
}

/**
 * Sets whether this task resolves external entities through a caching resolver.
 *
 * @param cache
 *      {@code true} to have a {@link CachingResolver} installed as the entity resolver
 *      of the reader when this task executes; {@code false} (the initial value) otherwise.
 */
public void setCache(boolean cache) {
    this.usesCache_ = cache;
}

/**
* Adds a prefix-namespace URI mapping entry to this task.
*
Expand Down Expand Up @@ -280,6 +287,11 @@ public void execute() {
} else {
sinks_.log(this, " PI search not required", LogLevel.DEBUG);
}
if (usesCache_) {
reader.setEntityResolver(new CachingResolver(
u -> sinks_.log(this, "Caching " + u, LogLevel.DEBUG),
u -> sinks_.log(this, "Reusing " + u, LogLevel.DEBUG)));
}

// Do processing.
identity.transform(new SAXSource(reader, input), result);
Expand Down
21 changes: 19 additions & 2 deletions src/net/furfurylic/chionographis/Transform.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,22 @@ public final class Transform extends Sink implements SinkDriver {

private Sinks sinks_;
private String style_;
private boolean usesCache_;
private int paramCount_;

private SAXTransformerFactory tfac_;
private URI styleURI_;

private Templates stylesheet_;
private Map<String, Object> params_;
private CachingResolver resolver_;
private Map<String, Object> params_;

private String output_;

/**
 * Creates a transform with no stylesheet, no parameters and caching switched off.
 *
 * @param logger
 *      the logger handed to the sinks this object drives.
 */
Transform(Logger logger) {
    this.sinks_ = new Sinks(logger);
    this.style_ = null;
    this.usesCache_ = false;
    this.paramCount_ = 0;
    this.params_ = Collections.<String, Object>emptyMap();
}
Expand All @@ -71,6 +75,10 @@ public void setStyle(String style) {
}
// TODO: Make this class able to accept non-file stylesheet URI

/**
 * Sets whether this transform resolves referenced documents through a caching resolver.
 *
 * @param cache
 *      {@code true} to have a {@link CachingResolver} set as the URI resolver of
 *      the transformer factory and of each transformer; {@code false} (the initial value) otherwise.
 */
public void setCache(boolean cache) {
    this.usesCache_ = cache;
}

/**
* Adds a stylesheet parameter.
*
Expand Down Expand Up @@ -200,12 +208,21 @@ Result startOne(int originalSrcIndex, String originalSrcFileName) {
tfac_ = (SAXTransformerFactory) TransformerFactory.newInstance();
String styleSystemID = styleURI_.toString();
sinks_.log(this, "Compiling stylesheet: " + styleSystemID, LogLevel.VERBOSE);
if (usesCache_) {
resolver_ = new CachingResolver(
u -> sinks_.log(this, "Caching " + u, LogLevel.DEBUG),
u -> sinks_.log(this, "Reusing " + u, LogLevel.DEBUG));
tfac_.setURIResolver(resolver_);
}
stylesheet_ = tfac_.newTemplates(new StreamSource(styleSystemID));
}
TransformerHandler styler = tfac_.newTransformerHandler(stylesheet_);
for (Map.Entry<String, Object> param : params_.entrySet()) {
styler.getTransformer().setParameter(param.getKey(), param.getValue());
}
if (usesCache_) {
styler.getTransformer().setURIResolver(resolver_);
}
styler.setResult(sinks_.startOne(originalSrcIndex, originalSrcFileName));
ContentHandler handler;
if (sinks_.needsOutput()) {
Expand Down Expand Up @@ -314,6 +331,6 @@ public void processingInstruction(String target, String data) throws SAXExceptio
@Override
public void skippedEntity(String name) throws SAXException {
handler_.skippedEntity(name);
}
}
}
}
1 change: 1 addition & 0 deletions test/crossing/input-cache/X.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
X
4 changes: 4 additions & 0 deletions test/crossing/input-cache/base.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="text"/>
<xsl:template match="*"><xsl:value-of select="local-name(.)"/>=<xsl:value-of select="."/></xsl:template>
</xsl:stylesheet>
1 change: 1 addition & 0 deletions test/crossing/input-cache/expected1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a=100
1 change: 1 addition & 0 deletions test/crossing/input-cache/expected2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
***x=X***
6 changes: 6 additions & 0 deletions test/crossing/input-cache/external.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" standalone="no"?>
<!DOCTYPE map SYSTEM "input.dtd">
<map>
<a>&OneHundred;</a>
<b>200</b>
</map>
2 changes: 2 additions & 0 deletions test/crossing/input-cache/input.dtd
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<!ENTITY X SYSTEM "X.txt">
<!ENTITY OneHundred "100">
3 changes: 3 additions & 0 deletions test/crossing/input-cache/input.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<?xml version="1.0" standalone="no"?>
<!DOCTYPE x SYSTEM "input.dtd">
<x>&X;</x>
5 changes: 5 additions & 0 deletions test/crossing/input-cache/style1.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:include href="base.xsl"/>
<xsl:variable name="external" select="document('external.xml')"/>
<xsl:template match="/"><xsl:apply-templates select="$external//a"/></xsl:template>
</xsl:stylesheet>
4 changes: 4 additions & 0 deletions test/crossing/input-cache/style2.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:import href="base.xsl"/>
<xsl:template match="/">***<xsl:apply-imports/>***</xsl:template>
</xsl:stylesheet>
Loading

0 comments on commit bfadb81

Please sign in to comment.