Skip to content

Add an optional extended parser subclass (YAMLAnchorReplayingFactory) able to inline anchors #502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package com.fasterxml.jackson.dataformat.yaml;

import java.io.CharArrayReader;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.io.IOContext;

/**
 * A {@link YAMLFactory} subclass whose only purpose is to hand out
 * {@link YAMLAnchorReplayingParser} instances instead of plain {@link YAMLParser}s.
 * All configuration (parser features, YAML parser features, loader options, codec)
 * is inherited from the parent factory and passed through unchanged.
 */
public class YAMLAnchorReplayingFactory extends YAMLFactory {
    // The parent participates in Java serialization (it declares readResolve(), overridden
    // below), so pin the serial form instead of relying on the compiler-computed default.
    private static final long serialVersionUID = 1L;

    /**
     * Creates a factory with the parent's default settings.
     */
    public YAMLAnchorReplayingFactory() {
        super();
    }

    /**
     * Creates a factory with the parent's default settings.
     *
     * @param oc codec handed to every parser created by this factory; may be {@code null}
     */
    public YAMLAnchorReplayingFactory(ObjectCodec oc) {
        super(oc);
    }

    /**
     * Copy constructor.
     *
     * @param src factory whose settings are copied
     * @param oc codec for the new factory; may be {@code null}
     */
    public YAMLAnchorReplayingFactory(YAMLFactory src, ObjectCodec oc) {
        super(src, oc);
    }

    /**
     * Builder-based constructor.
     *
     * @param b builder carrying the settings for the new factory
     */
    protected YAMLAnchorReplayingFactory(YAMLFactoryBuilder b) {
        super(b);
    }

    /**
     * Returns a new factory with the same settings as this one but without a codec.
     *
     * @return an independent copy of this factory
     */
    @Override
    public YAMLAnchorReplayingFactory copy() {
        // Guards against a further subclass that forgot to override copy().
        _checkInvalidCopy(YAMLAnchorReplayingFactory.class);
        return new YAMLAnchorReplayingFactory(this, (ObjectCodec) null);
    }

    /**
     * Serialization hook: replaces the deserialized instance by a fresh factory of this
     * type built from its settings and codec.
     *
     * @return the replacement factory
     */
    @Override
    protected Object readResolve() {
        return new YAMLAnchorReplayingFactory(this, _objectCodec);
    }

    /**
     * Creates an anchor-replaying parser reading from a byte stream.
     *
     * @param input stream to read; wrapped in a reader by the parent's {@code _createReader}
     * @param ctxt I/O context for the parser
     * @return the new parser
     * @throws IOException if the reader cannot be created
     */
    @Override
    protected YAMLParser _createParser(InputStream input, IOContext ctxt) throws IOException {
        return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
                _loaderOptions, _objectCodec,
                _createReader(input, (JsonEncoding) null, ctxt));
    }

    /**
     * Creates an anchor-replaying parser reading from the given reader.
     *
     * @param r reader to consume
     * @param ctxt I/O context for the parser
     * @return the new parser
     * @throws IOException declared by the overridden method; not thrown here
     */
    @Override
    protected YAMLParser _createParser(Reader r, IOContext ctxt) throws IOException {
        return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
                _loaderOptions, _objectCodec, r);
    }

    /**
     * Creates an anchor-replaying parser reading from a slice of a character array.
     *
     * @param data characters to parse
     * @param offset index of the first character to read
     * @param len number of characters to read
     * @param ctxt I/O context for the parser
     * @param recyclable ignored by this implementation
     * @return the new parser
     * @throws IOException declared by the overridden method; not thrown here
     */
    @Override
    protected YAMLParser _createParser(char[] data, int offset, int len, IOContext ctxt,
            boolean recyclable) throws IOException {
        return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
                _loaderOptions, _objectCodec, new CharArrayReader(data, offset, len));
    }

    /**
     * Creates an anchor-replaying parser reading from a slice of a byte array.
     *
     * @param data bytes to parse; wrapped in a reader by the parent's {@code _createReader}
     * @param offset index of the first byte to read
     * @param len number of bytes to read
     * @param ctxt I/O context for the parser
     * @return the new parser
     * @throws IOException if the reader cannot be created
     */
    @Override
    protected YAMLParser _createParser(byte[] data, int offset, int len, IOContext ctxt)
            throws IOException {
        return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
                _loaderOptions, _objectCodec,
                _createReader(data, offset, len, (JsonEncoding) null, ctxt));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package com.fasterxml.jackson.dataformat.yaml;

import java.io.Reader;
import java.io.IOException;

import java.util.ArrayList;
import java.util.ArrayDeque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.events.AliasEvent;
import org.yaml.snakeyaml.events.Event;
import org.yaml.snakeyaml.events.MappingEndEvent;
import org.yaml.snakeyaml.events.MappingStartEvent;
import org.yaml.snakeyaml.events.NodeEvent;
import org.yaml.snakeyaml.events.ScalarEvent;
import org.yaml.snakeyaml.events.CollectionEndEvent;
import org.yaml.snakeyaml.events.CollectionStartEvent;
import org.yaml.snakeyaml.nodes.MappingNode;

import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.io.IOContext;

/**
 * A parser that remembers the events of anchored parts of a YAML document and repeats
 * them whenever an alias is found, so that consumers see the anchored content inlined
 * instead of a bare alias. Merge keys ({@code <<}) whose value is a map (inline or via an
 * alias) are flattened into the surrounding map: the key and the start/end events of the
 * merged map are dropped and only its entries are emitted.
 * <p>
 * Note: this overrides {@link #getEvent()} because the base {@code nextToken()} manages
 * too much state; re-emitting events is much simpler.
 * <p>
 * Known limitations:
 * <ul>
 * <li>Any scalar without an anchor whose value is {@code <<} is treated as a merge key;
 *     neither its quoting style nor its position (key vs. value) is inspected.</li>
 * <li>A merge value must be a single map; anything else is rejected.</li>
 * <li>An anchor placed directly on an inline map used as a merge value is not registered.</li>
 * </ul>
 */
public class YAMLAnchorReplayingParser extends YAMLParser {
    /**
     * Events collected so far for one anchored collection that has not been closed yet.
     */
    private static class AnchorContext {
        public final String anchor;
        public final List<Event> events = new ArrayList<>();
        /** Number of recorded collections, including the anchored one, that are still open. */
        public int depth = 1;

        public AnchorContext(String anchor) {
            this.anchor = anchor;
        }
    }

    /**
     * the maximum number of events that can be recorded for one anchor or queued for replay
     */
    public static final int MAX_EVENTS = 9999;

    /**
     * the maximum number of anchored collections that may be open (nested) at the same time
     */
    public static final int MAX_ANCHORS = 9999;

    /**
     * the maximum number of merges that may be in progress (nested) at the same time
     */
    public static final int MAX_MERGES = 9999;

    /**
     * the maximum number of distinct anchors to remember
     */
    public static final int MAX_REFS = 9999;

    /**
     * Remembers the depth at which each running merge was started, in order to recognize
     * and skip the corresponding mapping end, which must not be emitted
     */
    private final ArrayDeque<Integer> mergeStack = new ArrayDeque<>();

    /**
     * Collects nested anchor definitions that are still open
     */
    private final ArrayDeque<AnchorContext> tokenStack = new ArrayDeque<>();

    /**
     * Keeps track of the last sequentially found definition of each anchor
     */
    private final Map<String, List<Event>> referencedObjects = new HashMap<>();

    /**
     * Events that have been queued for replay while processing an alias
     */
    private final ArrayDeque<Event> refEvents = new ArrayDeque<>();

    /**
     * Keeps track of the global depth of nested collections
     */
    private int globalDepth = 0;

    /**
     * Whether the event most recently returned by {@link #nextExpandedEvent()} came from
     * the replay queue (as opposed to the underlying parser)
     */
    private boolean lastEventReplayed = false;

    public YAMLAnchorReplayingParser(IOContext ctxt, int parserFeatures, int formatFeatures,
            LoaderOptions loaderOptions, ObjectCodec codec, Reader reader) {
        super(ctxt, parserFeatures, formatFeatures, loaderOptions, codec, reader);
    }

    /**
     * Publishes a completed anchor definition and appends its events to the enclosing open
     * anchor, if any, so that the enclosing definition stays complete.
     *
     * @throws IllegalStateException if a limit on remembered anchors or events is exceeded
     */
    private void finishContext(AnchorContext context) {
        // Redefining a known anchor replaces its entry and does not grow the map.
        if (!referencedObjects.containsKey(context.anchor)
                && referencedObjects.size() + 1 > MAX_REFS) {
            throw new IllegalStateException("too many references in the document");
        }
        referencedObjects.put(context.anchor, context.events);
        if (!tokenStack.isEmpty()) {
            List<Event> parentEvents = tokenStack.peek().events;
            if (parentEvents.size() + context.events.size() > MAX_EVENTS) {
                throw new IllegalStateException("too many events to replay");
            }
            parentEvents.addAll(context.events);
        }
    }

    /**
     * Updates the global collection depth for the given event.
     *
     * @param event event to inspect; may be {@code null}
     * @return the event that was passed in
     */
    protected Event trackDepth(Event event) {
        if (event instanceof CollectionStartEvent) {
            ++globalDepth;
        } else if (event instanceof CollectionEndEvent) {
            --globalDepth;
        }
        return event;
    }

    /**
     * Drops the mapping end that closes the innermost running merge. Must be called after
     * {@link #trackDepth(Event)} has seen the same event.
     *
     * @param event event to inspect; may be {@code null}
     * @return the event, or {@code null} if it has to be skipped
     */
    protected Event filterEvent(Event event) {
        if (event instanceof MappingEndEvent
                && !mergeStack.isEmpty()
                && mergeStack.peek() > globalDepth) {
            // Depth fell below the level recorded at merge start: this end belongs to the merged map.
            mergeStack.pop();
            return null;
        }
        return event;
    }

    /**
     * Returns the next event with aliases replaced by the events of the referenced anchor
     * and merges flattened into the surrounding map.
     *
     * @return the next event, or {@code null} when the underlying parser has no more events
     * @throws IllegalStateException on an unknown alias, a merge whose value is not a map,
     *         or when one of the {@code MAX_*} limits is exceeded
     */
    @Override
    protected Event getEvent() {
        while (true) {
            Event event = nextExpandedEvent();
            if (event == null) {
                return null;
            }
            if (lastEventReplayed) {
                // Replayed content only has to become part of an enclosing open anchor;
                // its own anchors are already known and it contains no merge keys.
                recordEvent(event);
                return event;
            }
            if (event instanceof NodeEvent) {
                String anchor = ((NodeEvent) event).getAnchor();
                if (anchor != null) {
                    startAnchor(anchor, event);
                    return event;
                }
            }
            if (isMergeKey(event)) {
                // Expect the next node to be a map; both the key and the map start are
                // dropped, and the matching map end is dropped later by filterEvent().
                Event next = nextExpandedEvent();
                if (!(next instanceof MappingStartEvent)) {
                    throw new IllegalStateException("found field '<<' but value isn't a map");
                }
                if (mergeStack.size() + 1 > MAX_MERGES) {
                    throw new IllegalStateException("too many merges in the document");
                }
                mergeStack.push(globalDepth);
                continue;
            }
            recordEvent(event);
            return event;
        }
    }

    /**
     * Pulls the next event from the replay queue or, if that is empty, from the underlying
     * parser. Every event is depth-tracked and filtered, and aliases are expanded, so the
     * caller never sees an alias or the end of a merged map. Sets {@link #lastEventReplayed}.
     */
    private Event nextExpandedEvent() {
        while (true) {
            Event event;
            if (refEvents.isEmpty()) {
                lastEventReplayed = false;
                event = trackDepth(super.getEvent());
                if (event == null) {
                    return null;
                }
            } else {
                lastEventReplayed = true;
                event = trackDepth(refEvents.removeFirst());
            }
            event = filterEvent(event);
            if (event == null) {
                continue;
            }
            // Recorded events never contain aliases, so only fresh events need expansion.
            if (!lastEventReplayed && event instanceof AliasEvent) {
                expandAlias((AliasEvent) event);
                continue;
            }
            return event;
        }
    }

    /**
     * Queues the events recorded for the anchor the alias refers to.
     */
    private void expandAlias(AliasEvent alias) {
        List<Event> events = referencedObjects.get(alias.getAnchor());
        if (events == null) {
            throw new IllegalStateException("invalid alias " + alias.getAnchor());
        }
        if (refEvents.size() + events.size() > MAX_EVENTS) {
            throw new IllegalStateException("too many events to replay");
        }
        refEvents.addAll(events);
    }

    /**
     * Starts recording a freshly parsed anchored node: a collection stays open until its
     * end event arrives, anything else is complete immediately.
     */
    private void startAnchor(String anchor, Event event) {
        AnchorContext context = new AnchorContext(anchor);
        context.events.add(event);
        if (event instanceof CollectionStartEvent) {
            if (tokenStack.size() + 1 > MAX_ANCHORS) {
                throw new IllegalStateException("too many anchors in the document");
            }
            tokenStack.push(context);
        } else {
            // directly store it
            finishContext(context);
        }
    }

    /**
     * Appends an emitted event to the innermost open anchor, if any, and closes that
     * anchor when the event ends its collection.
     */
    private void recordEvent(Event event) {
        if (tokenStack.isEmpty()) {
            return;
        }
        AnchorContext context = tokenStack.peek();
        if (context.events.size() + 1 > MAX_EVENTS) {
            throw new IllegalStateException("too many events to replay");
        }
        context.events.add(event);
        if (event instanceof CollectionStartEvent) {
            ++context.depth;
        } else if (event instanceof CollectionEndEvent) {
            --context.depth;
            if (context.depth == 0) {
                tokenStack.pop();
                finishContext(context);
            }
        }
    }

    /**
     * Tells whether the event is a scalar with the value {@code <<}.
     */
    private static boolean isMergeKey(Event event) {
        return event instanceof ScalarEvent && "<<".equals(((ScalarEvent) event).getValue());
    }
}
Loading