Skip to content

Commit 4560a0c

Browse files
authored
Add XContentFieldFilter (#81970)
This commit introduces XContentFieldFilter, which applies field includes/excludes to XContent without having to realise the XContent itself as a Java map. SourceFieldMapper and ShardGetService are cut over to use this class.
1 parent 6c3a3f0 commit 4560a0c

File tree

4 files changed

+111
-34
lines changed

4 files changed

+111
-34
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.common.xcontent;
10+
11+
import org.elasticsearch.common.bytes.BytesReference;
12+
import org.elasticsearch.common.io.stream.BytesStreamOutput;
13+
import org.elasticsearch.common.util.CollectionUtils;
14+
import org.elasticsearch.common.xcontent.support.XContentMapValues;
15+
import org.elasticsearch.core.Nullable;
16+
import org.elasticsearch.core.Tuple;
17+
import org.elasticsearch.xcontent.XContentBuilder;
18+
import org.elasticsearch.xcontent.XContentFactory;
19+
import org.elasticsearch.xcontent.XContentParser;
20+
import org.elasticsearch.xcontent.XContentParserConfiguration;
21+
import org.elasticsearch.xcontent.XContentType;
22+
23+
import java.io.IOException;
24+
import java.util.Arrays;
25+
import java.util.Map;
26+
import java.util.Set;
27+
import java.util.function.Function;
28+
29+
/**
30+
* A filter that filter fields away from source
31+
*/
32+
public interface XContentFieldFilter {
33+
/**
34+
* filter source in {@link BytesReference} format and in {@link XContentType} content type
35+
* note that xContentType may be null in some case, we should guess xContentType from sourceBytes in such cases
36+
*/
37+
BytesReference apply(BytesReference sourceBytes, @Nullable XContentType xContentType) throws IOException;
38+
39+
/**
40+
* Construct {@link XContentFieldFilter} using given includes and excludes
41+
*
42+
* @param includes fields to keep, wildcard supported
43+
* @param excludes fields to remove, wildcard supported
44+
* @return filter using {@link XContentMapValues#filter(String[], String[])} if wildcard found in excludes
45+
* , otherwise return filter using {@link XContentParser}
46+
*/
47+
static XContentFieldFilter newFieldFilter(String[] includes, String[] excludes) {
48+
if ((CollectionUtils.isEmpty(excludes) == false) && Arrays.stream(excludes).filter(field -> field.contains("*")).count() > 0) {
49+
return (originalSource, contentType) -> {
50+
Function<Map<String, ?>, Map<String, Object>> mapFilter = XContentMapValues.filter(includes, excludes);
51+
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
52+
Map<String, Object> filteredSource = mapFilter.apply(mapTuple.v2());
53+
BytesStreamOutput bStream = new BytesStreamOutput();
54+
XContentType actualContentType = mapTuple.v1();
55+
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
56+
builder.close();
57+
return bStream.bytes();
58+
};
59+
} else {
60+
final XContentParserConfiguration parserConfig = XContentParserConfiguration.EMPTY.withFiltering(
61+
Set.of(includes),
62+
Set.of(excludes)
63+
);
64+
return (originalSource, contentType) -> {
65+
if (contentType == null) {
66+
contentType = XContentHelper.xContentTypeMayCompressed(originalSource);
67+
}
68+
BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, originalSource.length()));
69+
XContentBuilder builder = new XContentBuilder(contentType.xContent(), streamOutput);
70+
XContentParser parser = contentType.xContent().createParser(parserConfig, originalSource.streamInput());
71+
builder.copyCurrentStructure(parser);
72+
return BytesReference.bytes(builder);
73+
};
74+
}
75+
}
76+
}

server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,32 @@ public static BytesReference toXContent(ToXContent toXContent, XContentType xCon
518518
}
519519
}
520520

521+
/**
522+
* Guesses the content type based on the provided bytes which may be compressed.
523+
*
524+
* @deprecated the content type should not be guessed except for few cases where we effectively don't know the content type.
525+
* The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
526+
* This method is deprecated to prevent usages of it from spreading further without specific reasons.
527+
*/
528+
@Deprecated
529+
public static XContentType xContentTypeMayCompressed(BytesReference bytes) {
530+
Compressor compressor = CompressorFactory.compressor(bytes);
531+
if (compressor != null) {
532+
try {
533+
InputStream compressedStreamInput = compressor.threadLocalInputStream(bytes.streamInput());
534+
if (compressedStreamInput.markSupported() == false) {
535+
compressedStreamInput = new BufferedInputStream(compressedStreamInput);
536+
}
537+
return XContentFactory.xContentType(compressedStreamInput);
538+
} catch (IOException e) {
539+
assert false : "Should not happen, we're just reading bytes from memory";
540+
throw new UncheckedIOException(e);
541+
}
542+
} else {
543+
return XContentHelper.xContentType(bytes);
544+
}
545+
}
546+
521547
/**
522548
* Guesses the content type based on the provided bytes.
523549
*

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@
1616
import org.elasticsearch.common.metrics.CounterMetric;
1717
import org.elasticsearch.common.metrics.MeanMetric;
1818
import org.elasticsearch.common.util.set.Sets;
19-
import org.elasticsearch.common.xcontent.XContentHelper;
20-
import org.elasticsearch.common.xcontent.support.XContentMapValues;
19+
import org.elasticsearch.common.xcontent.XContentFieldFilter;
2120
import org.elasticsearch.core.Nullable;
22-
import org.elasticsearch.core.Tuple;
2321
import org.elasticsearch.index.IndexSettings;
2422
import org.elasticsearch.index.VersionType;
2523
import org.elasticsearch.index.engine.Engine;
@@ -33,8 +31,6 @@
3331
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
3432
import org.elasticsearch.index.shard.IndexShard;
3533
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
36-
import org.elasticsearch.xcontent.XContentFactory;
37-
import org.elasticsearch.xcontent.XContentType;
3834

3935
import java.io.IOException;
4036
import java.util.HashMap;
@@ -253,15 +249,11 @@ private GetResult innerGetLoadFromStoredFields(
253249
if (fetchSourceContext.fetchSource() == false) {
254250
source = null;
255251
} else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
256-
Map<String, Object> sourceAsMap;
257252
// TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different.
258253
// Do we care?
259-
Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
260-
XContentType sourceContentType = typeMapTuple.v1();
261-
sourceAsMap = typeMapTuple.v2();
262-
sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
263254
try {
264-
source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
255+
source = XContentFieldFilter.newFieldFilter(fetchSourceContext.includes(), fetchSourceContext.excludes())
256+
.apply(source, null);
265257
} catch (IOException e) {
266258
throw new ElasticsearchException("Failed to get id [" + id + "] with includes/excludes set", e);
267259
}

server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,32 +16,24 @@
1616
import org.apache.lucene.util.BytesRef;
1717
import org.elasticsearch.common.Strings;
1818
import org.elasticsearch.common.bytes.BytesReference;
19-
import org.elasticsearch.common.io.stream.BytesStreamOutput;
2019
import org.elasticsearch.common.util.CollectionUtils;
21-
import org.elasticsearch.common.xcontent.XContentHelper;
22-
import org.elasticsearch.common.xcontent.support.XContentMapValues;
20+
import org.elasticsearch.common.xcontent.XContentFieldFilter;
2321
import org.elasticsearch.core.Nullable;
24-
import org.elasticsearch.core.Tuple;
2522
import org.elasticsearch.index.query.QueryShardException;
2623
import org.elasticsearch.index.query.SearchExecutionContext;
27-
import org.elasticsearch.xcontent.XContentBuilder;
28-
import org.elasticsearch.xcontent.XContentFactory;
2924
import org.elasticsearch.xcontent.XContentType;
3025

3126
import java.io.IOException;
3227
import java.util.Arrays;
3328
import java.util.Collections;
3429
import java.util.List;
35-
import java.util.Map;
36-
import java.util.function.Function;
3730

3831
public class SourceFieldMapper extends MetadataFieldMapper {
39-
4032
public static final String NAME = "_source";
4133
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";
4234

4335
public static final String CONTENT_TYPE = "_source";
44-
private final Function<Map<String, ?>, Map<String, Object>> filter;
36+
private final XContentFieldFilter filter;
4537

4638
private static final SourceFieldMapper DEFAULT = new SourceFieldMapper(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
4739

@@ -145,7 +137,9 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes)
145137
this.includes = includes;
146138
this.excludes = excludes;
147139
final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false;
148-
this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null;
140+
this.filter = enabled && filtered
141+
? XContentFieldFilter.newFieldFilter(includes, excludes)
142+
: (sourceBytes, contentType) -> sourceBytes;
149143
this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes);
150144
}
151145

@@ -180,18 +174,7 @@ public void preParse(DocumentParserContext context) throws IOException {
180174
public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException {
181175
if (enabled && originalSource != null) {
182176
// Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data
183-
if (filter != null) {
184-
// we don't update the context source if we filter, we want to keep it as is...
185-
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
186-
Map<String, Object> filteredSource = filter.apply(mapTuple.v2());
187-
BytesStreamOutput bStream = new BytesStreamOutput();
188-
XContentType actualContentType = mapTuple.v1();
189-
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
190-
builder.close();
191-
return bStream.bytes();
192-
} else {
193-
return originalSource;
194-
}
177+
return filter.apply(originalSource, contentType);
195178
} else {
196179
return null;
197180
}

0 commit comments

Comments
 (0)