Skip to content

Commit 5aebb8e

Browse files
authored
Add text field support to archive indices (#86591)
Adds support for "text" fields in archive indices, with the goal of providing simple filtering support on text fields when querying archive indices.

There are some differences from regular text fields:
- No global statistics: queries on text fields return a constant score (similar to match_only_text).
- Analyzer fields can be updated.
- If the defined analyzer is not available, the field falls back to the default analyzer.
- There are no guarantees that analyzers are backwards compatible (BWC).

The above limitations also give us the flexibility to eventually swap out the implementation with a "runtime-text field" variant, and hence we only provide those capabilities that can be emulated via a runtime field.

Relates #81210
1 parent d6519b4 commit 5aebb8e

File tree

22 files changed

+687
-60
lines changed

22 files changed

+687
-60
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna
9797
this.analyzers = new TextParams.Analyzers(
9898
indexAnalyzers,
9999
m -> ((MatchOnlyTextFieldMapper) m).indexAnalyzer,
100-
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap
100+
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
101+
indexCreatedVersion
101102
);
102103
}
103104

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.lucene.util.automaton.Automata;
3636
import org.apache.lucene.util.automaton.Automaton;
3737
import org.apache.lucene.util.automaton.Operations;
38+
import org.elasticsearch.Version;
3839
import org.elasticsearch.common.collect.Iterators;
3940
import org.elasticsearch.index.analysis.AnalyzerScope;
4041
import org.elasticsearch.index.analysis.IndexAnalyzers;
@@ -92,7 +93,7 @@ public static class Defaults {
9293
public static final int MAX_SHINGLE_SIZE = 3;
9394
}
9495

95-
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
96+
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
9697

9798
private static Builder builder(FieldMapper in) {
9899
return ((SearchAsYouTypeFieldMapper) in).builder;
@@ -141,12 +142,16 @@ public static class Builder extends FieldMapper.Builder {
141142

142143
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
143144

144-
public Builder(String name, IndexAnalyzers indexAnalyzers) {
145+
private final Version indexCreatedVersion;
146+
147+
public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
145148
super(name);
149+
this.indexCreatedVersion = indexCreatedVersion;
146150
this.analyzers = new TextParams.Analyzers(
147151
indexAnalyzers,
148152
m -> builder(m).analyzers.getIndexAnalyzer(),
149-
m -> builder(m).analyzers.positionIncrementGap.getValue()
153+
m -> builder(m).analyzers.positionIncrementGap.getValue(),
154+
indexCreatedVersion
150155
);
151156
}
152157

@@ -710,7 +715,7 @@ protected String contentType() {
710715
}
711716

712717
public FieldMapper.Builder getMergeBuilder() {
713-
return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this);
718+
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
714719
}
715720

716721
public static String getShingleFieldName(String parentField, int shingleSize) {

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.lucene.document.FieldType;
2222
import org.apache.lucene.index.IndexOptions;
2323
import org.elasticsearch.ElasticsearchParseException;
24+
import org.elasticsearch.Version;
2425
import org.elasticsearch.index.analysis.AnalyzerScope;
2526
import org.elasticsearch.index.analysis.IndexAnalyzers;
2627
import org.elasticsearch.index.analysis.NamedAnalyzer;
@@ -86,12 +87,16 @@ public static class Builder extends FieldMapper.Builder {
8687

8788
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
8889

89-
public Builder(String name, IndexAnalyzers indexAnalyzers) {
90+
private final Version indexCreatedVersion;
91+
92+
public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
9093
super(name);
94+
this.indexCreatedVersion = indexCreatedVersion;
9195
this.analyzers = new TextParams.Analyzers(
9296
indexAnalyzers,
9397
m -> builder(m).analyzers.getIndexAnalyzer(),
94-
m -> builder(m).analyzers.positionIncrementGap.getValue()
98+
m -> builder(m).analyzers.positionIncrementGap.getValue(),
99+
indexCreatedVersion
95100
);
96101
}
97102

@@ -145,7 +150,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
145150
}
146151
}
147152

148-
public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
153+
public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
149154

150155
/**
151156
* Parses markdown-like syntax into plain text and AnnotationTokens with offsets for
@@ -527,6 +532,6 @@ protected String contentType() {
527532

528533
@Override
529534
public FieldMapper.Builder getMergeBuilder() {
530-
return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this);
535+
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
531536
}
532537
}

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.apache.lucene.queries.intervals.Intervals;
1212
import org.apache.lucene.queries.intervals.IntervalsSource;
1313
import org.apache.lucene.util.BytesRef;
14+
import org.elasticsearch.Version;
1415
import org.elasticsearch.index.mapper.FieldTypeTestCase;
1516
import org.elasticsearch.index.mapper.MappedFieldType;
1617
import org.elasticsearch.index.mapper.MapperBuilderContext;
@@ -28,7 +29,7 @@ public void testIntervals() throws IOException {
2829
}
2930

3031
public void testFetchSourceValue() throws IOException {
31-
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", createDefaultIndexAnalyzers()).build(
32+
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", Version.CURRENT, createDefaultIndexAnalyzers()).build(
3233
MapperBuilderContext.ROOT
3334
).fieldType();
3435

server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifier.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,12 @@
1818
import org.elasticsearch.common.settings.IndexScopedSettings;
1919
import org.elasticsearch.common.settings.Settings;
2020
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
21-
import org.elasticsearch.core.Nullable;
2221
import org.elasticsearch.index.IndexSettings;
2322
import org.elasticsearch.index.analysis.AnalyzerScope;
2423
import org.elasticsearch.index.analysis.IndexAnalyzers;
2524
import org.elasticsearch.index.analysis.NamedAnalyzer;
26-
import org.elasticsearch.index.mapper.DocumentMapper;
2725
import org.elasticsearch.index.mapper.MapperRegistry;
2826
import org.elasticsearch.index.mapper.MapperService;
29-
import org.elasticsearch.index.mapper.Mapping;
3027
import org.elasticsearch.index.similarity.SimilarityService;
3128
import org.elasticsearch.script.ScriptCompiler;
3229
import org.elasticsearch.script.ScriptService;
@@ -92,7 +89,7 @@ public IndexMetadata verifyIndexMetadata(IndexMetadata indexMetadata, Version mi
9289
// Next we have to run this otherwise if we try to create IndexSettings
9390
// with broken settings it would fail in checkMappingsCompatibility
9491
newMetadata = archiveBrokenIndexSettings(newMetadata);
95-
createAndValidateMapping(newMetadata);
92+
checkMappingsCompatibility(newMetadata);
9693
return newMetadata;
9794
}
9895

@@ -129,10 +126,8 @@ private static void checkSupportedVersion(IndexMetadata indexMetadata, Version m
129126
* Note that we don't expect users to encounter mapping incompatibilities, since our index compatibility
130127
* policy guarantees we can read mappings from previous compatible index versions. A failure here would
131128
* indicate a compatibility bug (which are unfortunately not that uncommon).
132-
* @return the mapping
133129
*/
134-
@Nullable
135-
public Mapping createAndValidateMapping(IndexMetadata indexMetadata) {
130+
private void checkMappingsCompatibility(IndexMetadata indexMetadata) {
136131
try {
137132

138133
// We cannot instantiate real analysis server or similarity service at this point because the node
@@ -199,8 +194,6 @@ public Set<Entry<String, NamedAnalyzer>> entrySet() {
199194
scriptService
200195
);
201196
mapperService.merge(indexMetadata, MapperService.MergeReason.MAPPING_RECOVERY);
202-
DocumentMapper documentMapper = mapperService.documentMapper();
203-
return documentMapper == null ? null : documentMapper.mapping();
204197
}
205198
} catch (Exception ex) {
206199
// Wrap the inner exception so we have the index name in the exception message

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88

99
package org.elasticsearch.index.mapper;
1010

11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
13+
import org.apache.logging.log4j.message.ParameterizedMessage;
1114
import org.apache.lucene.index.LeafReaderContext;
1215
import org.elasticsearch.Version;
1316
import org.elasticsearch.common.Explicit;
@@ -48,6 +51,8 @@
4851
import java.util.function.Supplier;
4952

5053
public abstract class FieldMapper extends Mapper implements Cloneable {
54+
private static final Logger logger = LogManager.getLogger(FieldMapper.class);
55+
5156
public static final Setting<Boolean> IGNORE_MALFORMED_SETTING = Setting.boolSetting(
5257
"index.mapping.ignore_malformed",
5358
false,
@@ -959,23 +964,48 @@ public static <T extends Enum<T>> Parameter<T> restrictedEnumParam(
959964
* @param updateable whether the parameter can be changed by a mapping update
960965
* @param initializer a function that reads the parameter value from an existing mapper
961966
* @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping
967+
* @param indexCreatedVersion the version on which this index was created
962968
*/
963969
public static Parameter<NamedAnalyzer> analyzerParam(
964970
String name,
965971
boolean updateable,
966972
Function<FieldMapper, NamedAnalyzer> initializer,
967-
Supplier<NamedAnalyzer> defaultAnalyzer
973+
Supplier<NamedAnalyzer> defaultAnalyzer,
974+
Version indexCreatedVersion
968975
) {
969976
return new Parameter<>(name, updateable, defaultAnalyzer, (n, c, o) -> {
970977
String analyzerName = o.toString();
971978
NamedAnalyzer a = c.getIndexAnalyzers().get(analyzerName);
972979
if (a == null) {
973-
throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings");
980+
if (indexCreatedVersion.isLegacyIndexVersion()) {
981+
logger.warn(
982+
new ParameterizedMessage("Could not find analyzer [{}] of legacy index, falling back to default", analyzerName)
983+
);
984+
a = defaultAnalyzer.get();
985+
} else {
986+
throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings");
987+
}
974988
}
975989
return a;
976990
}, initializer, (b, n, v) -> b.field(n, v.name()), NamedAnalyzer::name);
977991
}
978992

993+
/**
994+
* Defines a parameter that takes an analyzer name
995+
* @param name the parameter name
996+
* @param updateable whether the parameter can be changed by a mapping update
997+
* @param initializer a function that reads the parameter value from an existing mapper
998+
* @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping
999+
*/
1000+
public static Parameter<NamedAnalyzer> analyzerParam(
1001+
String name,
1002+
boolean updateable,
1003+
Function<FieldMapper, NamedAnalyzer> initializer,
1004+
Supplier<NamedAnalyzer> defaultAnalyzer
1005+
) {
1006+
return analyzerParam(name, updateable, initializer, defaultAnalyzer, Version.CURRENT);
1007+
}
1008+
9791009
/**
9801010
* Declares a metadata parameter
9811011
*/

0 commit comments

Comments
 (0)