Skip to content

Commit c082858

Browse files
authored
Support older postings formats (#85303)
Adds support for the Lucene 5 postings format (used by Lucene 6 and 7). Relates #81210
1 parent 68f4322 commit c082858

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+15293
-79
lines changed

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java

Lines changed: 3 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -10,48 +10,31 @@
1010
import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
1111
import org.apache.lucene.codecs.Codec;
1212
import org.apache.lucene.codecs.FieldInfosFormat;
13-
import org.apache.lucene.codecs.FieldsConsumer;
14-
import org.apache.lucene.codecs.FieldsProducer;
1513
import org.apache.lucene.codecs.KnnVectorsFormat;
1614
import org.apache.lucene.codecs.NormsFormat;
17-
import org.apache.lucene.codecs.NormsProducer;
1815
import org.apache.lucene.codecs.PointsFormat;
19-
import org.apache.lucene.codecs.PostingsFormat;
2016
import org.apache.lucene.codecs.SegmentInfoFormat;
2117
import org.apache.lucene.codecs.TermVectorsFormat;
2218
import org.apache.lucene.index.FieldInfo;
2319
import org.apache.lucene.index.FieldInfos;
24-
import org.apache.lucene.index.Fields;
25-
import org.apache.lucene.index.IndexOptions;
2620
import org.apache.lucene.index.SegmentInfo;
27-
import org.apache.lucene.index.SegmentReadState;
28-
import org.apache.lucene.index.SegmentWriteState;
29-
import org.apache.lucene.index.Terms;
3021
import org.apache.lucene.store.Directory;
3122
import org.apache.lucene.store.IOContext;
3223
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
3324

3425
import java.io.IOException;
3526
import java.util.ArrayList;
36-
import java.util.Iterator;
3727
import java.util.List;
3828

3929
/**
4030
* Base class for older BWC codecs
4131
*/
4232
public abstract class BWCCodec extends Codec {
4333

44-
private final PostingsFormat postingsFormat = new EmptyPostingsFormat();
45-
4634
protected BWCCodec(String name) {
4735
super(name);
4836
}
4937

50-
@Override
51-
public PostingsFormat postingsFormat() {
52-
return postingsFormat;
53-
}
54-
5538
@Override
5639
public NormsFormat normsFormat() {
5740
throw new UnsupportedOperationException();
@@ -72,62 +55,6 @@ public KnnVectorsFormat knnVectorsFormat() {
7255
throw new UnsupportedOperationException();
7356
}
7457

75-
/**
76-
* In-memory postings format that shows no postings available.
77-
* TODO: Remove once https://issues.apache.org/jira/browse/LUCENE-10291 is fixed.
78-
*/
79-
static class EmptyPostingsFormat extends PostingsFormat {
80-
81-
protected EmptyPostingsFormat() {
82-
super("EmptyPostingsFormat");
83-
}
84-
85-
@Override
86-
public FieldsConsumer fieldsConsumer(SegmentWriteState state) {
87-
return new FieldsConsumer() {
88-
@Override
89-
public void write(Fields fields, NormsProducer norms) {
90-
throw new UnsupportedOperationException();
91-
}
92-
93-
@Override
94-
public void close() {
95-
96-
}
97-
};
98-
}
99-
100-
@Override
101-
public FieldsProducer fieldsProducer(SegmentReadState state) {
102-
return new FieldsProducer() {
103-
@Override
104-
public void close() {
105-
106-
}
107-
108-
@Override
109-
public void checkIntegrity() {
110-
111-
}
112-
113-
@Override
114-
public Iterator<String> iterator() {
115-
return null;
116-
}
117-
118-
@Override
119-
public Terms terms(String field) {
120-
return null;
121-
}
122-
123-
@Override
124-
public int size() {
125-
return 0;
126-
}
127-
};
128-
}
129-
}
130-
13158
protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) {
13259
return new SegmentInfoFormat() {
13360
@Override
@@ -158,7 +85,7 @@ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSu
15885
};
15986
}
16087

161-
// mark all fields as having no postings, no term vectors, no norms, no payloads, no points, and no vectors.
88+
// mark all fields as having no term vectors, no norms, no payloads, no points, and no vectors.
16289
private static FieldInfos filterFields(FieldInfos fieldInfos) {
16390
List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
16491
for (FieldInfo fieldInfo : fieldInfos) {
@@ -167,9 +94,9 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) {
16794
fieldInfo.name,
16895
fieldInfo.number,
16996
false,
97+
true,
17098
false,
171-
false,
172-
IndexOptions.NONE,
99+
fieldInfo.getIndexOptions(),
173100
fieldInfo.getDocValuesType(),
174101
fieldInfo.getDocValuesGen(),
175102
fieldInfo.attributes(),
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* @notice
3+
* Licensed to the Apache Software Foundation (ASF) under one or more
4+
* contributor license agreements. See the NOTICE file distributed with
5+
* this work for additional information regarding copyright ownership.
6+
* The ASF licenses this file to You under the Apache License, Version 2.0
7+
* (the "License"); you may not use this file except in compliance with
8+
* the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Modifications copyright (C) 2021 Elasticsearch B.V.
19+
*/
20+
package org.elasticsearch.xpack.lucene.bwc.codecs;
21+
22+
import org.apache.lucene.codecs.FieldsConsumer;
23+
import org.apache.lucene.codecs.FieldsProducer;
24+
import org.apache.lucene.codecs.NormsProducer;
25+
import org.apache.lucene.codecs.PostingsFormat;
26+
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
27+
import org.apache.lucene.index.FieldInfo;
28+
import org.apache.lucene.index.Fields;
29+
import org.apache.lucene.index.IndexOptions;
30+
import org.apache.lucene.index.MergeState;
31+
import org.apache.lucene.index.SegmentReadState;
32+
import org.apache.lucene.index.SegmentWriteState;
33+
import org.apache.lucene.index.Terms;
34+
import org.elasticsearch.core.IOUtils;
35+
36+
import java.io.Closeable;
37+
import java.io.IOException;
38+
import java.util.ArrayList;
39+
import java.util.Collections;
40+
import java.util.HashMap;
41+
import java.util.IdentityHashMap;
42+
import java.util.Iterator;
43+
import java.util.List;
44+
import java.util.Map;
45+
import java.util.TreeMap;
46+
47+
/**
48+
* Modified version of {@link PerFieldPostingsFormat} that allows swapping in
49+
* {@link org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat} instead of
50+
* {@link org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat} when reading from older
51+
* codecs. The former has full support for older Lucene versions (going back to Lucene 5) while the
52+
* latter only supports Lucene 7 and above (as it was shipped with backwards-codecs of Lucene 9 that
53+
* only has support for N-2).
54+
*
55+
* This class can probably be removed once we are on Lucene 10 and Lucene50PostingsFormat is no longer
56+
* shipped as part of bwc jars.
57+
*
58+
* Swapping out formats can be done via the {@link #getPostingsFormat(String) method}.
59+
*/
60+
public abstract class LegacyAdaptingPerFieldPostingsFormat extends PostingsFormat {
61+
/** Name of this {@link PostingsFormat}. */
62+
public static final String PER_FIELD_NAME = "PerField40";
63+
64+
/** {@link FieldInfo} attribute name used to store the format name for each field. */
65+
public static final String PER_FIELD_FORMAT_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".format";
66+
67+
/** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */
68+
public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix";
69+
70+
/** Sole constructor. */
71+
protected LegacyAdaptingPerFieldPostingsFormat() {
72+
super(PER_FIELD_NAME);
73+
}
74+
75+
static String getSuffix(String formatName, String suffix) {
76+
return formatName + "_" + suffix;
77+
}
78+
79+
protected PostingsFormat getPostingsFormat(String formatName) {
80+
throw new IllegalArgumentException(formatName);
81+
}
82+
83+
private class FieldsWriter extends FieldsConsumer {
84+
final SegmentWriteState writeState;
85+
final List<Closeable> toClose = new ArrayList<Closeable>();
86+
87+
FieldsWriter(SegmentWriteState writeState) {
88+
this.writeState = writeState;
89+
}
90+
91+
@Override
92+
public void write(Fields fields, NormsProducer norms) throws IOException {
93+
throw new IllegalStateException("This codec should only be used for reading, not writing");
94+
}
95+
96+
@Override
97+
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
98+
throw new IllegalStateException("This codec should only be used for reading, not writing");
99+
}
100+
101+
@Override
102+
public void close() throws IOException {
103+
IOUtils.close(toClose);
104+
}
105+
}
106+
107+
private static class FieldsReader extends FieldsProducer {
108+
109+
private final Map<String, FieldsProducer> fields = new TreeMap<>();
110+
private final Map<String, FieldsProducer> formats = new HashMap<>();
111+
private final String segment;
112+
113+
// clone for merge
114+
FieldsReader(FieldsReader other) {
115+
Map<FieldsProducer, FieldsProducer> oldToNew = new IdentityHashMap<>();
116+
// First clone all formats
117+
for (Map.Entry<String, FieldsProducer> ent : other.formats.entrySet()) {
118+
FieldsProducer values = ent.getValue().getMergeInstance();
119+
formats.put(ent.getKey(), values);
120+
oldToNew.put(ent.getValue(), values);
121+
}
122+
123+
// Then rebuild fields:
124+
for (Map.Entry<String, FieldsProducer> ent : other.fields.entrySet()) {
125+
FieldsProducer producer = oldToNew.get(ent.getValue());
126+
assert producer != null;
127+
fields.put(ent.getKey(), producer);
128+
}
129+
130+
segment = other.segment;
131+
}
132+
133+
FieldsReader(final SegmentReadState readState, LegacyAdaptingPerFieldPostingsFormat legacyAdaptingPerFieldPostingsFormat)
134+
throws IOException {
135+
136+
// Read _X.per and init each format:
137+
boolean success = false;
138+
try {
139+
// Read field name -> format name
140+
for (FieldInfo fi : readState.fieldInfos) {
141+
if (fi.getIndexOptions() != IndexOptions.NONE) {
142+
final String fieldName = fi.name;
143+
final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY);
144+
if (formatName != null) {
145+
// null formatName means the field is in fieldInfos, but has no postings!
146+
final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY);
147+
if (suffix == null) {
148+
throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
149+
}
150+
PostingsFormat format = legacyAdaptingPerFieldPostingsFormat.getPostingsFormat(formatName);
151+
String segmentSuffix = getSuffix(formatName, suffix);
152+
if (formats.containsKey(segmentSuffix) == false) {
153+
formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
154+
}
155+
fields.put(fieldName, formats.get(segmentSuffix));
156+
}
157+
}
158+
}
159+
success = true;
160+
} finally {
161+
if (success == false) {
162+
IOUtils.closeWhileHandlingException(formats.values());
163+
}
164+
}
165+
166+
this.segment = readState.segmentInfo.name;
167+
}
168+
169+
@Override
170+
public Iterator<String> iterator() {
171+
return Collections.unmodifiableSet(fields.keySet()).iterator();
172+
}
173+
174+
@Override
175+
public Terms terms(String field) throws IOException {
176+
FieldsProducer fieldsProducer = fields.get(field);
177+
return fieldsProducer == null ? null : fieldsProducer.terms(field);
178+
}
179+
180+
@Override
181+
public int size() {
182+
return fields.size();
183+
}
184+
185+
@Override
186+
public void close() throws IOException {
187+
IOUtils.close(formats.values());
188+
}
189+
190+
@Override
191+
public void checkIntegrity() throws IOException {
192+
for (FieldsProducer producer : formats.values()) {
193+
producer.checkIntegrity();
194+
}
195+
}
196+
197+
@Override
198+
public FieldsProducer getMergeInstance() {
199+
return new FieldsReader(this);
200+
}
201+
202+
@Override
203+
public String toString() {
204+
return "PerFieldPostings(segment=" + segment + " formats=" + formats.size() + ")";
205+
}
206+
}
207+
208+
@Override
209+
public final FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
210+
return new FieldsWriter(state);
211+
}
212+
213+
@Override
214+
public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
215+
return new FieldsReader(state, this);
216+
}
217+
}

0 commit comments

Comments
 (0)