Skip to content

Commit 0685af2

Browse files
authored
Add codec support for Lucene 6 and 7 versions (#81258)
Adds Lucene support for reading _id and _source from ES 5 / ES 6 indices. Relates #81210
1 parent 27b3d30 commit 0685af2

File tree

17 files changed

+978
-208
lines changed

17 files changed

+978
-208
lines changed

server/src/main/java/org/elasticsearch/index/IndexModule.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,13 @@ public Settings getSettings() {
210210
return indexSettings.getSettings();
211211
}
212212

213+
/**
214+
* Returns the {@link IndexSettings} for this index
215+
*/
216+
public IndexSettings indexSettings() {
217+
return indexSettings;
218+
}
219+
213220
/**
214221
* Returns the index this module is associated with
215222
*/

server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ public Status needsField(FieldInfo fieldInfo) {
6666
if (IgnoredFieldMapper.NAME.equals(fieldInfo.name)) {
6767
return Status.YES;
6868
}
69+
// support _uid for loading older indices
70+
if ("_uid".equals(fieldInfo.name)) {
71+
return Status.YES;
72+
}
6973
// All these fields are single-valued so we can stop when the set is
7074
// empty
7175
return requiredFields.isEmpty() ? Status.STOP : Status.NO;
@@ -103,9 +107,18 @@ public void binaryField(FieldInfo fieldInfo, BytesRef value) {
103107

104108
@Override
105109
public void stringField(FieldInfo fieldInfo, String value) {
106-
assert IdFieldMapper.NAME.equals(fieldInfo.name) == false : "_id field must go through binaryField";
107110
assert sourceFieldName.equals(fieldInfo.name) == false : "source field must go through binaryField";
108-
addValue(fieldInfo.name, value);
111+
if ("_uid".equals(fieldInfo.name)) {
112+
// _uid is only present in 5.x indices; its value has the form type#id
113+
int delimiterIndex = value.indexOf('#'); // the type is not allowed to contain '#' but the id can, so split at the first '#'
114+
// type = value.substring(0, delimiterIndex);
115+
id = value.substring(delimiterIndex + 1);
116+
} else if (IdFieldMapper.NAME.equals(fieldInfo.name)) {
117+
// only applies to 5.x indices that have single_type = true
118+
id = value;
119+
} else {
120+
addValue(fieldInfo.name, value);
121+
}
109122
}
110123

111124
@Override

server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.elasticsearch.common.io.stream.Writeable;
1717
import org.elasticsearch.common.unit.ByteSizeValue;
1818
import org.elasticsearch.common.xcontent.XContentParserUtils;
19+
import org.elasticsearch.core.Nullable;
1920
import org.elasticsearch.index.store.StoreFileMetadata;
2021
import org.elasticsearch.xcontent.ParseField;
2122
import org.elasticsearch.xcontent.ToXContentFragment;
@@ -41,6 +42,7 @@ public static class FileInfo implements Writeable {
4142
public static final String SERIALIZE_WRITER_UUID = "serialize_writer_uuid";
4243

4344
private final String name;
45+
@Nullable
4446
private final ByteSizeValue partSize;
4547
private final long partBytes;
4648
private final int numberOfParts;
@@ -53,7 +55,7 @@ public static class FileInfo implements Writeable {
5355
* @param metadata the files meta data
5456
* @param partSize size of the single chunk
5557
*/
56-
public FileInfo(String name, StoreFileMetadata metadata, ByteSizeValue partSize) {
58+
public FileInfo(String name, StoreFileMetadata metadata, @Nullable ByteSizeValue partSize) {
5759
this.name = Objects.requireNonNull(name);
5860
this.metadata = metadata;
5961

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/OldLuceneVersions.java

Lines changed: 38 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,10 @@
77

88
package org.elasticsearch.xpack.lucene.bwc;
99

10-
import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
11-
import org.apache.lucene.codecs.Codec;
12-
import org.apache.lucene.codecs.CodecUtil;
1310
import org.apache.lucene.index.SegmentCommitInfo;
1411
import org.apache.lucene.index.SegmentInfo;
1512
import org.apache.lucene.index.SegmentInfos;
16-
import org.apache.lucene.store.ChecksumIndexInput;
17-
import org.apache.lucene.store.Directory;
18-
import org.apache.lucene.store.IOContext;
19-
import org.apache.lucene.util.StringHelper;
20-
import org.apache.lucene.util.Version;
21-
import org.elasticsearch.Build;
13+
import org.elasticsearch.Version;
2214
import org.elasticsearch.common.UUIDs;
2315
import org.elasticsearch.common.lucene.Lucene;
2416
import org.elasticsearch.index.IndexModule;
@@ -28,6 +20,7 @@
2820
import org.elasticsearch.index.shard.IndexShard;
2921
import org.elasticsearch.plugins.IndexStorePlugin;
3022
import org.elasticsearch.plugins.Plugin;
23+
import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec;
3124

3225
import java.io.IOException;
3326
import java.io.UncheckedIOException;
@@ -38,83 +31,69 @@ public class OldLuceneVersions extends Plugin implements IndexStorePlugin {
3831

3932
@Override
4033
public void onIndexModule(IndexModule indexModule) {
41-
if (Build.CURRENT.isSnapshot()) {
34+
if (indexModule.indexSettings().getIndexVersionCreated().before(Version.CURRENT.minimumIndexCompatibilityVersion())) {
4235
indexModule.addIndexEventListener(new IndexEventListener() {
4336
@Override
4437
public void afterFilesRestoredFromRepository(IndexShard indexShard) {
45-
maybeConvertToNewFormat(indexShard);
38+
convertToNewFormat(indexShard);
4639
}
4740
});
4841
}
4942
}
5043

51-
private static void maybeConvertToNewFormat(IndexShard indexShard) {
44+
/**
45+
* The trick used to allow newer Lucene versions to read older Lucene indices is to convert the old directory to a directory that newer
46+
* Lucene versions can operate on. Newer Lucene versions are able to read the older data once we put in place a
47+
* segments file that the newer Lucene version can open, using codecs that allow reading everything from the old files, making it
48+
* available under the newer interfaces. The way this works is to read in the old segments file using a special class
49+
* {@link OldSegmentInfos} that supports reading older Lucene {@link SegmentInfos}, and then write out an updated segments file that
50+
* newer Lucene versions can understand.
51+
*/
52+
private static void convertToNewFormat(IndexShard indexShard) {
5253
indexShard.store().incRef();
5354
try {
54-
try {
55-
Version version = getLuceneVersion(indexShard.store().directory());
56-
// Lucene version in [7.0.0, 8.0.0)
57-
if (version != null
58-
&& version.onOrAfter(Version.fromBits(7, 0, 0))
59-
&& version.onOrAfter(Version.fromBits(8, 0, 0)) == false) {
60-
final OldSegmentInfos oldSegmentInfos = OldSegmentInfos.readLatestCommit(indexShard.store().directory(), 7);
61-
final SegmentInfos segmentInfos = convertLucene7x(oldSegmentInfos);
62-
// write upgraded segments file
63-
segmentInfos.commit(indexShard.store().directory());
55+
final OldSegmentInfos oldSegmentInfos = OldSegmentInfos.readLatestCommit(indexShard.store().directory(), 6);
56+
final SegmentInfos segmentInfos = convertToNewerLuceneVersion(oldSegmentInfos);
57+
// write upgraded segments file
58+
segmentInfos.commit(indexShard.store().directory());
6459

65-
// validate that what we have written can be read using standard path
66-
// TODO: norelease: remove this when development completes
67-
SegmentInfos segmentInfos1 = SegmentInfos.readLatestCommit(indexShard.store().directory());
60+
// validate that what we have written can be read using the standard path
61+
assert SegmentInfos.readLatestCommit(indexShard.store().directory()) != null;
6862

69-
// clean older segments file
70-
Lucene.pruneUnreferencedFiles(segmentInfos1.getSegmentsFileName(), indexShard.store().directory());
71-
}
72-
} catch (IOException e) {
73-
throw new UncheckedIOException(e);
74-
}
63+
// clean up older segments files that are no longer referenced
64+
Lucene.pruneUnreferencedFiles(segmentInfos.getSegmentsFileName(), indexShard.store().directory());
65+
} catch (IOException e) {
66+
throw new UncheckedIOException(e);
7567
} finally {
7668
indexShard.store().decRef();
7769
}
7870
}
7971

80-
private static Version getLuceneVersion(Directory directory) throws IOException {
81-
final String segmentFileName = SegmentInfos.getLastCommitSegmentsFileName(directory);
82-
if (segmentFileName != null) {
83-
long generation = SegmentInfos.generationFromSegmentsFileName(segmentFileName);
84-
try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
85-
CodecUtil.checkHeader(input, "segments", 0, Integer.MAX_VALUE);
86-
byte[] id = new byte[StringHelper.ID_LENGTH];
87-
input.readBytes(id, 0, id.length);
88-
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
89-
90-
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
91-
int indexCreatedVersion = input.readVInt();
92-
return luceneVersion;
93-
} catch (Exception e) {
94-
// ignore
95-
}
96-
}
97-
return null;
98-
}
99-
100-
private static SegmentInfos convertLucene7x(OldSegmentInfos oldSegmentInfos) {
72+
private static SegmentInfos convertToNewerLuceneVersion(OldSegmentInfos oldSegmentInfos) {
10173
final SegmentInfos segmentInfos = new SegmentInfos(org.apache.lucene.util.Version.LATEST.major);
10274
segmentInfos.setNextWriteGeneration(oldSegmentInfos.getGeneration() + 1);
10375
final Map<String, String> map = new HashMap<>(oldSegmentInfos.getUserData());
104-
map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
105-
map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
106-
map.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
107-
map.put(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, "-1");
76+
if (map.containsKey(Engine.HISTORY_UUID_KEY) == false) {
77+
map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
78+
}
79+
if (map.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY) == false) {
80+
map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
81+
}
82+
if (map.containsKey(SequenceNumbers.MAX_SEQ_NO) == false) {
83+
map.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
84+
}
85+
if (map.containsKey(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID) == false) {
86+
map.put(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, "-1");
87+
}
10888
segmentInfos.setUserData(map, true);
10989
for (SegmentCommitInfo infoPerCommit : oldSegmentInfos.asList()) {
110-
SegmentInfo info = infoPerCommit.info;
111-
SegmentInfo newInfo = wrap(info);
90+
final SegmentInfo newInfo = BWCCodec.wrap(infoPerCommit.info);
11291

11392
segmentInfos.add(
11493
new SegmentCommitInfo(
11594
newInfo,
11695
infoPerCommit.getDelCount(),
117-
0,
96+
infoPerCommit.getSoftDelCount(),
11897
infoPerCommit.getDelGen(),
11998
infoPerCommit.getFieldInfosGen(),
12099
infoPerCommit.getDocValuesGen(),
@@ -125,31 +104,6 @@ private static SegmentInfos convertLucene7x(OldSegmentInfos oldSegmentInfos) {
125104
return segmentInfos;
126105
}
127106

128-
static SegmentInfo wrap(SegmentInfo segmentInfo) {
129-
// Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
130-
// TODO: alternatively store the original version information in attributes?
131-
byte[] id = segmentInfo.getId();
132-
if (id == null) {
133-
id = StringHelper.randomId();
134-
}
135-
Codec codec = segmentInfo.getCodec() instanceof Lucene70Codec ? new BWCLucene70Codec() : segmentInfo.getCodec();
136-
SegmentInfo segmentInfo1 = new SegmentInfo(
137-
segmentInfo.dir,
138-
org.apache.lucene.util.Version.LATEST,
139-
org.apache.lucene.util.Version.LATEST,
140-
segmentInfo.name,
141-
segmentInfo.maxDoc(),
142-
segmentInfo.getUseCompoundFile(),
143-
codec,
144-
segmentInfo.getDiagnostics(),
145-
id,
146-
segmentInfo.getAttributes(),
147-
null
148-
);
149-
segmentInfo1.setFiles(segmentInfo.files());
150-
return segmentInfo1;
151-
}
152-
153107
@Override
154108
public Map<String, DirectoryFactory> getDirectoryFactories() {
155109
return Map.of();

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/OldSegmentInfos.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1515
* See the License for the specific language governing permissions and
1616
* limitations under the License.
17+
*
18+
* Modifications copyright (C) 2021 Elasticsearch B.V.
1719
*/
1820

1921
package org.elasticsearch.xpack.lucene.bwc;
@@ -60,6 +62,12 @@
6062
@SuppressWarnings("CheckStyle")
6163
@SuppressForbidden(reason = "Lucene class")
6264
public class OldSegmentInfos implements Cloneable, Iterable<SegmentCommitInfo> {
65+
66+
/**
67+
* The version that added the {@link Version} that committed this segments_N file, as well as the {@link Version}
68+
* of the oldest segment (since 5.3).
69+
*/
70+
public static final int VERSION_53 = 6;
6371
/**
6472
* The version that added information about the Lucene version at the time when the index has been
6573
* created.
@@ -209,13 +217,16 @@ static final OldSegmentInfos readCommit(Directory directory, ChecksumIndexInput
209217
if (magic != CodecUtil.CODEC_MAGIC) {
210218
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
211219
}
212-
format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_70, VERSION_CURRENT);
220+
format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_53, VERSION_CURRENT);
213221
byte[] id = new byte[StringHelper.ID_LENGTH];
214222
input.readBytes(id, 0, id.length);
215223
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
216224

217225
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
218-
int indexCreatedVersion = input.readVInt();
226+
int indexCreatedVersion = 6;
227+
if (format >= VERSION_70) {
228+
indexCreatedVersion = input.readVInt();
229+
}
219230
if (luceneVersion.major < indexCreatedVersion) {
220231
throw new CorruptIndexException(
221232
"Creation version ["
@@ -252,7 +263,7 @@ static final OldSegmentInfos readCommit(Directory directory, ChecksumIndexInput
252263
} catch (Throwable t) {
253264
priorE = t;
254265
} finally {
255-
if (format >= VERSION_70) { // oldest supported version
266+
if (format >= VERSION_53) { // oldest supported version
256267
CodecUtil.checkFooter(input, priorE);
257268
} else {
258269
throw IOUtils.rethrowAlways(priorE);
@@ -283,6 +294,14 @@ private static void parseSegmentInfos(Directory directory, DataInput input, OldS
283294
long totalDocs = 0;
284295
for (int seg = 0; seg < numSegments; seg++) {
285296
String segName = input.readString();
297+
if (format < VERSION_70) {
298+
byte hasID = input.readByte();
299+
if (hasID == 0) {
300+
throw new IndexFormatTooOldException(input, "Segment is from Lucene 4.x");
301+
} else if (hasID != 1) {
302+
throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
303+
}
304+
}
286305
byte[] segmentID = new byte[StringHelper.ID_LENGTH];
287306
input.readBytes(segmentID, 0, segmentID.length);
288307
Codec codec = readCodec(input);

0 commit comments

Comments
 (0)