From 354624f5a682a82d07727f2d06f6a2da8dabfb9e Mon Sep 17 00:00:00 2001
From: Yannick Welsch
Date: Tue, 22 Mar 2022 09:41:43 +0100
Subject: [PATCH 01/19] Support old postings formats

---
 .../core/internal/io/IOUtils.java                  |   42 +
 .../xpack/lucene/bwc/codecs/BWCCodec.java          |   81 +-
 .../LegacyAdaptingPerFieldPostingsFormat.java      |  200 ++
 .../blocktree/CompressionAlgorithm.java            |   77 +
 .../lucene40/blocktree/FieldReader.java            |  207 ++
 .../blocktree/IntersectTermsEnum.java              |  577 ++++++
 .../blocktree/IntersectTermsEnumFrame.java         |  358 ++++
 .../Lucene40BlockTreeTermsReader.java              |  395 ++++
 .../lucene40/blocktree/SegmentTermsEnum.java       | 1170 +++++++++++
 .../blocktree/SegmentTermsEnumFrame.java           |  765 +++++++
 .../bwc/codecs/lucene40/blocktree/Stats.java       |  277 +++
 .../lucene50/BWCLucene50PostingsFormat.java        |  477 +++++
 .../lucene/bwc/codecs/lucene50/ForUtil.java        |  235 +++
 .../lucene50/Lucene50PostingsReader.java           | 1787 +++++++++++++++++
 .../lucene50/Lucene50ScoreSkipReader.java          |  167 ++
 .../codecs/lucene50/Lucene50SkipReader.java        |  210 ++
 .../bwc/codecs/lucene60/Lucene60Codec.java         |   22 +
 .../bwc/codecs/lucene62/Lucene62Codec.java         |   23 +
 .../bwc/codecs/lucene70/BWCLucene70Codec.java      |   13 +
 .../bwc/codecs/lucene70/fst/BitTableUtil.java      |  176 ++
 .../lucene70/fst/ByteSequenceOutputs.java          |  164 ++
 .../codecs/lucene70/fst/BytesRefFSTEnum.java       |  129 ++
 .../bwc/codecs/lucene70/fst/BytesStore.java        |  520 +++++
 .../lucene/bwc/codecs/lucene70/fst/FST.java        | 1569 +++++++++++++++
 .../bwc/codecs/lucene70/fst/FSTCompiler.java       |  804 ++++++++
 .../bwc/codecs/lucene70/fst/FSTEnum.java           |  660 ++++++
 .../bwc/codecs/lucene70/fst/FSTStore.java          |   37 +
 .../lucene70/fst/ForwardBytesReader.java           |   64 +
 .../bwc/codecs/lucene70/fst/NodeHash.java          |  192 ++
 .../codecs/lucene70/fst/OffHeapFSTStore.java       |   79 +
 .../codecs/lucene70/fst/OnHeapFSTStore.java        |  103 +
 .../bwc/codecs/lucene70/fst/Outputs.java           |  108 +
 .../lucene70/fst/ReverseBytesReader.java           |   62 +
 .../fst/ReverseRandomAccessReader.java             |   67 +
 .../lucene/bwc/codecs/lucene70/fst/Util.java       |  903 +++++++++
 .../Lucene40BlockTreeTermsWriter.java              | 1124 +++++++++++
 .../lucene50/BlockPostingsFormat2Tests.java        |  149 ++
 .../lucene50/BlockPostingsFormat3Tests.java        |  477 +++++
 .../lucene50/BlockPostingsFormatTests.java         |  138 ++
 .../lucene50/Lucene50PostingsWriter.java           |  513 +++++
 .../lucene50/Lucene50RWPostingsFormat.java         |   56 +
 .../codecs/lucene50/Lucene50SkipWriter.java        |  233 +++
 .../org.apache.lucene.codecs.PostingsFormat        |   16 +
 .../oldrepos/OldRepositoryAccessIT.java            |   19 +
 44 files changed, 15368 insertions(+), 77 deletions(-)
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/CompressionAlgorithm.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnum.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnumFrame.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnum.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnumFrame.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/ForUtil.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50ScoreSkipReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesStore.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTStore.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ForwardBytesReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/NodeHash.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseBytesReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseRandomAccessReader.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat2Tests.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat3Tests.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormatTests.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50RWPostingsFormat.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipWriter.java
 create mode 100644 x-pack/plugin/old-lucene-versions/src/test/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat

diff --git a/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java b/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java
index 0a45fe010bbb5..183ff4111b693 100644
--- a/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java
+++ b/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java
@@ -316,4 +316,46 @@ public static void fsync(final Path fileToSync, final boolean isDir, final boole
             }
         }
     }
+
+    /**
+     * This utility method takes a previously caught (non-null) {@code Throwable} and rethrows either
+     * the original argument if it was a subclass of {@code IOException} or a {@code
+     * RuntimeException} with the cause set to the argument.
+     *
+     * <p>This method never returns any value, even though it declares a return value
+     * of type {@link Error}. The return value declaration is very useful to let the compiler know
+     * that the code path following the invocation of this method is unreachable. So in most cases the
+     * invocation of this method will be guarded by an {@code if} and used together with a {@code
+     * throw} statement, as in:
+     *
+     * <pre>{@code
+     * if (t != null) throw IOUtils.rethrowAlways(t)
+     * }</pre>
+ * + * @param th The throwable to rethrow, must not be null. + * @return This method always results in an exception, it never returns any value. See method + * documentation for details and usage example. + * @throws IOException if the argument was an instance of IOException + * @throws RuntimeException with the {@link RuntimeException#getCause()} set to the argument, if + * it was not an instance of IOException. + */ + public static Error rethrowAlways(Throwable th) throws IOException, RuntimeException { + if (th == null) { + throw new AssertionError("rethrow argument must not be null."); + } + + if (th instanceof IOException) { + throw (IOException) th; + } + + if (th instanceof RuntimeException) { + throw (RuntimeException) th; + } + + if (th instanceof Error) { + throw (Error) th; + } + + throw new RuntimeException(th); + } } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java index b350f6a62404f..be5be0bc6a965 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java @@ -10,30 +10,20 @@ import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosFormat; -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsFormat; -import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.Terms; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec; import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; /** @@ -41,17 +31,10 @@ */ public abstract class BWCCodec extends Codec { - private final PostingsFormat postingsFormat = new EmptyPostingsFormat(); - protected BWCCodec(String name) { super(name); } - @Override - public PostingsFormat postingsFormat() { - return postingsFormat; - } - @Override public NormsFormat normsFormat() { throw new UnsupportedOperationException(); @@ -72,62 +55,6 @@ public KnnVectorsFormat knnVectorsFormat() { throw new UnsupportedOperationException(); } - /** - * In-memory postings format that shows no postings available. - * TODO: Remove once https://issues.apache.org/jira/browse/LUCENE-10291 is fixed. 
- */ - static class EmptyPostingsFormat extends PostingsFormat { - - protected EmptyPostingsFormat() { - super("EmptyPostingsFormat"); - } - - @Override - public FieldsConsumer fieldsConsumer(SegmentWriteState state) { - return new FieldsConsumer() { - @Override - public void write(Fields fields, NormsProducer norms) { - throw new UnsupportedOperationException(); - } - - @Override - public void close() { - - } - }; - } - - @Override - public FieldsProducer fieldsProducer(SegmentReadState state) { - return new FieldsProducer() { - @Override - public void close() { - - } - - @Override - public void checkIntegrity() { - - } - - @Override - public Iterator iterator() { - return null; - } - - @Override - public Terms terms(String field) { - return null; - } - - @Override - public int size() { - return 0; - } - }; - } - } - protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) { return new SegmentInfoFormat() { @Override @@ -158,7 +85,7 @@ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSu }; } - // mark all fields as having no postings, no term vectors, no norms, no payloads, no points, and no vectors. + // mark all fields as no term vectors, no norms, no payloads, no points, and no vectors. private static FieldInfos filterFields(FieldInfos fieldInfos) { List fieldInfoCopy = new ArrayList<>(fieldInfos.size()); for (FieldInfo fieldInfo : fieldInfos) { @@ -167,9 +94,9 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) { fieldInfo.name, fieldInfo.number, false, + true, false, - false, - IndexOptions.NONE, + fieldInfo.getIndexOptions(), fieldInfo.getDocValuesType(), fieldInfo.getDocValuesGen(), fieldInfo.attributes(), @@ -202,7 +129,7 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) { codec, segmentInfo.getDiagnostics(), segmentInfo.getId(), - segmentInfo.getAttributes(), + segmentInfo.getAttributes(), // adapt attributes so that per-field format codecs are overriden segmentInfo.getIndexSort() ); segmentInfo1.setFiles(segmentInfo.files()); diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java new file mode 100644 index 0000000000000..8aefcd875834c --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java @@ -0,0 +1,200 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.lucene.bwc.codecs; + +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Terms; +import org.elasticsearch.core.internal.io.IOUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +public abstract class LegacyAdaptingPerFieldPostingsFormat extends PostingsFormat { + /** Name of this {@link PostingsFormat}. */ + public static final String PER_FIELD_NAME = "PerField40"; + + /** {@link FieldInfo} attribute name used to store the format name for each field. */ + public static final String PER_FIELD_FORMAT_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".format"; + + /** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */ + public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix"; + + /** Sole constructor. */ + protected LegacyAdaptingPerFieldPostingsFormat() { + super(PER_FIELD_NAME); + } + + static String getSuffix(String formatName, String suffix) { + return formatName + "_" + suffix; + } + + protected PostingsFormat getPostingsFormat(String formatName) { + throw new IllegalArgumentException(formatName); + } + + private class FieldsWriter extends FieldsConsumer { + final SegmentWriteState writeState; + final List toClose = new ArrayList(); + + FieldsWriter(SegmentWriteState writeState) { + this.writeState = writeState; + } + + @Override + public void write(Fields fields, NormsProducer norms) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void merge(MergeState mergeState, NormsProducer norms) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException { + IOUtils.close(toClose); + } + } + + private static class FieldsReader extends FieldsProducer { + + private final Map fields = new TreeMap<>(); + private final Map formats = new HashMap<>(); + private final String segment; + + // clone for merge + FieldsReader(FieldsReader other) { + Map oldToNew = new IdentityHashMap<>(); + // First clone all formats + for (Map.Entry ent : other.formats.entrySet()) { + FieldsProducer values = ent.getValue().getMergeInstance(); + formats.put(ent.getKey(), values); + oldToNew.put(ent.getValue(), values); + } + + // Then rebuild fields: + for (Map.Entry ent : other.fields.entrySet()) { + FieldsProducer producer = oldToNew.get(ent.getValue()); + assert producer != null; + fields.put(ent.getKey(), producer); + } + + segment = other.segment; + } + + FieldsReader(final SegmentReadState readState, LegacyAdaptingPerFieldPostingsFormat legacyAdaptingPerFieldPostingsFormat) + throws IOException { + + // Read _X.per and init each format: + boolean success = false; + try { + // Read field name -> format name + for (FieldInfo fi : 
readState.fieldInfos) { + if (fi.getIndexOptions() != IndexOptions.NONE) { + final String fieldName = fi.name; + final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY); + if (formatName != null) { + // null formatName means the field is in fieldInfos, but has no postings! + final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY); + if (suffix == null) { + throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName); + } + PostingsFormat format = legacyAdaptingPerFieldPostingsFormat.getPostingsFormat(formatName); + String segmentSuffix = getSuffix(formatName, suffix); + if (formats.containsKey(segmentSuffix) == false) { + formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); + } + fields.put(fieldName, formats.get(segmentSuffix)); + } + } + } + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(formats.values()); + } + } + + this.segment = readState.segmentInfo.name; + } + + @Override + public Iterator iterator() { + return Collections.unmodifiableSet(fields.keySet()).iterator(); + } + + @Override + public Terms terms(String field) throws IOException { + FieldsProducer fieldsProducer = fields.get(field); + return fieldsProducer == null ? null : fieldsProducer.terms(field); + } + + @Override + public int size() { + return fields.size(); + } + + @Override + public void close() throws IOException { + IOUtils.close(formats.values()); + } + + @Override + public void checkIntegrity() throws IOException { + for (FieldsProducer producer : formats.values()) { + producer.checkIntegrity(); + } + } + + @Override + public FieldsProducer getMergeInstance() { + return new FieldsReader(this); + } + + @Override + public String toString() { + return "PerFieldPostings(segment=" + segment + " formats=" + formats.size() + ")"; + } + } + + @Override + public final FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new FieldsWriter(state); + } + + @Override + public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + return new FieldsReader(state, this); + } + + /** + * Returns the postings format that should be used for writing new segments of field. + * + *

The field to format mapping is written to the index, so this method is only invoked when + * writing, not when reading. + */ + public abstract PostingsFormat getPostingsFormatForField(String field); +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/CompressionAlgorithm.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/CompressionAlgorithm.java new file mode 100644 index 0000000000000..c353279451a3e --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/CompressionAlgorithm.java @@ -0,0 +1,77 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.util.compress.LowercaseAsciiCompression; + +import java.io.IOException; + +/** Compression algorithm used for suffixes of a block of terms. */ +enum CompressionAlgorithm { + NO_COMPRESSION(0x00) { + + @Override + void read(DataInput in, byte[] out, int len) throws IOException { + in.readBytes(out, 0, len); + } + }, + + LOWERCASE_ASCII(0x01) { + + @Override + void read(DataInput in, byte[] out, int len) throws IOException { + LowercaseAsciiCompression.decompress(in, out, len); + } + }, + + LZ4(0x02) { + + @Override + void read(DataInput in, byte[] out, int len) throws IOException { + org.apache.lucene.util.compress.LZ4.decompress(EndiannessReverserUtil.wrapDataInput(in), len, out, 0); + } + }; + + private static final CompressionAlgorithm[] BY_CODE = new CompressionAlgorithm[3]; + + static { + for (CompressionAlgorithm alg : CompressionAlgorithm.values()) { + BY_CODE[alg.code] = alg; + } + } + + /** Look up a {@link CompressionAlgorithm} by its {@link CompressionAlgorithm#code}. 
*/ + static CompressionAlgorithm byCode(int code) { + if (code < 0 || code >= BY_CODE.length) { + throw new IllegalArgumentException("Illegal code for a compression algorithm: " + code); + } + return BY_CODE[code]; + } + + public final int code; + + CompressionAlgorithm(int code) { + this.code = code; + } + + abstract void read(DataInput in, byte[] out, int len) throws IOException; +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java new file mode 100644 index 0000000000000..3d24e82edd18b --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java @@ -0,0 +1,207 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.ByteSequenceOutputs; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.OffHeapFSTStore; + +import java.io.IOException; + +/** + * BlockTree's implementation of {@link Terms}. 
+ * + * @lucene.internal + */ +public final class FieldReader extends Terms { + + // private final boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + final long numTerms; + final FieldInfo fieldInfo; + final long sumTotalTermFreq; + final long sumDocFreq; + final int docCount; + final long rootBlockFP; + final BytesRef rootCode; + final BytesRef minTerm; + final BytesRef maxTerm; + final Lucene40BlockTreeTermsReader parent; + + final FST index; + // private boolean DEBUG; + + FieldReader( + Lucene40BlockTreeTermsReader parent, + FieldInfo fieldInfo, + long numTerms, + BytesRef rootCode, + long sumTotalTermFreq, + long sumDocFreq, + int docCount, + long indexStartFP, + IndexInput metaIn, + IndexInput indexIn, + BytesRef minTerm, + BytesRef maxTerm + ) throws IOException { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + // DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); + this.parent = parent; + this.numTerms = numTerms; + this.sumTotalTermFreq = sumTotalTermFreq; + this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + this.rootCode = rootCode; + this.minTerm = minTerm; + this.maxTerm = maxTerm; + // if (DEBUG) { + // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + // + rootCode + " divisor=" + indexDivisor); + // } + rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() + >>> Lucene40BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS; + // Initialize FST always off-heap. + final IndexInput clone = indexIn.clone(); + clone.seek(indexStartFP); + if (metaIn == indexIn) { // Only true before Lucene 8.6 + index = new FST<>(clone, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore()); + } else { + index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore()); + } + /* + if (false) { + final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; + Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); + Util.toDot(index, w, false, false); + System.out.println("FST INDEX: SAVED to " + dotFileName); + w.close(); + } + */ + } + + @Override + public BytesRef getMin() throws IOException { + if (minTerm == null) { + // Older index that didn't store min/maxTerm + return super.getMin(); + } else { + return minTerm; + } + } + + @Override + public BytesRef getMax() throws IOException { + if (maxTerm == null) { + // Older index that didn't store min/maxTerm + return super.getMax(); + } else { + return maxTerm; + } + } + + /** For debugging -- used by CheckIndex too */ + @Override + public Stats getStats() throws IOException { + return new SegmentTermsEnum(this).computeBlockStats(); + } + + @Override + public boolean hasFreqs() { + return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + } + + @Override + public boolean hasOffsets() { + return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + } + + @Override + public boolean hasPositions() { + return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + } + + @Override + public boolean hasPayloads() { + return fieldInfo.hasPayloads(); + } + + @Override + public TermsEnum iterator() throws IOException { + return new SegmentTermsEnum(this); + } + + @Override + public long size() { + return numTerms; + } + + @Override + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + + @Override + public long getSumDocFreq() { + return sumDocFreq; + } + + @Override + public 
int getDocCount() { + return docCount; + } + + @Override + public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { + // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + + // BlockTreeTermsWriter.brToString(startTerm)); + // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton); + // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? + // can we optimize knowing that...? + if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); + } + return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm); + } + + @Override + public String toString() { + return "BlockTreeTerms(seg=" + + parent.segment + + " terms=" + + numTerms + + ",postings=" + + sumDocFreq + + ",positions=" + + sumTotalTermFreq + + ",docs=" + + docCount + + ")"; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnum.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnum.java new file mode 100644 index 0000000000000..7bc765a78bd0e --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnum.java @@ -0,0 +1,577 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.index.BaseTermsEnum; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.RunAutomaton; +import org.apache.lucene.util.automaton.Transition; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.ByteSequenceOutputs; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Outputs; + +import java.io.IOException; + +/** + * This is used to implement efficient {@link Terms#intersect} for block-tree. Note that it cannot + * seek, except for the initial term on init. 
It just "nexts" through the intersection of the + * automaton and the terms. It does not use the terms index at all: on init, it loads the root + * block, and scans its way to the initial term. Likewise, in next it scans until it finds a term + * that matches the current automaton transition. + */ +final class IntersectTermsEnum extends BaseTermsEnum { + + // static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + final IndexInput in; + static final Outputs fstOutputs = ByteSequenceOutputs.getSingleton(); + + IntersectTermsEnumFrame[] stack; + + @SuppressWarnings({ "rawtypes", "unchecked" }) + private FST.Arc[] arcs = new FST.Arc[5]; + + final RunAutomaton runAutomaton; + final Automaton automaton; + final BytesRef commonSuffix; + + private IntersectTermsEnumFrame currentFrame; + private Transition currentTransition; + + private final BytesRef term = new BytesRef(); + + private final FST.BytesReader fstReader; + + final FieldReader fr; + + private BytesRef savedStartTerm; + + // TODO: in some cases we can filter by length? eg + // regexp foo*bar must be at least length 6 bytes + IntersectTermsEnum(FieldReader fr, Automaton automaton, RunAutomaton runAutomaton, BytesRef commonSuffix, BytesRef startTerm) + throws IOException { + this.fr = fr; + + assert automaton != null; + assert runAutomaton != null; + + this.runAutomaton = runAutomaton; + this.automaton = automaton; + this.commonSuffix = commonSuffix; + + in = fr.parent.termsIn.clone(); + stack = new IntersectTermsEnumFrame[5]; + for (int idx = 0; idx < stack.length; idx++) { + stack[idx] = new IntersectTermsEnumFrame(this, idx); + } + for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) { + arcs[arcIdx] = new FST.Arc<>(); + } + + fstReader = fr.index.getBytesReader(); + + // TODO: if the automaton is "smallish" we really + // should use the terms index to seek at least to + // the initial term and likely to subsequent terms + // (or, maybe just fallback to ATE for such cases). + // Else the seek cost of loading the frames will be + // too costly. + + final FST.Arc arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + + // Special pushFrame since it's the first one: + final IntersectTermsEnumFrame f = stack[0]; + f.fp = f.fpOrig = fr.rootBlockFP; + f.prefix = 0; + f.setState(0); + f.arc = arc; + f.outputPrefix = arc.output(); + f.load(fr.rootCode); + + // for assert: + assert setSavedStartTerm(startTerm); + + currentFrame = f; + if (startTerm != null) { + seekToStartTerm(startTerm); + } + currentTransition = currentFrame.transition; + } + + // only for assert: + private boolean setSavedStartTerm(BytesRef startTerm) { + savedStartTerm = startTerm == null ? 
null : BytesRef.deepCopyOf(startTerm); + return true; + } + + @Override + public TermState termState() throws IOException { + currentFrame.decodeMetaData(); + return currentFrame.termState.clone(); + } + + private IntersectTermsEnumFrame getFrame(int ord) throws IOException { + if (ord >= stack.length) { + final IntersectTermsEnumFrame[] next = new IntersectTermsEnumFrame[ArrayUtil.oversize( + 1 + ord, + RamUsageEstimator.NUM_BYTES_OBJECT_REF + )]; + System.arraycopy(stack, 0, next, 0, stack.length); + for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) { + next[stackOrd] = new IntersectTermsEnumFrame(this, stackOrd); + } + stack = next; + } + assert stack[ord].ord == ord; + return stack[ord]; + } + + private FST.Arc getArc(int ord) { + if (ord >= arcs.length) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final FST.Arc[] next = new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(arcs, 0, next, 0, arcs.length); + for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) { + next[arcOrd] = new FST.Arc<>(); + } + arcs = next; + } + return arcs[ord]; + } + + private IntersectTermsEnumFrame pushFrame(int state) throws IOException { + assert currentFrame != null; + + final IntersectTermsEnumFrame f = getFrame(currentFrame == null ? 0 : 1 + currentFrame.ord); + + f.fp = f.fpOrig = currentFrame.lastSubFP; + f.prefix = currentFrame.prefix + currentFrame.suffix; + f.setState(state); + + // Walk the arc through the index -- we only + // "bother" with this so we can get the floor data + // from the index and skip floor blocks when + // possible: + FST.Arc arc = currentFrame.arc; + int idx = currentFrame.prefix; + assert currentFrame.suffix > 0; + BytesRef output = currentFrame.outputPrefix; + while (idx < f.prefix) { + final int target = term.bytes[idx] & 0xff; + // TODO: we could be more efficient for the next() + // case by using current arc as starting point, + // passed to findTargetArc + arc = fr.index.findTargetArc(target, arc, getArc(1 + idx), fstReader); + assert arc != null; + output = fstOutputs.add(output, arc.output()); + idx++; + } + + f.arc = arc; + f.outputPrefix = output; + assert arc.isFinal(); + f.load(fstOutputs.add(output, arc.nextFinalOutput())); + return f; + } + + @Override + public BytesRef term() { + return term; + } + + @Override + public int docFreq() throws IOException { + currentFrame.decodeMetaData(); + return currentFrame.termState.docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + currentFrame.decodeMetaData(); + return currentFrame.termState.totalTermFreq; + } + + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + currentFrame.decodeMetaData(); + return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, reuse, flags); + } + + @Override + public ImpactsEnum impacts(int flags) throws IOException { + currentFrame.decodeMetaData(); + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags); + } + + private int getState() { + int state = currentFrame.state; + for (int idx = 0; idx < currentFrame.suffix; idx++) { + state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos + idx] & 0xff); + assert state != -1; + } + return state; + } + + // NOTE: specialized to only doing the first-time + // seek, but we could generalize it to allow + // arbitrary seekExact/Ceil. Note that this is a + // seekFloor! 
+ private void seekToStartTerm(BytesRef target) throws IOException { + assert currentFrame.ord == 0; + if (term.length < target.length) { + term.bytes = ArrayUtil.grow(term.bytes, target.length); + } + FST.Arc arc = arcs[0]; + assert arc == currentFrame.arc; + + for (int idx = 0; idx <= target.length; idx++) { + + while (true) { + final int savNextEnt = currentFrame.nextEnt; + final int savePos = currentFrame.suffixesReader.getPosition(); + final int saveLengthPos = currentFrame.suffixLengthsReader.getPosition(); + final int saveStartBytePos = currentFrame.startBytePos; + final int saveSuffix = currentFrame.suffix; + final long saveLastSubFP = currentFrame.lastSubFP; + final int saveTermBlockOrd = currentFrame.termState.termBlockOrd; + + final boolean isSubBlock = currentFrame.next(); + + term.length = currentFrame.prefix + currentFrame.suffix; + if (term.bytes.length < term.length) { + term.bytes = ArrayUtil.grow(term.bytes, term.length); + } + System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix); + + if (isSubBlock && StringHelper.startsWith(target, term)) { + // Recurse + currentFrame = pushFrame(getState()); + break; + } else { + final int cmp = term.compareTo(target); + if (cmp < 0) { + if (currentFrame.nextEnt == currentFrame.entCount) { + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + continue; + } else { + return; + } + } + continue; + } else if (cmp == 0) { + return; + } else { + // Fallback to prior entry: the semantics of + // this method is that the first call to + // next() will return the term after the + // requested term + currentFrame.nextEnt = savNextEnt; + currentFrame.lastSubFP = saveLastSubFP; + currentFrame.startBytePos = saveStartBytePos; + currentFrame.suffix = saveSuffix; + currentFrame.suffixesReader.setPosition(savePos); + currentFrame.suffixLengthsReader.setPosition(saveLengthPos); + currentFrame.termState.termBlockOrd = saveTermBlockOrd; + System.arraycopy( + currentFrame.suffixBytes, + currentFrame.startBytePos, + term.bytes, + currentFrame.prefix, + currentFrame.suffix + ); + term.length = currentFrame.prefix + currentFrame.suffix; + // If the last entry was a block we don't + // need to bother recursing and pushing to + // the last term under it because the first + // next() will simply skip the frame anyway + return; + } + } + } + } + + assert false; + } + + private boolean popPushNext() throws IOException { + // Pop finished frames + while (currentFrame.nextEnt == currentFrame.entCount) { + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + break; + } else { + if (currentFrame.ord == 0) { + throw NoMoreTermsException.INSTANCE; + } + final long lastFP = currentFrame.fpOrig; + currentFrame = stack[currentFrame.ord - 1]; + currentTransition = currentFrame.transition; + assert currentFrame.lastSubFP == lastFP; + } + } + + return currentFrame.next(); + } + + // Only used internally when there are no more terms in next(): + private static final class NoMoreTermsException extends RuntimeException { + + // Only used internally when there are no more terms in next(): + public static final NoMoreTermsException INSTANCE = new NoMoreTermsException(); + + private NoMoreTermsException() {} + + @Override + public Throwable fillInStackTrace() { + // Do nothing: + return this; + } + } + + @Override + public BytesRef next() throws IOException { + try { + return _next(); + } 
catch (@SuppressWarnings("unused") NoMoreTermsException eoi) { + // Provoke NPE if we are (illegally!) called again: + currentFrame = null; + return null; + } + } + + private BytesRef _next() throws IOException { + + boolean isSubBlock = popPushNext(); + + nextTerm: while (true) { + assert currentFrame.transition == currentTransition; + + int state; + int lastState; + + // NOTE: suffix == 0 can only happen on the first term in a block, when + // there is a term exactly matching a prefix in the index. If we + // could somehow re-org the code so we only checked this case immediately + // after pushing a frame... + if (currentFrame.suffix != 0) { + + final byte[] suffixBytes = currentFrame.suffixBytes; + + // This is the first byte of the suffix of the term we are now on: + final int label = suffixBytes[currentFrame.startBytePos] & 0xff; + + if (label < currentTransition.min) { + // Common case: we are scanning terms in this block to "catch up" to + // current transition in the automaton: + int minTrans = currentTransition.min; + while (currentFrame.nextEnt < currentFrame.entCount) { + isSubBlock = currentFrame.next(); + if ((suffixBytes[currentFrame.startBytePos] & 0xff) >= minTrans) { + continue nextTerm; + } + } + + // End of frame: + isSubBlock = popPushNext(); + continue nextTerm; + } + + // Advance where we are in the automaton to match this label: + + while (label > currentTransition.max) { + if (currentFrame.transitionIndex >= currentFrame.transitionCount - 1) { + // Pop this frame: no further matches are possible because + // we've moved beyond what the max transition will allow + if (currentFrame.ord == 0) { + // Provoke NPE if we are (illegally!) called again: + currentFrame = null; + return null; + } + currentFrame = stack[currentFrame.ord - 1]; + currentTransition = currentFrame.transition; + isSubBlock = popPushNext(); + continue nextTerm; + } + currentFrame.transitionIndex++; + automaton.getNextTransition(currentTransition); + + if (label < currentTransition.min) { + int minTrans = currentTransition.min; + while (currentFrame.nextEnt < currentFrame.entCount) { + isSubBlock = currentFrame.next(); + if ((suffixBytes[currentFrame.startBytePos] & 0xff) >= minTrans) { + continue nextTerm; + } + } + + // End of frame: + isSubBlock = popPushNext(); + continue nextTerm; + } + } + + if (commonSuffix != null && isSubBlock == false) { + final int termLen = currentFrame.prefix + currentFrame.suffix; + if (termLen < commonSuffix.length) { + // No match + isSubBlock = popPushNext(); + continue nextTerm; + } + + final byte[] commonSuffixBytes = commonSuffix.bytes; + + final int lenInPrefix = commonSuffix.length - currentFrame.suffix; + assert commonSuffix.offset == 0; + int suffixBytesPos; + int commonSuffixBytesPos = 0; + + if (lenInPrefix > 0) { + // A prefix of the common suffix overlaps with + // the suffix of the block prefix so we first + // test whether the prefix part matches: + final byte[] termBytes = term.bytes; + int termBytesPos = currentFrame.prefix - lenInPrefix; + assert termBytesPos >= 0; + final int termBytesPosEnd = currentFrame.prefix; + while (termBytesPos < termBytesPosEnd) { + if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) { + isSubBlock = popPushNext(); + continue nextTerm; + } + } + suffixBytesPos = currentFrame.startBytePos; + } else { + suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - commonSuffix.length; + } + + // Test overlapping suffix part: + final int commonSuffixBytesPosEnd = commonSuffix.length; + while 
(commonSuffixBytesPos < commonSuffixBytesPosEnd) { + if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) { + isSubBlock = popPushNext(); + continue nextTerm; + } + } + } + + // TODO: maybe we should do the same linear test + // that AutomatonTermsEnum does, so that if we + // reach a part of the automaton where .* is + // "temporarily" accepted, we just blindly .next() + // until the limit + + // See if the term suffix matches the automaton: + + // We know from above that the first byte in our suffix (label) matches + // the current transition, so we step from the 2nd byte + // in the suffix: + lastState = currentFrame.state; + state = currentTransition.dest; + + int end = currentFrame.startBytePos + currentFrame.suffix; + for (int idx = currentFrame.startBytePos + 1; idx < end; idx++) { + lastState = state; + state = runAutomaton.step(state, suffixBytes[idx] & 0xff); + if (state == -1) { + // No match + isSubBlock = popPushNext(); + continue nextTerm; + } + } + } else { + state = currentFrame.state; + lastState = currentFrame.lastState; + } + + if (isSubBlock) { + // Match! Recurse: + copyTerm(); + currentFrame = pushFrame(state); + currentTransition = currentFrame.transition; + currentFrame.lastState = lastState; + } else if (runAutomaton.isAccept(state)) { + copyTerm(); + assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0 + : "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString(); + return term; + } else { + // This term is a prefix of a term accepted by the automaton, but is not itself accepted + } + + isSubBlock = popPushNext(); + } + } + + // for debugging + @SuppressWarnings("unused") + static String brToString(BytesRef b) { + try { + return b.utf8ToString() + " " + b; + } catch (Throwable t) { + // If BytesRef isn't actually UTF8, or it's eg a + // prefix of UTF8 that ends mid-unicode-char, we + // fallback to hex: + return b.toString(); + } + } + + private void copyTerm() { + final int len = currentFrame.prefix + currentFrame.suffix; + if (term.bytes.length < len) { + term.bytes = ArrayUtil.grow(term.bytes, len); + } + System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix); + term.length = len; + } + + @Override + public boolean seekExact(BytesRef text) { + throw new UnsupportedOperationException(); + } + + @Override + public void seekExact(long ord) { + throw new UnsupportedOperationException(); + } + + @Override + public long ord() { + throw new UnsupportedOperationException(); + } + + @Override + public SeekStatus seekCeil(BytesRef text) { + throw new UnsupportedOperationException(); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnumFrame.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnumFrame.java new file mode 100644 index 0000000000000..ab515b958b689 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnumFrame.java @@ -0,0 +1,358 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Transition; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST; + +import java.io.IOException; +import java.util.Arrays; + +// TODO: can we share this with the frame in STE? +final class IntersectTermsEnumFrame { + final int ord; + long fp; + long fpOrig; + long fpEnd; + long lastSubFP; + + // private static boolean DEBUG = IntersectTermsEnum.DEBUG; + + // State in automaton + int state; + + // State just before the last label + int lastState; + + int metaDataUpto; + + byte[] suffixBytes = new byte[128]; + final ByteArrayDataInput suffixesReader = new ByteArrayDataInput(); + + byte[] suffixLengthBytes; + final ByteArrayDataInput suffixLengthsReader; + + byte[] statBytes = new byte[64]; + int statsSingletonRunLength = 0; + final ByteArrayDataInput statsReader = new ByteArrayDataInput(); + + byte[] floorData = new byte[32]; + final ByteArrayDataInput floorDataReader = new ByteArrayDataInput(); + + // Length of prefix shared by all terms in this block + int prefix; + + // Number of entries (term or sub-block) in this block + int entCount; + + // Which term we will next read + int nextEnt; + + // True if this block is either not a floor block, + // or, it's the last sub-block of a floor block + boolean isLastInFloor; + + // True if all entries are terms + boolean isLeafBlock; + + int numFollowFloorBlocks; + int nextFloorLabel; + + final Transition transition = new Transition(); + int transitionIndex; + int transitionCount; + + FST.Arc arc; + + final BlockTermState termState; + + // metadata buffer + byte[] bytes = new byte[32]; + + final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + + // Cumulative output so far + BytesRef outputPrefix; + + int startBytePos; + int suffix; + + private final IntersectTermsEnum ite; + private final int version; + + IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException { + this.ite = ite; + this.ord = ord; + this.termState = ite.fr.parent.postingsReader.newTermState(); + this.termState.totalTermFreq = -1; + this.version = ite.fr.parent.version; + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) { + suffixLengthBytes = new byte[32]; + suffixLengthsReader = new ByteArrayDataInput(); + } else { + suffixLengthBytes = null; + suffixLengthsReader = suffixesReader; + } + } + + void loadNextFloorBlock() throws IOException { + assert numFollowFloorBlocks > 0 : "nextFloorLabel=" + nextFloorLabel; + + do { + fp = fpOrig + (floorDataReader.readVLong() >>> 1); + 
numFollowFloorBlocks--; + if (numFollowFloorBlocks != 0) { + nextFloorLabel = floorDataReader.readByte() & 0xff; + } else { + nextFloorLabel = 256; + } + } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min); + + load(null); + } + + public void setState(int state) { + this.state = state; + transitionIndex = 0; + transitionCount = ite.automaton.getNumTransitions(state); + if (transitionCount != 0) { + ite.automaton.initTransition(state, transition); + ite.automaton.getNextTransition(transition); + } else { + + // Must set min to -1 so the "label < min" check never falsely triggers: + transition.min = -1; + + // Must set max to -1 so we immediately realize we need to step to the next transition and + // then pop this frame: + transition.max = -1; + } + } + + void load(BytesRef frameIndexData) throws IOException { + if (frameIndexData != null) { + floorDataReader.reset(frameIndexData.bytes, frameIndexData.offset, frameIndexData.length); + // Skip first long -- has redundant fp, hasTerms + // flag, isFloor flag + final long code = floorDataReader.readVLong(); + if ((code & Lucene40BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) { + // Floor frame + numFollowFloorBlocks = floorDataReader.readVInt(); + nextFloorLabel = floorDataReader.readByte() & 0xff; + + // If current state is not accept, and has transitions, we must process + // first block in case it has empty suffix: + if (ite.runAutomaton.isAccept(state) == false && transitionCount != 0) { + // Maybe skip floor blocks: + assert transitionIndex == 0 : "transitionIndex=" + transitionIndex; + while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) { + fp = fpOrig + (floorDataReader.readVLong() >>> 1); + numFollowFloorBlocks--; + if (numFollowFloorBlocks != 0) { + nextFloorLabel = floorDataReader.readByte() & 0xff; + } else { + nextFloorLabel = 256; + } + } + } + } + } + + ite.in.seek(fp); + int code = ite.in.readVInt(); + entCount = code >>> 1; + assert entCount > 0; + isLastInFloor = (code & 1) != 0; + + // term suffixes: + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) { + final long codeL = ite.in.readVLong(); + isLeafBlock = (codeL & 0x04) != 0; + final int numSuffixBytes = (int) (codeL >>> 3); + if (suffixBytes.length < numSuffixBytes) { + suffixBytes = new byte[ArrayUtil.oversize(numSuffixBytes, 1)]; + } + final CompressionAlgorithm compressionAlg; + try { + compressionAlg = CompressionAlgorithm.byCode((int) codeL & 0x03); + } catch (IllegalArgumentException e) { + throw new CorruptIndexException(e.getMessage(), ite.in, e); + } + compressionAlg.read(ite.in, suffixBytes, numSuffixBytes); + suffixesReader.reset(suffixBytes, 0, numSuffixBytes); + + int numSuffixLengthBytes = ite.in.readVInt(); + final boolean allEqual = (numSuffixLengthBytes & 0x01) != 0; + numSuffixLengthBytes >>>= 1; + if (suffixLengthBytes.length < numSuffixLengthBytes) { + suffixLengthBytes = new byte[ArrayUtil.oversize(numSuffixLengthBytes, 1)]; + } + if (allEqual) { + Arrays.fill(suffixLengthBytes, 0, numSuffixLengthBytes, ite.in.readByte()); + } else { + ite.in.readBytes(suffixLengthBytes, 0, numSuffixLengthBytes); + } + suffixLengthsReader.reset(suffixLengthBytes, 0, numSuffixLengthBytes); + } else { + code = ite.in.readVInt(); + isLeafBlock = (code & 1) != 0; + int numBytes = code >>> 1; + if (suffixBytes.length < numBytes) { + suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ite.in.readBytes(suffixBytes, 0, numBytes); + suffixesReader.reset(suffixBytes, 0, numBytes); + } + + // stats + int 
numBytes = ite.in.readVInt(); + if (statBytes.length < numBytes) { + statBytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ite.in.readBytes(statBytes, 0, numBytes); + statsReader.reset(statBytes, 0, numBytes); + statsSingletonRunLength = 0; + metaDataUpto = 0; + + termState.termBlockOrd = 0; + nextEnt = 0; + + // metadata + numBytes = ite.in.readVInt(); + if (bytes.length < numBytes) { + bytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ite.in.readBytes(bytes, 0, numBytes); + bytesReader.reset(bytes, 0, numBytes); + + if (isLastInFloor == false) { + // Sub-blocks of a single floor block are always + // written one after another -- tail recurse: + fpEnd = ite.in.getFilePointer(); + } + } + + // TODO: maybe add scanToLabel; should give perf boost + + // Decodes next entry; returns true if it's a sub-block + public boolean next() { + if (isLeafBlock) { + nextLeaf(); + return false; + } else { + return nextNonLeaf(); + } + } + + public void nextLeaf() { + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + suffix = suffixLengthsReader.readVInt(); + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffix); + } + + public boolean nextNonLeaf() { + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + final int code = suffixLengthsReader.readVInt(); + suffix = code >>> 1; + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffix); + if ((code & 1) == 0) { + // A normal term + termState.termBlockOrd++; + return false; + } else { + // A sub-block; make sub-FP absolute: + lastSubFP = fp - suffixLengthsReader.readVLong(); + return true; + } + } + + public int getTermBlockOrd() { + return isLeafBlock ? nextEnt : termState.termBlockOrd; + } + + public void decodeMetaData() throws IOException { + + // lazily catch up on metadata decode: + final int limit = getTermBlockOrd(); + boolean absolute = metaDataUpto == 0; + assert limit > 0; + + // TODO: better API would be "jump straight to term=N"??? 
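// Catch-up loop: decodes the stats (docFreq/totalTermFreq) and then the
// postings metadata for every term up to the current one. Only the first
// term decoded after a block load is passed absolute=true to the postings
// reader; subsequent terms are decoded as deltas from the previous term.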
+ while (metaDataUpto < limit) { + + // TODO: we could make "tiers" of metadata, ie, + // decode docFreq/totalTF but don't decode postings + // metadata; this way caller could get + // docFreq/totalTF w/o paying decode cost for + // postings + + // TODO: if docFreq were bulk decoded we could + // just skipN here: + + // stats + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) { + if (statsSingletonRunLength > 0) { + termState.docFreq = 1; + termState.totalTermFreq = 1; + statsSingletonRunLength--; + } else { + int token = statsReader.readVInt(); + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES && (token & 1) == 1) { + termState.docFreq = 1; + termState.totalTermFreq = 1; + statsSingletonRunLength = token >>> 1; + } else { + termState.docFreq = token >>> 1; + if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + termState.totalTermFreq = termState.docFreq; + } else { + termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); + } + } + } + } else { + termState.docFreq = statsReader.readVInt(); + // if (DEBUG) System.out.println(" dF=" + state.docFreq); + if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + termState.totalTermFreq = termState.docFreq; // all postings have freq=1 + } else { + termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); + // if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); + } + } + // metadata + ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute); + + metaDataUpto++; + absolute = false; + } + termState.termBlockOrd = metaDataUpto; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java new file mode 100644 index 0000000000000..807b821d8d145 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java @@ -0,0 +1,395 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.ByteSequenceOutputs; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Outputs; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * A block-based terms index and dictionary that assigns terms to variable length blocks according + * to how they share prefixes. The terms index is a prefix trie whose leaves are term blocks. The + * advantage of this approach is that seekExact is often able to determine a term cannot exist + * without doing any IO, and intersection with Automata is very fast. Note that this terms + * dictionary has its own fixed terms index (ie, it does not support a pluggable terms index + * implementation). + * + *
<p>
NOTE: this terms dictionary supports min/maxItemsPerBlock during indexing to control + * how much memory the terms index uses. + * + *
<p>
The data structure used by this implementation is very similar to a burst trie + * (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499), but with added logic to break + * up too-large blocks of all terms sharing a given prefix into smaller ones. + * + *
<p>
Use {@link org.apache.lucene.index.CheckIndex} with the -verbose option to see + * summary statistics on the blocks in the dictionary. + * + *
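The leading vLong of each terms-index output packs the target block's file pointer together
with two flags, roughly {@code (fp << 2) | (hasTerms ? 2 : 0) | (isFloor ? 1 : 0)}; see the
{@code OUTPUT_FLAG_*} constants below.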
<p>
See {@code BlockTreeTermsWriter}. + * + * @lucene.experimental + */ +public final class Lucene40BlockTreeTermsReader extends FieldsProducer { + + static final Outputs FST_OUTPUTS = ByteSequenceOutputs.getSingleton(); + + static final BytesRef NO_OUTPUT = FST_OUTPUTS.getNoOutput(); + + static final int OUTPUT_FLAGS_NUM_BITS = 2; + static final int OUTPUT_FLAGS_MASK = 0x3; + static final int OUTPUT_FLAG_IS_FLOOR = 0x1; + static final int OUTPUT_FLAG_HAS_TERMS = 0x2; + + /** Extension of terms file */ + static final String TERMS_EXTENSION = "tim"; + + static final String TERMS_CODEC_NAME = "BlockTreeTermsDict"; + + /** Initial terms format. */ + public static final int VERSION_START = 2; + + /** Auto-prefix terms have been superseded by points. */ + public static final int VERSION_AUTO_PREFIX_TERMS_REMOVED = 3; + + /** The long[] + byte[] metadata has been replaced with a single byte[]. */ + public static final int VERSION_META_LONGS_REMOVED = 4; + + /** Suffixes are compressed to save space. */ + public static final int VERSION_COMPRESSED_SUFFIXES = 5; + + /** Metadata is written to its own file. */ + public static final int VERSION_META_FILE = 6; + + /** Current terms format. */ + public static final int VERSION_CURRENT = VERSION_META_FILE; + + /** Extension of terms index file */ + static final String TERMS_INDEX_EXTENSION = "tip"; + + static final String TERMS_INDEX_CODEC_NAME = "BlockTreeTermsIndex"; + + /** Extension of terms meta file */ + static final String TERMS_META_EXTENSION = "tmd"; + + static final String TERMS_META_CODEC_NAME = "BlockTreeTermsMeta"; + + // Open input to the main terms dict file (_X.tib) + final IndexInput termsIn; + // Open input to the terms index file (_X.tip) + final IndexInput indexIn; + + // private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + // Reads the terms dict entries, to gather state to + // produce DocsEnum on demand + final PostingsReaderBase postingsReader; + + private final Map fieldMap; + private final List fieldList; + + final String segment; + + final int version; + + /** Sole constructor. 
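Opens the {@code .tim} terms dictionary and {@code .tip} index inputs (and, for
{@code VERSION_META_FILE} and newer, the {@code .tmd} metadata file) and reads the
per-field metadata.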
*/ + public Lucene40BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException { + boolean success = false; + + this.postingsReader = postingsReader; + this.segment = state.segmentInfo.name; + + try { + String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION); + termsIn = EndiannessReverserUtil.openInput(state.directory, termsName, state.context); + version = CodecUtil.checkIndexHeader( + termsIn, + TERMS_CODEC_NAME, + VERSION_START, + VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + if (version < VERSION_AUTO_PREFIX_TERMS_REMOVED) { + // pre-6.2 index, records whether auto-prefix terms are enabled in the header + byte b = termsIn.readByte(); + if (b != 0) { + throw new CorruptIndexException("Index header pretends the index has auto-prefix terms: " + b, termsIn); + } + } + + String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION); + indexIn = EndiannessReverserUtil.openInput(state.directory, indexName, state.context); + CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix); + + if (version < VERSION_META_FILE) { + // Have PostingsReader init itself + postingsReader.init(termsIn, state); + + // Verifying the checksum against all bytes would be too costly, but for now we at least + // verify proper structure of the checksum footer. This is cheap and can detect some forms + // of corruption such as file truncation. + CodecUtil.retrieveChecksum(indexIn); + CodecUtil.retrieveChecksum(termsIn); + } + + // Read per-field details + String metaName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_META_EXTENSION); + Map fieldMap = null; + Throwable priorE = null; + long indexLength = -1, termsLength = -1; + try ( + ChecksumIndexInput metaIn = version >= VERSION_META_FILE + ? EndiannessReverserUtil.openChecksumInput(state.directory, metaName, state.context) + : null + ) { + try { + final IndexInput indexMetaIn, termsMetaIn; + if (version >= VERSION_META_FILE) { + CodecUtil.checkIndexHeader( + metaIn, + TERMS_META_CODEC_NAME, + version, + version, + state.segmentInfo.getId(), + state.segmentSuffix + ); + indexMetaIn = termsMetaIn = metaIn; + postingsReader.init(metaIn, state); + } else { + seekDir(termsIn); + seekDir(termsIn); + seekDir(indexIn); + indexMetaIn = indexIn; + termsMetaIn = termsIn; + } + + final int numFields = termsMetaIn.readVInt(); + if (numFields < 0) { + throw new CorruptIndexException("invalid numFields: " + numFields, termsMetaIn); + } + fieldMap = new HashMap<>((int) (numFields / 0.75f) + 1); + for (int i = 0; i < numFields; ++i) { + final int field = termsMetaIn.readVInt(); + final long numTerms = termsMetaIn.readVLong(); + if (numTerms <= 0) { + throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsMetaIn); + } + final BytesRef rootCode = readBytesRef(termsMetaIn); + final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); + if (fieldInfo == null) { + throw new CorruptIndexException("invalid field number: " + field, termsMetaIn); + } + final long sumTotalTermFreq = termsMetaIn.readVLong(); + // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is + // written. + final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS + ? 
sumTotalTermFreq + : termsMetaIn.readVLong(); + final int docCount = termsMetaIn.readVInt(); + if (version < VERSION_META_LONGS_REMOVED) { + final int longsSize = termsMetaIn.readVInt(); + if (longsSize < 0) { + throw new CorruptIndexException( + "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, + termsMetaIn + ); + } + } + BytesRef minTerm = readBytesRef(termsMetaIn); + BytesRef maxTerm = readBytesRef(termsMetaIn); + if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs + throw new CorruptIndexException( + "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), + termsMetaIn + ); + } + if (sumDocFreq < docCount) { // #postings must be >= #docs with field + throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsMetaIn); + } + if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings + throw new CorruptIndexException( + "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, + termsMetaIn + ); + } + final long indexStartFP = indexMetaIn.readVLong(); + FieldReader previous = fieldMap.put( + fieldInfo.name, + new FieldReader( + this, + fieldInfo, + numTerms, + rootCode, + sumTotalTermFreq, + sumDocFreq, + docCount, + indexStartFP, + indexMetaIn, + indexIn, + minTerm, + maxTerm + ) + ); + if (previous != null) { + throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsMetaIn); + } + } + if (version >= VERSION_META_FILE) { + indexLength = metaIn.readLong(); + termsLength = metaIn.readLong(); + } + } catch (Throwable exception) { + priorE = exception; + } finally { + if (metaIn != null) { + CodecUtil.checkFooter(metaIn, priorE); + } else if (priorE != null) { + IOUtils.rethrowAlways(priorE); + } + } + } + if (version >= VERSION_META_FILE) { + // At this point the checksum of the meta file has been verified so the lengths are likely + // correct + CodecUtil.retrieveChecksum(indexIn, indexLength); + CodecUtil.retrieveChecksum(termsIn, termsLength); + } else { + assert indexLength == -1 : indexLength; + assert termsLength == -1 : termsLength; + } + List fieldList = new ArrayList<>(fieldMap.keySet()); + fieldList.sort(null); + this.fieldMap = fieldMap; + this.fieldList = Collections.unmodifiableList(fieldList); + success = true; + } finally { + if (success == false) { + // this.close() will close in: + IOUtils.closeWhileHandlingException(this); + } + } + } + + private static BytesRef readBytesRef(IndexInput in) throws IOException { + int numBytes = in.readVInt(); + if (numBytes < 0) { + throw new CorruptIndexException("invalid bytes length: " + numBytes, in); + } + + BytesRef bytes = new BytesRef(); + bytes.length = numBytes; + bytes.bytes = new byte[numBytes]; + in.readBytes(bytes.bytes, 0, numBytes); + + return bytes; + } + + /** Seek {@code input} to the directory offset. 
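The offset is stored as a fixed 8-byte long immediately before the codec footer; this is only
used for versions older than {@code VERSION_META_FILE}, where the per-field metadata is
embedded at the end of the .tim/.tip files themselves.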
*/ + private static void seekDir(IndexInput input) throws IOException { + input.seek(input.length() - CodecUtil.footerLength() - 8); + long offset = input.readLong(); + input.seek(offset); + } + + // for debugging + // private static String toHex(int v) { + // return "0x" + Integer.toHexString(v); + // } + + @Override + public void close() throws IOException { + try { + IOUtils.close(indexIn, termsIn, postingsReader); + } finally { + // Clear so refs to terms index is GCable even if + // app hangs onto us: + fieldMap.clear(); + } + } + + @Override + public Iterator iterator() { + return fieldList.iterator(); + } + + @Override + public Terms terms(String field) throws IOException { + assert field != null; + return fieldMap.get(field); + } + + @Override + public int size() { + return fieldMap.size(); + } + + // for debugging + String brToString(BytesRef b) { + if (b == null) { + return "null"; + } else { + try { + return b.utf8ToString() + " " + b; + } catch (@SuppressWarnings("unused") Throwable t) { + // If BytesRef isn't actually UTF8, or it's eg a + // prefix of UTF8 that ends mid-unicode-char, we + // fallback to hex: + return b.toString(); + } + } + } + + @Override + public void checkIntegrity() throws IOException { + // terms index + CodecUtil.checksumEntireFile(indexIn); + + // term dictionary + CodecUtil.checksumEntireFile(termsIn); + + // postings + postingsReader.checkIntegrity(); + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(fields=" + fieldMap.size() + ",delegate=" + postingsReader + ")"; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnum.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnum.java new file mode 100644 index 0000000000000..60aa82a8255bd --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnum.java @@ -0,0 +1,1170 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.BaseTermsEnum; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermState; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Util; + +import java.io.IOException; +import java.io.PrintStream; + +/** Iterates through terms in this field. */ +final class SegmentTermsEnum extends BaseTermsEnum { + + // Lazy init: + IndexInput in; + + private SegmentTermsEnumFrame[] stack; + private final SegmentTermsEnumFrame staticFrame; + SegmentTermsEnumFrame currentFrame; + boolean termExists; + final FieldReader fr; + + private int targetBeforeCurrentLength; + + // static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + private final ByteArrayDataInput scratchReader = new ByteArrayDataInput(); + + // What prefix of the current term was present in the index; when we only next() through the + // index, this stays at 0. It's only set when + // we seekCeil/Exact: + private int validIndexPrefix; + + // assert only: + private boolean eof; + + final BytesRefBuilder term = new BytesRefBuilder(); + private final FST.BytesReader fstReader; + + @SuppressWarnings({ "rawtypes", "unchecked" }) + private FST.Arc[] arcs = new FST.Arc[1]; + + SegmentTermsEnum(FieldReader fr) throws IOException { + this.fr = fr; + + // if (DEBUG) { + // System.out.println("BTTR.init seg=" + fr.parent.segment); + // } + stack = new SegmentTermsEnumFrame[0]; + + // Used to hold seek by TermState, or cached seek + staticFrame = new SegmentTermsEnumFrame(this, -1); + + if (fr.index == null) { + fstReader = null; + } else { + fstReader = fr.index.getBytesReader(); + } + + // Init w/ root block; don't use index since it may + // not (and need not) have been loaded + for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) { + arcs[arcIdx] = new FST.Arc<>(); + } + + currentFrame = staticFrame; + final FST.Arc arc; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + } else { + arc = null; + } + // currentFrame = pushFrame(arc, rootCode, 0); + // currentFrame.loadBlock(); + validIndexPrefix = 0; + // if (DEBUG) { + // System.out.println("init frame state " + currentFrame.ord); + // printSeekState(); + // } + + // System.out.println(); + // computeBlockStats().print(System.out); + } + + // Not private to avoid synthetic access$NNN methods + void initIndexInput() { + if (this.in == null) { + this.in = fr.parent.termsIn.clone(); + } + } + + /** Runs next() through the entire terms dict, computing aggregate statistics. */ + public Stats computeBlockStats() throws IOException { + + Stats stats = new Stats(fr.parent.segment, fr.fieldInfo.name); + if (fr.index != null) { + stats.indexNumBytes = fr.index.ramBytesUsed(); + } + + currentFrame = staticFrame; + FST.Arc arc; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! 
+ assert arc.isFinal(); + } else { + arc = null; + } + + // Empty string prefix must have an output in the + // index! + currentFrame = pushFrame(arc, fr.rootCode, 0); + currentFrame.fpOrig = currentFrame.fp; + currentFrame.loadBlock(); + validIndexPrefix = 0; + + stats.startBlock(currentFrame, currentFrame.isLastInFloor == false); + + allTerms: while (true) { + + // Pop finished blocks + while (currentFrame.nextEnt == currentFrame.entCount) { + stats.endBlock(currentFrame); + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + stats.startBlock(currentFrame, true); + break; + } else { + if (currentFrame.ord == 0) { + break allTerms; + } + final long lastFP = currentFrame.fpOrig; + currentFrame = stack[currentFrame.ord - 1]; + assert lastFP == currentFrame.lastSubFP; + // if (DEBUG) { + // System.out.println(" reset validIndexPrefix=" + validIndexPrefix); + // } + } + } + + while (true) { + if (currentFrame.next()) { + // Push to new block: + currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length()); + currentFrame.fpOrig = currentFrame.fp; + // This is a "next" frame -- even if it's + // floor'd we must pretend it isn't so we don't + // try to scan to the right floor frame: + currentFrame.loadBlock(); + stats.startBlock(currentFrame, currentFrame.isLastInFloor == false); + } else { + stats.term(term.get()); + break; + } + } + } + + stats.finish(); + + // Put root frame back: + currentFrame = staticFrame; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + } else { + arc = null; + } + currentFrame = pushFrame(arc, fr.rootCode, 0); + currentFrame.rewind(); + currentFrame.loadBlock(); + validIndexPrefix = 0; + term.clear(); + + return stats; + } + + private SegmentTermsEnumFrame getFrame(int ord) throws IOException { + if (ord >= stack.length) { + final SegmentTermsEnumFrame[] next = new SegmentTermsEnumFrame[ArrayUtil.oversize( + 1 + ord, + RamUsageEstimator.NUM_BYTES_OBJECT_REF + )]; + System.arraycopy(stack, 0, next, 0, stack.length); + for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) { + next[stackOrd] = new SegmentTermsEnumFrame(this, stackOrd); + } + stack = next; + } + assert stack[ord].ord == ord; + return stack[ord]; + } + + private FST.Arc getArc(int ord) { + if (ord >= arcs.length) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final FST.Arc[] next = new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(arcs, 0, next, 0, arcs.length); + for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) { + next[arcOrd] = new FST.Arc<>(); + } + arcs = next; + } + return arcs[ord]; + } + + // Pushes a frame we seek'd to + SegmentTermsEnumFrame pushFrame(FST.Arc arc, BytesRef frameData, int length) throws IOException { + scratchReader.reset(frameData.bytes, frameData.offset, frameData.length); + final long code = scratchReader.readVLong(); + final long fpSeek = code >>> Lucene40BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS; + final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); + f.hasTerms = (code & Lucene40BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS) != 0; + f.hasTermsOrig = f.hasTerms; + f.isFloor = (code & Lucene40BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0; + if (f.isFloor) { + f.setFloorData(scratchReader, frameData); + } + pushFrame(arc, fpSeek, length); + + return f; + } + + // Pushes next'd frame or seek'd frame; we later + // 
lazy-load the frame only when needed + SegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length) throws IOException { + final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); + f.arc = arc; + if (f.fpOrig == fp && f.nextEnt != -1) { + // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " + // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + + // term.length + " vs prefix=" + f.prefix); + // if (f.prefix > targetBeforeCurrentLength) { + if (f.ord > targetBeforeCurrentLength) { + f.rewind(); + } else { + // if (DEBUG) { + // System.out.println(" skip rewind!"); + // } + } + assert length == f.prefix; + } else { + f.nextEnt = -1; + f.prefix = length; + f.state.termBlockOrd = 0; + f.fpOrig = f.fp = fp; + f.lastSubFP = -1; + // if (DEBUG) { + // final int sav = term.length; + // term.length = length; + // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + + // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); + // term.length = sav; + // } + } + + return f; + } + + // asserts only + private boolean clearEOF() { + eof = false; + return true; + } + + // asserts only + private boolean setEOF() { + eof = true; + return true; + } + + /* + // for debugging + @SuppressWarnings("unused") + static String brToString(BytesRef b) { + try { + return b.utf8ToString() + " " + b; + } catch (Throwable t) { + // If BytesRef isn't actually UTF8, or it's eg a + // prefix of UTF8 that ends mid-unicode-char, we + // fallback to hex: + return b.toString(); + } + } + + // for debugging + @SuppressWarnings("unused") + static String brToString(BytesRefBuilder b) { + return brToString(b.get()); + } + */ + + @Override + public boolean seekExact(BytesRef target) throws IOException { + + if (fr.index == null) { + throw new IllegalStateException("terms index was not loaded"); + } + + if (fr.size() > 0 && (target.compareTo(fr.getMin()) < 0 || target.compareTo(fr.getMax()) > 0)) { + return false; + } + + term.grow(1 + target.length); + + assert clearEOF(); + + // if (DEBUG) { + // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + + // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + // + termExists + ") validIndexPrefix=" + validIndexPrefix); + // printSeekState(System.out); + // } + + FST.Arc arc; + int targetUpto; + BytesRef output; + + targetBeforeCurrentLength = currentFrame.ord; + + if (currentFrame != staticFrame) { + + // We are already seek'd; find the common + // prefix of new seek term vs current term and + // re-use the corresponding seek state. For + // example, if app first seeks to foobar, then + // seeks to foobaz, we can re-use the seek state + // for the first 5 bytes. 
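// (Sketch of the calling pattern that benefits, with a hypothetical
// TermsEnum "te" over this field:
//   te.seekExact(new BytesRef("foobar"));
//   te.seekExact(new BytesRef("foobaz")); // re-uses frames/arcs for "fooba"
// Seeking terms in sorted order thus avoids re-walking the FST index from
// the root on every call.)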
+ + // if (DEBUG) { + // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix); + // } + + arc = arcs[0]; + assert arc.isFinal(); + output = arc.output(); + targetUpto = 0; + + SegmentTermsEnumFrame lastFrame = stack[0]; + assert validIndexPrefix <= term.length(); + + final int targetLimit = Math.min(target.length, validIndexPrefix); + + int cmp = 0; + + // TODO: reverse vLong byte order for better FST + // prefix output sharing + + // First compare up to valid seek frames: + while (targetUpto < targetLimit) { + cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); + // if (DEBUG) { + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // + " output=" + output); + // } + if (cmp != 0) { + break; + } + arc = arcs[1 + targetUpto]; + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF) + : "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + if (arc.output() != Lucene40BlockTreeTermsReader.NO_OUTPUT) { + output = Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); + } + if (arc.isFinal()) { + lastFrame = stack[1 + lastFrame.ord]; + } + targetUpto++; + } + + if (cmp == 0) { + final int targetUptoMid = targetUpto; + + // Second compare the rest of the term, but + // don't save arc/output/frame; we only do this + // to find out if the target term is before, + // equal or after the current term + final int targetLimit2 = Math.min(target.length, term.length()); + while (targetUpto < targetLimit2) { + cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); + // if (DEBUG) { + // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + + // targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + + // targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"); + // } + if (cmp != 0) { + break; + } + targetUpto++; + } + + if (cmp == 0) { + cmp = term.length() - target.length; + } + targetUpto = targetUptoMid; + } + + if (cmp < 0) { + // Common case: target term is after current + // term, ie, app is seeking multiple terms + // in sorted order + // if (DEBUG) { + // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); + // frame.ord=" + lastFrame.ord); + // } + currentFrame = lastFrame; + + } else if (cmp > 0) { + // Uncommon case: target term + // is before current term; this means we can + // keep the currentFrame but we must rewind it + // (so we scan from the start) + targetBeforeCurrentLength = lastFrame.ord; + // if (DEBUG) { + // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); + // rewind frame ord=" + lastFrame.ord); + // } + currentFrame = lastFrame; + currentFrame.rewind(); + } else { + // Target is exactly the same as current term + assert term.length() == target.length; + if (termExists) { + // if (DEBUG) { + // System.out.println(" target is same as current; return true"); + // } + return true; + } else { + // if (DEBUG) { + // System.out.println(" target is same as current but term doesn't exist"); + // } + } + // validIndexPrefix = currentFrame.depth; + // term.length = target.length; + // return termExists; + } + + } else { + + 
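// No prior seek state (currentFrame == staticFrame): start from the FST
// root arc and push the root block frame.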
targetBeforeCurrentLength = -1; + arc = fr.index.getFirstArc(arcs[0]); + + // Empty string prefix must have an output (block) in the index! + assert arc.isFinal(); + assert arc.output() != null; + + // if (DEBUG) { + // System.out.println(" no seek state; push root frame"); + // } + + output = arc.output(); + + currentFrame = staticFrame; + + // term.length = 0; + targetUpto = 0; + currentFrame = pushFrame(arc, Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); + } + + // if (DEBUG) { + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " + // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // targetBeforeCurrentLength); + // } + + // We are done sharing the common prefix with the incoming target and where we are currently + // seek'd; now continue walking the index: + while (targetUpto < target.length) { + + final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF; + + final FST.Arc nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader); + + if (nextArc == null) { + + // Index is exhausted + // if (DEBUG) { + // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + + // toHex(targetLabel)); + // } + + validIndexPrefix = currentFrame.prefix; + // validIndexPrefix = targetUpto; + + currentFrame.scanToFloorFrame(target); + + if (currentFrame.hasTerms == false) { + termExists = false; + term.setByteAt(targetUpto, (byte) targetLabel); + term.setLength(1 + targetUpto); + // if (DEBUG) { + // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // } + return false; + } + + currentFrame.loadBlock(); + + final SeekStatus result = currentFrame.scanToTerm(target, true); + if (result == SeekStatus.FOUND) { + // if (DEBUG) { + // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term); + // } + return true; + } else { + // if (DEBUG) { + // System.out.println(" got " + result + "; return NOT_FOUND term=" + + // brToString(term)); + // } + return false; + } + } else { + // Follow this arc + arc = nextArc; + term.setByteAt(targetUpto, (byte) targetLabel); + // Aggregate output as we go: + assert arc.output() != null; + if (arc.output() != Lucene40BlockTreeTermsReader.NO_OUTPUT) { + output = Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); + } + + // if (DEBUG) { + // System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + + // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput); + // } + targetUpto++; + + if (arc.isFinal()) { + // if (DEBUG) System.out.println(" arc is final!"); + currentFrame = pushFrame(arc, Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); + // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + + // currentFrame.hasTerms); + } + } + } + + // validIndexPrefix = targetUpto; + validIndexPrefix = currentFrame.prefix; + + currentFrame.scanToFloorFrame(target); + + // Target term is entirely contained in the index: + if (currentFrame.hasTerms == false) { + termExists = false; + term.setLength(targetUpto); + // if (DEBUG) { + // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // } + return false; + } + + currentFrame.loadBlock(); + + final SeekStatus result = currentFrame.scanToTerm(target, true); + if (result == SeekStatus.FOUND) { + // if (DEBUG) { + // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + 
term); + // } + return true; + } else { + // if (DEBUG) { + // System.out.println(" got result " + result + "; return NOT_FOUND term=" + + // term.utf8ToString()); + // } + + return false; + } + } + + @Override + public SeekStatus seekCeil(BytesRef target) throws IOException { + + if (fr.index == null) { + throw new IllegalStateException("terms index was not loaded"); + } + + term.grow(1 + target.length); + + assert clearEOF(); + + // if (DEBUG) { + // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + + // fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) + // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); + // printSeekState(System.out); + // } + + FST.Arc arc; + int targetUpto; + BytesRef output; + + targetBeforeCurrentLength = currentFrame.ord; + + if (currentFrame != staticFrame) { + + // We are already seek'd; find the common + // prefix of new seek term vs current term and + // re-use the corresponding seek state. For + // example, if app first seeks to foobar, then + // seeks to foobaz, we can re-use the seek state + // for the first 5 bytes. + + // if (DEBUG) { + // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix); + // } + + arc = arcs[0]; + assert arc.isFinal(); + output = arc.output(); + targetUpto = 0; + + SegmentTermsEnumFrame lastFrame = stack[0]; + assert validIndexPrefix <= term.length(); + + final int targetLimit = Math.min(target.length, validIndexPrefix); + + int cmp = 0; + + // TODO: we should write our vLong backwards (MSB + // first) to get better sharing from the FST + + // First compare up to valid seek frames: + while (targetUpto < targetLimit) { + cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); + // if (DEBUG) { + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + + // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " + // vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + + // " output=" + output); + // } + if (cmp != 0) { + break; + } + arc = arcs[1 + targetUpto]; + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF) + : "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + // TODO: we could save the outputs in local + // byte[][] instead of making new objs ever + // seek; but, often the FST doesn't have any + // shared bytes (but this could change if we + // reverse vLong byte order) + if (arc.output() != Lucene40BlockTreeTermsReader.NO_OUTPUT) { + output = Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); + } + if (arc.isFinal()) { + lastFrame = stack[1 + lastFrame.ord]; + } + targetUpto++; + } + + if (cmp == 0) { + final int targetUptoMid = targetUpto; + // Second compare the rest of the term, but + // don't save arc/output/frame: + final int targetLimit2 = Math.min(target.length, term.length()); + while (targetUpto < targetLimit2) { + cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); + // if (DEBUG) { + // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + // + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")"); + // } + if (cmp != 0) { + break; + } + targetUpto++; + } + + if (cmp == 0) { + cmp = 
term.length() - target.length; + } + targetUpto = targetUptoMid; + } + + if (cmp < 0) { + // Common case: target term is after current + // term, ie, app is seeking multiple terms + // in sorted order + // if (DEBUG) { + // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); + // clear frame.scanned ord=" + lastFrame.ord); + // } + currentFrame = lastFrame; + + } else if (cmp > 0) { + // Uncommon case: target term + // is before current term; this means we can + // keep the currentFrame but we must rewind it + // (so we scan from the start) + targetBeforeCurrentLength = 0; + // if (DEBUG) { + // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); + // rewind frame ord=" + lastFrame.ord); + // } + currentFrame = lastFrame; + currentFrame.rewind(); + } else { + // Target is exactly the same as current term + assert term.length() == target.length; + if (termExists) { + // if (DEBUG) { + // System.out.println(" target is same as current; return FOUND"); + // } + return SeekStatus.FOUND; + } else { + // if (DEBUG) { + // System.out.println(" target is same as current but term doesn't exist"); + // } + } + } + + } else { + + targetBeforeCurrentLength = -1; + arc = fr.index.getFirstArc(arcs[0]); + + // Empty string prefix must have an output (block) in the index! + assert arc.isFinal(); + assert arc.output() != null; + + // if (DEBUG) { + // System.out.println(" no seek state; push root frame"); + // } + + output = arc.output(); + + currentFrame = staticFrame; + + // term.length = 0; + targetUpto = 0; + currentFrame = pushFrame(arc, Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); + } + + // if (DEBUG) { + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " + // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // targetBeforeCurrentLength); + // } + + // We are done sharing the common prefix with the incoming target and where we are currently + // seek'd; now continue walking the index: + while (targetUpto < target.length) { + + final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF; + + final FST.Arc nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader); + + if (nextArc == null) { + + // Index is exhausted + // if (DEBUG) { + // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + + // targetLabel); + // } + + validIndexPrefix = currentFrame.prefix; + // validIndexPrefix = targetUpto; + + currentFrame.scanToFloorFrame(target); + + currentFrame.loadBlock(); + + // if (DEBUG) System.out.println(" now scanToTerm"); + final SeekStatus result = currentFrame.scanToTerm(target, false); + if (result == SeekStatus.END) { + term.copyBytes(target); + termExists = false; + + if (next() != null) { + // if (DEBUG) { + // System.out.println(" return NOT_FOUND term=" + brToString(term)); + // } + return SeekStatus.NOT_FOUND; + } else { + // if (DEBUG) { + // System.out.println(" return END"); + // } + return SeekStatus.END; + } + } else { + // if (DEBUG) { + // System.out.println(" return " + result + " term=" + brToString(term)); + // } + return result; + } + } else { + // Follow this arc + term.setByteAt(targetUpto, (byte) targetLabel); + arc = nextArc; + // Aggregate output as we go: + assert arc.output() != null; + if (arc.output() != Lucene40BlockTreeTermsReader.NO_OUTPUT) { + output = Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); + } + + // if 
(DEBUG) { + // System.out.println(" index: follow label=" + (target.bytes[target.offset + + // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput); + // } + targetUpto++; + + if (arc.isFinal()) { + // if (DEBUG) System.out.println(" arc is final!"); + currentFrame = pushFrame(arc, Lucene40BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); + // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + + // currentFrame.hasTerms); + } + } + } + + // validIndexPrefix = targetUpto; + validIndexPrefix = currentFrame.prefix; + + currentFrame.scanToFloorFrame(target); + + currentFrame.loadBlock(); + + final SeekStatus result = currentFrame.scanToTerm(target, false); + + if (result == SeekStatus.END) { + term.copyBytes(target); + termExists = false; + if (next() != null) { + // if (DEBUG) { + // System.out.println(" return NOT_FOUND term=" + term.get().utf8ToString() + " " + term); + // } + return SeekStatus.NOT_FOUND; + } else { + // if (DEBUG) { + // System.out.println(" return END"); + // } + return SeekStatus.END; + } + } else { + return result; + } + } + + @SuppressWarnings("unused") + private void printSeekState(PrintStream out) throws IOException { + if (currentFrame == staticFrame) { + out.println(" no prior seek"); + } else { + out.println(" prior seek state:"); + int ord = 0; + boolean isSeekFrame = true; + while (true) { + SegmentTermsEnumFrame f = getFrame(ord); + assert f != null; + final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix); + if (f.nextEnt == -1) { + out.println( + " frame " + + (isSeekFrame ? "(seek)" : "(next)") + + " ord=" + + ord + + " fp=" + + f.fp + + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + + " prefixLen=" + + f.prefix + + " prefix=" + + prefix + + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + + " hasTerms=" + + f.hasTerms + + " isFloor=" + + f.isFloor + + " code=" + + ((f.fp << Lucene40BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms + ? Lucene40BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS + : 0) + (f.isFloor ? Lucene40BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0)) + + " isLastInFloor=" + + f.isLastInFloor + + " mdUpto=" + + f.metaDataUpto + + " tbOrd=" + + f.getTermBlockOrd() + ); + } else { + out.println( + " frame " + + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + + " ord=" + + ord + + " fp=" + + f.fp + + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + + " prefixLen=" + + f.prefix + + " prefix=" + + prefix + + " nextEnt=" + + f.nextEnt + + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + + " hasTerms=" + + f.hasTerms + + " isFloor=" + + f.isFloor + + " code=" + + ((f.fp << Lucene40BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms + ? Lucene40BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS + : 0) + (f.isFloor ? 
Lucene40BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0)) + + " lastSubFP=" + + f.lastSubFP + + " isLastInFloor=" + + f.isLastInFloor + + " mdUpto=" + + f.metaDataUpto + + " tbOrd=" + + f.getTermBlockOrd() + ); + } + if (fr.index != null) { + assert isSeekFrame == false || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; + if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) { + out.println( + " broken seek state: arc.label=" + + (char) f.arc.label() + + " vs term byte=" + + (char) (term.byteAt(f.prefix - 1) & 0xFF) + ); + throw new RuntimeException("seek state is broken"); + } + BytesRef output = Util.get(fr.index, prefix); + if (output == null) { + out.println(" broken seek state: prefix is not final in index"); + throw new RuntimeException("seek state is broken"); + } else if (isSeekFrame && f.isFloor == false) { + final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset, output.length); + final long codeOrig = reader.readVLong(); + final long code = (f.fp << Lucene40BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms + ? Lucene40BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS + : 0) | (f.isFloor ? Lucene40BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0); + if (codeOrig != code) { + out.println(" broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code); + throw new RuntimeException("seek state is broken"); + } + } + } + if (f == currentFrame) { + break; + } + if (f.prefix == validIndexPrefix) { + isSeekFrame = false; + } + ord++; + } + } + } + + /* Decodes only the term bytes of the next term. If caller then asks for + metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily) + decode all metadata up to the current term. */ + @Override + public BytesRef next() throws IOException { + if (in == null) { + // Fresh TermsEnum; seek to first term: + final FST.Arc arc; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + } else { + arc = null; + } + currentFrame = pushFrame(arc, fr.rootCode, 0); + currentFrame.loadBlock(); + } + + targetBeforeCurrentLength = currentFrame.ord; + + assert eof == false; + // if (DEBUG) { + // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " + // termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + + // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); + // printSeekState(System.out); + // } + + if (currentFrame == staticFrame) { + // If seek was previously called and the term was + // cached, or seek(TermState) was called, usually + // caller is just going to pull a D/&PEnum or get + // docFreq, etc. 
But, if they then call next(), + // this method catches up all internal state so next() + // works properly: + // if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " + + // term); + final boolean result = seekExact(term.get()); + assert result; + } + + // Pop finished blocks + while (currentFrame.nextEnt == currentFrame.entCount) { + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + break; + } else { + // if (DEBUG) System.out.println(" pop frame"); + if (currentFrame.ord == 0) { + // if (DEBUG) System.out.println(" return null"); + assert setEOF(); + term.clear(); + validIndexPrefix = 0; + currentFrame.rewind(); + termExists = false; + return null; + } + final long lastFP = currentFrame.fpOrig; + currentFrame = stack[currentFrame.ord - 1]; + + if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) { + // We popped into a frame that's not loaded + // yet or not scan'd to the right entry + currentFrame.scanToFloorFrame(term.get()); + currentFrame.loadBlock(); + currentFrame.scanToSubBlock(lastFP); + } + + // Note that the seek state (last seek) has been + // invalidated beyond this depth + validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix); + // if (DEBUG) { + // System.out.println(" reset validIndexPrefix=" + validIndexPrefix); + // } + } + } + + while (true) { + if (currentFrame.next()) { + // Push to new block: + // if (DEBUG) System.out.println(" push frame"); + currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length()); + // This is a "next" frame -- even if it's + // floor'd we must pretend it isn't so we don't + // try to scan to the right floor frame: + currentFrame.loadBlock(); + } else { + // if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" + // + currentFrame.ord); + return term.get(); + } + } + } + + @Override + public BytesRef term() { + assert eof == false; + return term.get(); + } + + @Override + public int docFreq() throws IOException { + assert eof == false; + // if (DEBUG) System.out.println("BTR.docFreq"); + currentFrame.decodeMetaData(); + // if (DEBUG) System.out.println(" return " + currentFrame.state.docFreq); + return currentFrame.state.docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + assert eof == false; + currentFrame.decodeMetaData(); + return currentFrame.state.totalTermFreq; + } + + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + assert eof == false; + // if (DEBUG) { + // System.out.println("BTTR.docs seg=" + segment); + // } + currentFrame.decodeMetaData(); + // if (DEBUG) { + // System.out.println(" state=" + currentFrame.state); + // } + return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, reuse, flags); + } + + @Override + public ImpactsEnum impacts(int flags) throws IOException { + assert eof == false; + // if (DEBUG) { + // System.out.println("BTTR.docs seg=" + segment); + // } + currentFrame.decodeMetaData(); + // if (DEBUG) { + // System.out.println(" state=" + currentFrame.state); + // } + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags); + } + + @Override + public void seekExact(BytesRef target, TermState otherState) { + // if (DEBUG) { + // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + + // target.utf8ToString() + " " + target + " state=" + otherState); + // } + assert clearEOF(); + if (target.compareTo(term.get()) 
!= 0 || termExists == false) { + assert otherState != null && otherState instanceof BlockTermState; + currentFrame = staticFrame; + currentFrame.state.copyFrom(otherState); + term.copyBytes(target); + currentFrame.metaDataUpto = currentFrame.getTermBlockOrd(); + assert currentFrame.metaDataUpto > 0; + validIndexPrefix = 0; + } else { + // if (DEBUG) { + // System.out.println(" skip seek: already on target state=" + currentFrame.state); + // } + } + } + + @Override + public TermState termState() throws IOException { + assert eof == false; + currentFrame.decodeMetaData(); + TermState ts = currentFrame.state.clone(); + // if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts); + return ts; + } + + @Override + public void seekExact(long ord) { + throw new UnsupportedOperationException(); + } + + @Override + public long ord() { + throw new UnsupportedOperationException(); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnumFrame.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnumFrame.java new file mode 100644 index 0000000000000..82060c9cc5db3 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnumFrame.java @@ -0,0 +1,765 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST; + +import java.io.IOException; +import java.util.Arrays; + +final class SegmentTermsEnumFrame { + // Our index in stack[]: + final int ord; + + boolean hasTerms; + boolean hasTermsOrig; + boolean isFloor; + + FST.Arc arc; + + // static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + // File pointer where this block was loaded from + long fp; + long fpOrig; + long fpEnd; + long totalSuffixBytes; // for stats + + byte[] suffixBytes = new byte[128]; + final ByteArrayDataInput suffixesReader = new ByteArrayDataInput(); + + byte[] suffixLengthBytes; + final ByteArrayDataInput suffixLengthsReader; + + byte[] statBytes = new byte[64]; + int statsSingletonRunLength = 0; + final ByteArrayDataInput statsReader = new ByteArrayDataInput(); + + byte[] floorData = new byte[32]; + final ByteArrayDataInput floorDataReader = new ByteArrayDataInput(); + + // Length of prefix shared by all terms in this block + int prefix; + + // Number of entries (term or sub-block) in this block + int entCount; + + // Which term we will next read, or -1 if the block + // isn't loaded yet + int nextEnt; + + // True if this block is either not a floor block, + // or, it's the last sub-block of a floor block + boolean isLastInFloor; + + // True if all entries are terms + boolean isLeafBlock; + + long lastSubFP; + + int nextFloorLabel; + int numFollowFloorBlocks; + + // Next term to decode metaData; we decode metaData + // lazily so that scanning to find the matching term is + // fast and only if you find a match and app wants the + // stats or docs/positions enums, will we decode the + // metaData + int metaDataUpto; + + final BlockTermState state; + + // metadata buffer + byte[] bytes = new byte[32]; + final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + + private final SegmentTermsEnum ste; + private final int version; + + SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException { + this.ste = ste; + this.ord = ord; + this.state = ste.fr.parent.postingsReader.newTermState(); + this.state.totalTermFreq = -1; + this.version = ste.fr.parent.version; + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) { + suffixLengthBytes = new byte[32]; + suffixLengthsReader = new ByteArrayDataInput(); + } else { + suffixLengthBytes = null; + suffixLengthsReader = suffixesReader; + } + } + + public void setFloorData(ByteArrayDataInput in, BytesRef source) { + final int numBytes = source.length - (in.getPosition() - source.offset); + if (numBytes > floorData.length) { + floorData = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes); + floorDataReader.reset(floorData, 0, numBytes); + numFollowFloorBlocks = floorDataReader.readVInt(); + nextFloorLabel = floorDataReader.readByte() & 0xff; + // if (DEBUG) { + // System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new + // BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + // + numFollowFloorBlocks + " nextFloorLabel=" + 
toHex(nextFloorLabel)); + // } + } + + public int getTermBlockOrd() { + return isLeafBlock ? nextEnt : state.termBlockOrd; + } + + void loadNextFloorBlock() throws IOException { + // if (DEBUG) { + // System.out.println(" loadNextFloorBlock fp=" + fp + " fpEnd=" + fpEnd); + // } + assert arc == null || isFloor : "arc=" + arc + " isFloor=" + isFloor; + fp = fpEnd; + nextEnt = -1; + loadBlock(); + } + + /* Does initial decode of next block of terms; this + doesn't actually decode the docFreq, totalTermFreq, + postings details (frq/prx offset, etc.) metadata; + it just loads them as byte[] blobs which are then + decoded on-demand if the metadata is ever requested + for any term in this block. This enables terms-only + intensive consumes (eg certain MTQs, respelling) to + not pay the price of decoding metadata they won't + use. */ + void loadBlock() throws IOException { + + // Clone the IndexInput lazily, so that consumers + // that just pull a TermsEnum to + // seekExact(TermState) don't pay this cost: + ste.initIndexInput(); + + if (nextEnt != -1) { + // Already loaded + return; + } + // System.out.println("blc=" + blockLoadCount); + + ste.in.seek(fp); + int code = ste.in.readVInt(); + entCount = code >>> 1; + assert entCount > 0; + isLastInFloor = (code & 1) != 0; + + assert arc == null || (isLastInFloor || isFloor) + : "fp=" + fp + " arc=" + arc + " isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor; + + // TODO: if suffixes were stored in random-access + // array structure, then we could do binary search + // instead of linear scan to find target term; eg + // we could have simple array of offsets + + final long startSuffixFP = ste.in.getFilePointer(); + // term suffixes: + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) { + final long codeL = ste.in.readVLong(); + isLeafBlock = (codeL & 0x04) != 0; + final int numSuffixBytes = (int) (codeL >>> 3); + if (suffixBytes.length < numSuffixBytes) { + suffixBytes = new byte[ArrayUtil.oversize(numSuffixBytes, 1)]; + } + try { + compressionAlg = CompressionAlgorithm.byCode((int) codeL & 0x03); + } catch (IllegalArgumentException e) { + throw new CorruptIndexException(e.getMessage(), ste.in, e); + } + compressionAlg.read(ste.in, suffixBytes, numSuffixBytes); + suffixesReader.reset(suffixBytes, 0, numSuffixBytes); + + int numSuffixLengthBytes = ste.in.readVInt(); + final boolean allEqual = (numSuffixLengthBytes & 0x01) != 0; + numSuffixLengthBytes >>>= 1; + if (suffixLengthBytes.length < numSuffixLengthBytes) { + suffixLengthBytes = new byte[ArrayUtil.oversize(numSuffixLengthBytes, 1)]; + } + if (allEqual) { + Arrays.fill(suffixLengthBytes, 0, numSuffixLengthBytes, ste.in.readByte()); + } else { + ste.in.readBytes(suffixLengthBytes, 0, numSuffixLengthBytes); + } + suffixLengthsReader.reset(suffixLengthBytes, 0, numSuffixLengthBytes); + } else { + code = ste.in.readVInt(); + isLeafBlock = (code & 1) != 0; + int numBytes = code >>> 1; + if (suffixBytes.length < numBytes) { + suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ste.in.readBytes(suffixBytes, 0, numBytes); + suffixesReader.reset(suffixBytes, 0, numBytes); + } + totalSuffixBytes = ste.in.getFilePointer() - startSuffixFP; + + // stats + int numBytes = ste.in.readVInt(); + if (statBytes.length < numBytes) { + statBytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ste.in.readBytes(statBytes, 0, numBytes); + statsReader.reset(statBytes, 0, numBytes); + statsSingletonRunLength = 0; + metaDataUpto = 0; + + state.termBlockOrd = 0; + nextEnt = 0; + 
lastSubFP = -1; + + // TODO: we could skip this if !hasTerms; but + // that's rare so won't help much + // metadata + numBytes = ste.in.readVInt(); + if (bytes.length < numBytes) { + bytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ste.in.readBytes(bytes, 0, numBytes); + bytesReader.reset(bytes, 0, numBytes); + + // Sub-blocks of a single floor block are always + // written one after another -- tail recurse: + fpEnd = ste.in.getFilePointer(); + // if (DEBUG) { + // System.out.println(" fpEnd=" + fpEnd); + // } + } + + void rewind() { + + // Force reload: + fp = fpOrig; + nextEnt = -1; + hasTerms = hasTermsOrig; + if (isFloor) { + floorDataReader.rewind(); + numFollowFloorBlocks = floorDataReader.readVInt(); + assert numFollowFloorBlocks > 0; + nextFloorLabel = floorDataReader.readByte() & 0xff; + } + } + + // Decodes next entry; returns true if it's a sub-block + public boolean next() throws IOException { + if (isLeafBlock) { + nextLeaf(); + return false; + } else { + return nextNonLeaf(); + } + } + + public void nextLeaf() { + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " + // entCount=" + entCount); + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + suffix = suffixLengthsReader.readVInt(); + startBytePos = suffixesReader.getPosition(); + ste.term.setLength(prefix + suffix); + ste.term.grow(ste.term.length()); + suffixesReader.readBytes(ste.term.bytes(), prefix, suffix); + ste.termExists = true; + } + + public boolean nextNonLeaf() throws IOException { + // if (DEBUG) System.out.println(" stef.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + // + entCount + " fp=" + suffixesReader.getPosition()); + while (true) { + if (nextEnt == entCount) { + assert arc == null || (isFloor && isLastInFloor == false) : "isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor; + loadNextFloorBlock(); + if (isLeafBlock) { + nextLeaf(); + return false; + } else { + continue; + } + } + + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + final int code = suffixLengthsReader.readVInt(); + suffix = code >>> 1; + startBytePos = suffixesReader.getPosition(); + ste.term.setLength(prefix + suffix); + ste.term.grow(ste.term.length()); + suffixesReader.readBytes(ste.term.bytes(), prefix, suffix); + if ((code & 1) == 0) { + // A normal term + ste.termExists = true; + subCode = 0; + state.termBlockOrd++; + return false; + } else { + // A sub-block; make sub-FP absolute: + ste.termExists = false; + subCode = suffixLengthsReader.readVLong(); + lastSubFP = fp - subCode; + // if (DEBUG) { + // System.out.println(" lastSubFP=" + lastSubFP); + // } + return true; + } + } + } + + // TODO: make this array'd so we can do bin search? + // likely not worth it? 
need to measure how many + // floor blocks we "typically" get + public void scanToFloorFrame(BytesRef target) { + + if (isFloor == false || target.length <= prefix) { + // if (DEBUG) { + // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + + // target.length + " vs prefix=" + prefix); + // } + return; + } + + final int targetLabel = target.bytes[target.offset + prefix] & 0xFF; + + // if (DEBUG) { + // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + + // toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks=" + // + numFollowFloorBlocks); + // } + + if (targetLabel < nextFloorLabel) { + // if (DEBUG) { + // System.out.println(" already on correct block"); + // } + return; + } + + assert numFollowFloorBlocks != 0; + + long newFP = fpOrig; + while (true) { + final long code = floorDataReader.readVLong(); + newFP = fpOrig + (code >>> 1); + hasTerms = (code & 1) != 0; + // if (DEBUG) { + // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " + // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // } + + isLastInFloor = numFollowFloorBlocks == 1; + numFollowFloorBlocks--; + + if (isLastInFloor) { + nextFloorLabel = 256; + // if (DEBUG) { + // System.out.println(" stop! last block nextFloorLabel=" + + // toHex(nextFloorLabel)); + // } + break; + } else { + nextFloorLabel = floorDataReader.readByte() & 0xff; + if (targetLabel < nextFloorLabel) { + // if (DEBUG) { + // System.out.println(" stop! nextFloorLabel=" + toHex(nextFloorLabel)); + // } + break; + } + } + } + + if (newFP != fp) { + // Force re-load of the block: + // if (DEBUG) { + // System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp); + // } + nextEnt = -1; + fp = newFP; + } else { + // if (DEBUG) { + // System.out.println(" stay on same fp=" + newFP); + // } + } + } + + public void decodeMetaData() throws IOException { + + // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + + // metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd); + + // lazily catch up on metadata decode: + final int limit = getTermBlockOrd(); + boolean absolute = metaDataUpto == 0; + assert limit > 0; + + // TODO: better API would be "jump straight to term=N"??? 
+ while (metaDataUpto < limit) { + + // TODO: we could make "tiers" of metadata, ie, + // decode docFreq/totalTF but don't decode postings + // metadata; this way caller could get + // docFreq/totalTF w/o paying decode cost for + // postings + + // TODO: if docFreq were bulk decoded we could + // just skipN here: + + if (version >= Lucene40BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) { + if (statsSingletonRunLength > 0) { + state.docFreq = 1; + state.totalTermFreq = 1; + statsSingletonRunLength--; + } else { + int token = statsReader.readVInt(); + if ((token & 1) == 1) { + state.docFreq = 1; + state.totalTermFreq = 1; + statsSingletonRunLength = token >>> 1; + } else { + state.docFreq = token >>> 1; + if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + state.totalTermFreq = state.docFreq; + } else { + state.totalTermFreq = state.docFreq + statsReader.readVLong(); + } + } + } + } else { + assert statsSingletonRunLength == 0; + state.docFreq = statsReader.readVInt(); + // if (DEBUG) System.out.println(" dF=" + state.docFreq); + if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + state.totalTermFreq = state.docFreq; // all postings have freq=1 + } else { + state.totalTermFreq = state.docFreq + statsReader.readVLong(); + // if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); + } + } + + // metadata + ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute); + + metaDataUpto++; + absolute = false; + } + state.termBlockOrd = metaDataUpto; + } + + // Used only by assert + private boolean prefixMatches(BytesRef target) { + for (int bytePos = 0; bytePos < prefix; bytePos++) { + if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) { + return false; + } + } + + return true; + } + + // Scans to sub-block that has this target fp; only + // called by next(); NOTE: does not set + // startBytePos/suffix as a side effect + public void scanToSubBlock(long subFP) { + assert isLeafBlock == false; + // if (DEBUG) System.out.println(" scanToSubBlock fp=" + fp + " subFP=" + subFP + " entCount=" + // + entCount + " lastSubFP=" + lastSubFP); + // assert nextEnt == 0; + if (lastSubFP == subFP) { + // if (DEBUG) System.out.println(" already positioned"); + return; + } + assert subFP < fp : "fp=" + fp + " subFP=" + subFP; + final long targetSubCode = fp - subFP; + // if (DEBUG) System.out.println(" targetSubCode=" + targetSubCode); + while (true) { + assert nextEnt < entCount; + nextEnt++; + final int code = suffixLengthsReader.readVInt(); + suffixesReader.skipBytes(code >>> 1); + if ((code & 1) != 0) { + final long subCode = suffixLengthsReader.readVLong(); + if (targetSubCode == subCode) { + // if (DEBUG) System.out.println(" match!"); + lastSubFP = subFP; + return; + } + } else { + state.termBlockOrd++; + } + } + } + + // NOTE: sets startBytePos/suffix as a side effect + public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException { + return isLeafBlock ? 
scanToTermLeaf(target, exactOnly) : scanToTermNonLeaf(target, exactOnly); + } + + private int startBytePos; + private int suffix; + private long subCode; + CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; + + // for debugging + /* + @SuppressWarnings("unused") + static String brToString(BytesRef b) { + try { + return b.utf8ToString() + " " + b; + } catch (Throwable t) { + // If BytesRef isn't actually UTF8, or it's eg a + // prefix of UTF8 that ends mid-unicode-char, we + // fallback to hex: + return b.toString(); + } + } + */ + + // Target's prefix matches this block's prefix; we + // scan the entries check if the suffix matches. + public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { + + // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " + // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + + // brToString(term)); + + assert nextEnt != -1; + + ste.termExists = true; + subCode = 0; + + if (nextEnt == entCount) { + if (exactOnly) { + fillTerm(); + } + return SeekStatus.END; + } + + assert prefixMatches(target); + + // TODO: binary search when all terms have the same length, which is common for ID fields, + // which are also the most sensitive to lookup performance? + // Loop over each entry (term or sub-block) in this block: + do { + nextEnt++; + + suffix = suffixLengthsReader.readVInt(); + + // if (DEBUG) { + // BytesRef suffixBytesRef = new BytesRef(); + // suffixBytesRef.bytes = suffixBytes; + // suffixBytesRef.offset = suffixesReader.getPosition(); + // suffixBytesRef.length = suffix; + // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + // + brToString(suffixBytesRef)); + // } + + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffix); + + // Loop over bytes in the suffix, comparing to the target + final int cmp = Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffix, + target.bytes, + target.offset + prefix, + target.offset + target.length + ); + + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); + + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + assert ste.termExists; + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; + } + } while (nextEnt < entCount); + + // It is possible (and OK) that terms index pointed us + // at this block, but, we scanned the entire block and + // did not find the term to position to. This happens + // when the target is after the last term in the block + // (but, before the next term in the index). EG + // target could be foozzz, and terms index pointed us + // to the foo* block, but the last term in this block + // was fooz (and, eg, first term in the next block will + // bee fop). + // if (DEBUG) System.out.println(" block end"); + if (exactOnly) { + fillTerm(); + } + + // TODO: not consistent that in the + // not-exact case we don't next() into the next + // frame here + return SeekStatus.END; + } + + // Target's prefix matches this block's prefix; we + // scan the entries check if the suffix matches. 
+ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException { + + // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + + // brToString(target)); + + assert nextEnt != -1; + + if (nextEnt == entCount) { + if (exactOnly) { + fillTerm(); + ste.termExists = subCode == 0; + } + return SeekStatus.END; + } + + assert prefixMatches(target); + + // Loop over each entry (term or sub-block) in this block: + while (nextEnt < entCount) { + + nextEnt++; + + final int code = suffixLengthsReader.readVInt(); + suffix = code >>> 1; + + // if (DEBUG) { + // BytesRef suffixBytesRef = new BytesRef(); + // suffixBytesRef.bytes = suffixBytes; + // suffixBytesRef.offset = suffixesReader.getPosition(); + // suffixBytesRef.length = suffix; + // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + + // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); + // } + + final int termLen = prefix + suffix; + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffix); + ste.termExists = (code & 1) == 0; + if (ste.termExists) { + state.termBlockOrd++; + subCode = 0; + } else { + subCode = suffixLengthsReader.readVLong(); + lastSubFP = fp - subCode; + } + + final int cmp = Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffix, + target.bytes, + target.offset + prefix, + target.offset + target.length + ); + + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); + + // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " + // ste.termExists=" + ste.termExists); + + if (exactOnly == false && ste.termExists == false) { + // System.out.println(" now pushFrame"); + // TODO this + // We are on a sub-block, and caller wants + // us to position to the next term after + // the target, so we must recurse into the + // sub-frame(s): + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen); + ste.currentFrame.loadBlock(); + while (ste.currentFrame.next()) { + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); + ste.currentFrame.loadBlock(); + } + } + + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + assert ste.termExists; + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; + } + } + + // It is possible (and OK) that terms index pointed us + // at this block, but, we scanned the entire block and + // did not find the term to position to. This happens + // when the target is after the last term in the block + // (but, before the next term in the index). EG + // target could be foozzz, and terms index pointed us + // to the foo* block, but the last term in this block + // was fooz (and, eg, first term in the next block will + // bee fop). 
+ // if (DEBUG) System.out.println(" block end"); + if (exactOnly) { + fillTerm(); + } + + // TODO: not consistent that in the + // not-exact case we don't next() into the next + // frame here + return SeekStatus.END; + } + + private void fillTerm() { + final int termLength = prefix + suffix; + ste.term.setLength(termLength); + ste.term.grow(termLength); + System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefix, suffix); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java new file mode 100644 index 0000000000000..90ee6d1115a57 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java @@ -0,0 +1,277 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.internal.io.IOUtils; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.util.Locale; + +/** + * BlockTree statistics for a single field returned by {@link FieldReader#getStats()}. + * + * @lucene.internal + */ +public class Stats { + /** Byte size of the index. */ + public long indexNumBytes; + + /** Total number of terms in the field. */ + public long totalTermCount; + + /** Total number of bytes (sum of term lengths) across all terms in the field. */ + public long totalTermBytes; + + /** The number of normal (non-floor) blocks in the terms file. */ + public int nonFloorBlockCount; + + /** + * The number of floor blocks (meta-blocks larger than the allowed {@code maxItemsPerBlock}) in + * the terms file. + */ + public int floorBlockCount; + + /** The number of sub-blocks within the floor blocks. */ + public int floorSubBlockCount; + + /** The number of "internal" blocks (that have both terms and sub-blocks). */ + public int mixedBlockCount; + + /** The number of "leaf" blocks (blocks that have only terms). */ + public int termsOnlyBlockCount; + + /** The number of "internal" blocks that do not contain terms (have only sub-blocks). */ + public int subBlocksOnlyBlockCount; + + /** Total number of blocks. */ + public int totalBlockCount; + + /** Number of blocks at each prefix depth. 
*/ + public int[] blockCountByPrefixLen = new int[10]; + + private int startBlockCount; + private int endBlockCount; + + /** Total number of bytes used to store term suffixes. */ + public long totalBlockSuffixBytes; + + /** + * Number of times each compression method has been used. 0 = uncompressed 1 = lowercase_ascii 2 = + * LZ4 + */ + public final long[] compressionAlgorithms = new long[3]; + + /** Total number of suffix bytes before compression. */ + public long totalUncompressedBlockSuffixBytes; + + /** + * Total number of bytes used to store term stats (not including what the {@link + * PostingsReaderBase} stores. + */ + public long totalBlockStatsBytes; + + /** + * Total bytes stored by the {@link PostingsReaderBase}, plus the other few vInts stored in the + * frame. + */ + public long totalBlockOtherBytes; + + /** Segment name. */ + public final String segment; + + /** Field name. */ + public final String field; + + Stats(String segment, String field) { + this.segment = segment; + this.field = field; + } + + void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) { + totalBlockCount++; + if (isFloor) { + if (frame.fp == frame.fpOrig) { + floorBlockCount++; + } + floorSubBlockCount++; + } else { + nonFloorBlockCount++; + } + + if (blockCountByPrefixLen.length <= frame.prefix) { + blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1 + frame.prefix); + } + blockCountByPrefixLen[frame.prefix]++; + startBlockCount++; + totalBlockSuffixBytes += frame.totalSuffixBytes; + totalUncompressedBlockSuffixBytes += frame.suffixesReader.length(); + if (frame.suffixesReader != frame.suffixLengthsReader) { + totalUncompressedBlockSuffixBytes += frame.suffixLengthsReader.length(); + } + totalBlockStatsBytes += frame.statsReader.length(); + compressionAlgorithms[frame.compressionAlg.code]++; + } + + void endBlock(SegmentTermsEnumFrame frame) { + final int termCount = frame.isLeafBlock ? 
frame.entCount : frame.state.termBlockOrd; + final int subBlockCount = frame.entCount - termCount; + totalTermCount += termCount; + if (termCount != 0 && subBlockCount != 0) { + mixedBlockCount++; + } else if (termCount != 0) { + termsOnlyBlockCount++; + } else if (subBlockCount != 0) { + subBlocksOnlyBlockCount++; + } else { + throw new IllegalStateException(); + } + endBlockCount++; + final long otherBytes = frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length(); + assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd; + totalBlockOtherBytes += otherBytes; + } + + void term(BytesRef term) { + totalTermBytes += term.length; + } + + void finish() { + assert startBlockCount == endBlockCount : "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount; + assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount + : "floorSubBlockCount=" + + floorSubBlockCount + + " nonFloorBlockCount=" + + nonFloorBlockCount + + " totalBlockCount=" + + totalBlockCount; + assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount + : "totalBlockCount=" + + totalBlockCount + + " mixedBlockCount=" + + mixedBlockCount + + " subBlocksOnlyBlockCount=" + + subBlocksOnlyBlockCount + + " termsOnlyBlockCount=" + + termsOnlyBlockCount; + } + + @Override + public String toString() { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + PrintStream out; + try { + out = new PrintStream(bos, false, IOUtils.UTF_8); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } + + out.println(" index FST:"); + out.println(" " + indexNumBytes + " bytes"); + out.println(" terms:"); + out.println(" " + totalTermCount + " terms"); + out.println( + " " + + totalTermBytes + + " bytes" + + (totalTermCount != 0 + ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes) / totalTermCount) + " bytes/term)" + : "") + ); + out.println(" blocks:"); + out.println(" " + totalBlockCount + " blocks"); + out.println(" " + termsOnlyBlockCount + " terms-only blocks"); + out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks"); + out.println(" " + mixedBlockCount + " mixed blocks"); + out.println(" " + floorBlockCount + " floor blocks"); + out.println(" " + (totalBlockCount - floorSubBlockCount) + " non-floor blocks"); + out.println(" " + floorSubBlockCount + " floor sub-blocks"); + out.println( + " " + + totalUncompressedBlockSuffixBytes + + " term suffix bytes before compression" + + (totalBlockCount != 0 + ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes) / totalBlockCount) + " suffix-bytes/block)" + : "") + ); + StringBuilder compressionCounts = new StringBuilder(); + for (int code = 0; code < compressionAlgorithms.length; ++code) { + if (compressionAlgorithms[code] == 0) { + continue; + } + if (compressionCounts.length() > 0) { + compressionCounts.append(", "); + } + compressionCounts.append(CompressionAlgorithm.byCode(code)); + compressionCounts.append(": "); + compressionCounts.append(compressionAlgorithms[code]); + } + out.println( + " " + + totalBlockSuffixBytes + + " compressed term suffix bytes" + + (totalBlockCount != 0 + ? 
" (" + + String.format(Locale.ROOT, "%.2f", ((double) totalBlockSuffixBytes) / totalUncompressedBlockSuffixBytes) + + " compression ratio - compression count by algorithm: " + + compressionCounts + : "") + + ")" + ); + out.println( + " " + + totalBlockStatsBytes + + " term stats bytes " + + (totalBlockCount != 0 + ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes) / totalBlockCount) + " stats-bytes/block)" + : "") + ); + out.println( + " " + + totalBlockOtherBytes + + " other bytes" + + (totalBlockCount != 0 + ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes) / totalBlockCount) + " other-bytes/block)" + : "") + ); + if (totalBlockCount != 0) { + out.println(" by prefix length:"); + int total = 0; + for (int prefix = 0; prefix < blockCountByPrefixLen.length; prefix++) { + final int blockCount = blockCountByPrefixLen[prefix]; + total += blockCount; + if (blockCount != 0) { + out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount); + } + } + assert totalBlockCount == total; + } + + try { + return bos.toString(IOUtils.UTF_8); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java new file mode 100644 index 0000000000000..fd04a28ce23fb --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java @@ -0,0 +1,477 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.MultiLevelSkipListWriter; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.TermState; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.packed.PackedInts; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree.Lucene40BlockTreeTermsReader; + +import java.io.IOException; + +/** + * Lucene 5.0 postings format, which encodes postings in packed integer blocks for fast decode. + * + *

+ * Basic idea:
+ *
+ *   • Packed Blocks and VInt Blocks:
+ *     In packed blocks, integers are encoded with the same bit width ({@link PackedInts packed
+ *     format}): the block size (i.e. the number of integers inside a block) is fixed, currently 128.
+ *     Additionally, blocks whose values are all equal are encoded in an optimized way.
+ *     In VInt blocks, integers are encoded as {@link DataOutput#writeVInt VInt}: the block size is variable.
+ *
+ *   • Block structure:
+ *     When the postings are long enough, Lucene50PostingsFormat will try to encode most integer data
+ *     as a packed block. Take a term with 259 documents as an example: the first 256 document ids are
+ *     encoded as two packed blocks, while the remaining 3 are encoded as one VInt block.
+ *     Different kinds of data are always encoded separately into different packed blocks, but may
+ *     possibly be interleaved into the same VInt block. This strategy is applied to the pairs
+ *     <document number, frequency>, <position, payload length>, <position, offset start, offset length>,
+ *     and <position, payload length, offset start, offset length>.
+ *
+ *   • Skipdata settings:
+ *     The structure of the skip table is quite similar to the previous version of Lucene. The skip
+ *     interval is the same as the block size, and each skip entry points to the beginning of a block.
+ *     However, for the first block, skip data is omitted.
+ *
+ *   • Positions, Payloads, and Offsets:
+ *     A position is an integer indicating where the term occurs within one document. A payload is a
+ *     blob of metadata associated with the current position. An offset is a pair of integers indicating
+ *     the tokenized start/end offsets for a given term in the current position: it is essentially a
+ *     specialized payload. When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets
+ *     (assuming a null payload contributes one count). As mentioned in the block structure, it is
+ *     possible to encode these three either combined or separately.
+ *     In all cases, payloads and offsets are stored together. When encoded as a packed block, position
+ *     data is separated out as .pos, while payloads and offsets are encoded in .pay (payload metadata
+ *     will also be stored directly in .pay). When encoded as VInt blocks, all three are stored
+ *     interleaved into the .pos file (so is payload metadata).
+ *     With this strategy, the majority of payload and offset data will be outside the .pos file. So for
+ *     queries that require only position data, running on a full index with payloads and offsets, this
+ *     reduces disk pre-fetches.
+ *
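To make the 259-document example above concrete, here is a minimal, illustrative Java sketch (not part of this patch; the class name is hypothetical) of how a posting list of a given docFreq splits into fixed-size packed blocks plus a VInt-encoded tail:

// Illustrative only: split a posting list into 128-integer packed blocks plus a VInt tail.
class BlockSplitExample {
    static final int BLOCK_SIZE = 128; // fixed packed block size used by this format

    public static void main(String[] args) {
        int docFreq = 259;
        int packedBlocks = docFreq / BLOCK_SIZE; // 2 full packed blocks covering 256 documents
        int vIntTail = docFreq % BLOCK_SIZE;     // 3 remaining documents go into one VInt block
        System.out.println(packedBlocks + " packed blocks + " + vIntTail + " VInt-encoded docs");
    }
}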

+ * Files and detailed format:
+ *
+ * Term Dictionary
+ *
+ *   The .tim file contains the list of terms in each field along with per-term statistics (such as
+ *   docfreq) and pointers to the frequencies, positions, payload and skip data in the .doc, .pos, and
+ *   .pay files. See {@code BlockTreeTermsWriter} for more details on the format.
+ *
+ *   NOTE: The term dictionary can plug into different postings implementations: the postings
+ *   writer/reader are actually responsible for encoding and decoding the PostingsHeader and
+ *   TermMetadata sections described here:
+ *
+ *   • PostingsHeader --> Header, PackedBlockSize
+ *   • TermMetadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
+ *     SkipFPDelta?
+ *   • Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   • PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}
+ *   • DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   • Header is a {@link CodecUtil#writeIndexHeader IndexHeader} storing the version information for
+ *     the postings.
+ *   • PackedBlockSize is the fixed block size for packed blocks. In a packed block, the bit width is
+ *     determined by the largest integer. A smaller block size results in smaller variance among the
+ *     widths of integers, hence smaller indexes; a larger block size results in more efficient bulk
+ *     I/O, hence better acceleration. This value should always be a multiple of 64, currently fixed as
+ *     128 as a tradeoff. It is also the skip interval used to accelerate
+ *     {@link org.apache.lucene.index.PostingsEnum#advance(int)}.
+ *   • DocFPDelta determines the position of this term's TermFreqs within the .doc file. In particular,
+ *     it is the difference of file offset between this term's data and the previous term's data (or
+ *     zero, for the first term in the block). On disk it is stored as the difference from the previous
+ *     value in the sequence.
+ *   • PosFPDelta determines the position of this term's TermPositions within the .pos file, while
+ *     PayFPDelta determines the position of this term's <TermPayloads, TermOffsets?> within the .pay
+ *     file. Similar to DocFPDelta, it is the difference between two file positions (or neglected, for
+ *     fields that omit payloads and offsets).
+ *   • PosVIntBlockFPDelta determines the position of this term's last TermPosition in the last pos
+ *     packed block within the .pos file. It is a synonym for PayVIntBlockFPDelta or
+ *     OffsetVIntBlockFPDelta. This is actually used to indicate whether it is necessary to load the
+ *     following payloads and offsets from .pos instead of .pay. Every time a new block of positions is
+ *     to be loaded, the PostingsReader will use this value to check whether the current block is in
+ *     packed format or VInt. When in packed format, payloads and offsets are fetched from .pay,
+ *     otherwise from .pos. (This value is neglected when the total number of positions, i.e.
+ *     totalTermFreq, is less than or equal to PackedBlockSize.)
+ *   • SkipFPDelta determines the position of this term's SkipData within the .doc file. In particular,
+ *     it is the length of the TermFreq data. SkipDelta is only stored if DocFreq is not smaller than
+ *     SkipMinimum (i.e. 128 in Lucene50PostingsFormat).
+ *   • SingletonDocID is an optimization when a term only appears in one document. In this case, instead
+ *     of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location,
+ *     the single document ID is written to the term dictionary.
+ *
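The file-pointer fields above (DocFPDelta, PosFPDelta, PayFPDelta) are written as differences from the previous term's values, so a reader accumulates them back into absolute offsets. A minimal, illustrative sketch of that accumulation (hypothetical class name and values, not the patch's decodeTerm logic):

// Illustrative only: per-term file-pointer deltas are summed to recover absolute offsets.
class DeltaDecodeExample {
    public static void main(String[] args) {
        long[] docFPDelta = { 100, 40, 25 }; // deltas as they would be read for consecutive terms
        long docStartFP = 0;                 // running absolute file pointer into the .doc file
        for (long delta : docFPDelta) {
            docStartFP += delta;
            System.out.println("term doc data starts at file pointer " + docStartFP);
        }
        // prints 100, 140, 165
    }
}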
+ * Term Index
+ *
+ *   The .tip file contains an index into the term dictionary, so that it can be accessed randomly.
+ *   See {@code BlockTreeTermsWriter} for more details on the format.
+ *
+ * Frequencies and Skip Data
+ *
+ *   The .doc file contains the lists of documents which contain each term, along with the frequency of
+ *   the term in that document (except when frequencies are omitted: {@link IndexOptions#DOCS}). It also
+ *   saves skip data to the beginning of each packed or VInt block, when the length of the document list
+ *   is larger than the packed block size.
+ *
+ *   • docFile(.doc) --> Header, <TermFreqs, SkipData?>^TermCount, Footer
+ *   • Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   • TermFreqs --> <PackedBlock>^PackedDocBlockNum, VIntBlock?
+ *   • PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?
+ *   • VIntBlock --> <DocDelta[, Freq?]>^(DocFreq - PackedBlockSize*PackedDocBlockNum)
+ *   • SkipData --> <<SkipLevelLength, SkipLevel>^(NumSkipLevels-1), SkipLevel>, SkipDatum?
+ *   • SkipLevel --> <SkipDatum>^(TrimmedDocFreq/(PackedBlockSize^(Level + 1)))
+ *   • SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?, PayFPSkip?>?,
+ *     SkipChildLevelPointer?
+ *   • PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}
+ *   • DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip -->
+ *     {@link DataOutput#writeVInt VInt}
+ *   • SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   • PackedDocDeltaBlock is theoretically generated in two steps:
+ *     1. Calculate the difference between each document number and the previous one, and get a d-gaps
+ *        list (for the first document, use the absolute value);
+ *     2. For those d-gaps from the first one to the (PackedDocBlockNum*PackedBlockSize)-th, separately
+ *        encode as packed blocks.
+ *     If frequencies are not omitted, PackedFreqBlock is generated without the d-gap step.
+ *   • VIntBlock stores the remaining d-gaps (along with frequencies when possible) with a format that
+ *     encodes DocDelta and Freq:
+ *     DocDelta: if frequencies are indexed, this determines both the document number and the frequency.
+ *     In particular, DocDelta/2 is the difference between this document number and the previous
+ *     document number (or zero when this is the first document in a TermFreqs). When DocDelta is odd,
+ *     the frequency is one. When DocDelta is even, the frequency is read as another VInt. If
+ *     frequencies are omitted, DocDelta contains the gap (not multiplied by 2) between document numbers
+ *     and no frequency information is stored.
+ *     For example, the TermFreqs for a term which occurs once in document seven and three times in
+ *     document eleven, with frequencies indexed, would be the following sequence of VInts:
+ *     15, 8, 3
+ *     If frequencies were omitted ({@link IndexOptions#DOCS}) it would be this sequence of VInts
+ *     instead:
+ *     7, 4
+ *   • PackedDocBlockNum is the number of packed blocks for the current term's docids or frequencies. In
+ *     particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize).
+ *   • TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq. We use this trick since
+ *     the definition of a skip entry is a little different from the base interface. In
+ *     {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for the skipInterval-th,
+ *     2*skipInterval-th ... posting in the list. However, in Lucene50PostingsFormat, the skip data is
+ *     saved for the (skipInterval+1)-th, (2*skipInterval+1)-th ... posting (skipInterval==PackedBlockSize
+ *     in this case). When DocFreq is a multiple of PackedBlockSize, MultiLevelSkipListWriter would
+ *     expect one more skip datum than Lucene50SkipWriter.
+ *   • SkipDatum is the metadata of one skip entry. For the first block (no matter packed or VInt), it
+ *     is omitted.
+ *   • DocSkip records the document number of every PackedBlockSize-th document number in the postings
+ *     (i.e. the last document number in each packed block). On disk it is stored as the difference from
+ *     the previous value in the sequence.
+ *   • DocFPSkip records the file offset of each block after the first one, i.e. of the posting at the
+ *     (PackedBlockSize+1)-th, (2*PackedBlockSize+1)-th ... position, in the doc file. The file offsets
+ *     are relative to the start of the current term's TermFreqs. On disk it is also stored as the
+ *     difference from the previous SkipDatum in the sequence.
+ *   • Since positions and payloads are also block encoded, the skip should skip to the related block
+ *     first, then fetch the values according to the in-block offset. PosFPSkip and PayFPSkip record the
+ *     file offsets of the related block in .pos and .pay, respectively, while PosBlockOffset indicates
+ *     which value to fetch inside the related block (PayBlockOffset is unnecessary since it is always
+ *     equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of the
+ *     current term's TermFreqs, and stored as a difference sequence.
+ *   • PayByteUpto indicates the start offset of the current payload. It is equivalent to the sum of the
+ *     payload lengths in the current block up to PosBlockOffset.
+ *
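The DocDelta rule above can be reproduced with a small, self-contained sketch (illustrative only; the class name is hypothetical, not the patch's writer code) that re-derives the 15, 8, 3 sequence for a term occurring once in document 7 and three times in document 11:

// Illustrative only: VInt doc/freq encoding for the tail block, with frequencies indexed.
// DocDelta = gap*2 + 1 when the frequency is 1, otherwise gap*2 followed by the frequency.
class DocDeltaExample {
    public static void main(String[] args) {
        int[][] postings = { { 7, 1 }, { 11, 3 } }; // {docID, freq} pairs
        int lastDoc = 0;
        StringBuilder out = new StringBuilder();
        for (int[] posting : postings) {
            int gap = posting[0] - lastDoc;
            lastDoc = posting[0];
            if (posting[1] == 1) {
                out.append(gap * 2 + 1).append(' ');                             // odd value => freq == 1
            } else {
                out.append(gap * 2).append(' ').append(posting[1]).append(' ');  // even value, freq follows
            }
        }
        System.out.println(out.toString().trim()); // prints "15 8 3"
    }
}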
+ * Positions
+ *
+ *   The .pos file contains the lists of positions that each term occurs at within documents. It also
+ *   sometimes stores part of the payloads and offsets for speedup.
+ *
+ *   • PosFile(.pos) --> Header, <TermPositions>^TermCount, Footer
+ *   • Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   • TermPositions --> <PackedPosDeltaBlock>^PackedPosBlockNum, VIntBlock?
+ *   • VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?, OffsetDelta?, OffsetLength?>^PosVIntCount
+ *   • PackedPosDeltaBlock --> {@link PackedInts PackedInts}
+ *   • PositionDelta, OffsetDelta, OffsetLength --> {@link DataOutput#writeVInt VInt}
+ *   • PayloadData --> {@link DataOutput#writeByte byte}^PayLength
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   • TermPositions are ordered by term (terms are implicit, from the term dictionary), and position
+ *     values for each term-document pair are incremental, and ordered by document number.
+ *   • PackedPosBlockNum is the number of packed blocks for the current term's positions, payloads or
+ *     offsets. In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize).
+ *   • PosVIntCount is the number of positions encoded in VInt format. In particular,
+ *     PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize.
+ *   • PackedPosDeltaBlock is generated the same way as PackedDocDeltaBlock in the chapter
+ *     Frequencies and Skip Data.
+ *   • PositionDelta is, if payloads are disabled for the term's field, the difference between the
+ *     position of the current occurrence in the document and the previous occurrence (or zero, if this
+ *     is the first occurrence in this document). If payloads are enabled for the term's field, then
+ *     PositionDelta/2 is the difference between the current and the previous position. If payloads are
+ *     enabled and PositionDelta is odd, then PayloadLength is stored, indicating the length of the
+ *     payload at the current term position.
+ *     For example, the TermPositions for a term which occurs as the fourth term in one document, and as
+ *     the fifth and ninth term in a subsequent document, would be the following sequence of VInts
+ *     (payloads disabled):
+ *     4, 5, 4
+ *   • PayloadData is metadata associated with the current term position. If PayloadLength is stored at
+ *     the current position, then it indicates the length of this payload. If PayloadLength is not
+ *     stored, then this payload has the same length as the payload at the previous position.
+ *   • OffsetDelta/2 is the difference between this position's startOffset and that of the previous
+ *     occurrence (or zero, if this is the first occurrence in this document). If OffsetDelta is odd,
+ *     then the length (endOffset-startOffset) differs from the previous occurrence and an OffsetLength
+ *     follows. Offset data is only written for
+ *     {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
+ *
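Likewise, the 4, 5, 4 example can be re-derived with a short illustrative sketch (hypothetical class name, payloads disabled), showing that position deltas restart at zero for every document:

// Illustrative only: with payloads disabled, each position delta is written directly as a VInt,
// and the "previous position" resets to zero at every document boundary.
class PositionDeltaExample {
    public static void main(String[] args) {
        int[][] positionsPerDoc = { { 4 }, { 5, 9 } }; // 4th term of doc A; 5th and 9th term of doc B
        StringBuilder out = new StringBuilder();
        for (int[] positions : positionsPerDoc) {
            int lastPosition = 0;                      // reset for every document
            for (int position : positions) {
                out.append(position - lastPosition).append(' ');
                lastPosition = position;
            }
        }
        System.out.println(out.toString().trim());     // prints "4 5 4"
    }
}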
+ * Payloads and Offsets
+ *
+ *   The .pay file stores payloads and offsets associated with certain term-document positions. Some
+ *   payloads and offsets are separated out into the .pos file, for performance reasons.
+ *
+ *   • PayFile(.pay) --> Header, <TermPayloads, TermOffsets?>^TermCount, Footer
+ *   • Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   • TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData>^PackedPayBlockNum
+ *   • TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock>^PackedPayBlockNum
+ *   • PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}
+ *   • SumPayLength --> {@link DataOutput#writeVInt VInt}
+ *   • PayData --> {@link DataOutput#writeByte byte}^SumPayLength
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   • The order of TermPayloads/TermOffsets is the same as TermPositions; note that part of the
+ *     payloads/offsets are stored in .pos.
+ *   • PackedPayLengthBlock and PackedOffsetLengthBlock are generated the same way as PackedFreqBlock in
+ *     the chapter Frequencies and Skip Data, while PackedOffsetStartDeltaBlock follows the same
+ *     procedure as PackedDocDeltaBlock.
+ *   • PackedPayBlockNum is always equal to PackedPosBlockNum for the same term. It is also a synonym
+ *     for PackedOffsetBlockNum.
+ *   • SumPayLength is the total length of payloads written within one block; it should be the sum of
+ *     the PayLengths in one packed block.
+ *   • PayLength in PackedPayLengthBlock is the length of each payload associated with the current
+ *     position.
+ * + * @lucene.experimental + */ +public class BWCLucene50PostingsFormat extends PostingsFormat { + + /** + * Filename extension for document number, frequencies, and skip data. See chapter: Frequencies and Skip Data + */ + public static final String DOC_EXTENSION = "doc"; + + /** Filename extension for positions. See chapter: Positions */ + public static final String POS_EXTENSION = "pos"; + + /** + * Filename extension for payloads and offsets. See chapter: Payloads and + * Offsets + */ + public static final String PAY_EXTENSION = "pay"; + + /** + * Expert: The maximum number of skip levels. Smaller values result in slightly smaller indexes, + * but slower skipping in big posting lists. + */ + static final int MAX_SKIP_LEVELS = 10; + + static final String TERMS_CODEC = "Lucene50PostingsWriterTerms"; + static final String DOC_CODEC = "Lucene50PostingsWriterDoc"; + static final String POS_CODEC = "Lucene50PostingsWriterPos"; + static final String PAY_CODEC = "Lucene50PostingsWriterPay"; + + // Increment version to change it + static final int VERSION_START = 0; + static final int VERSION_IMPACT_SKIP_DATA = 1; + static final int VERSION_CURRENT = VERSION_IMPACT_SKIP_DATA; + + /** Fixed packed block size, number of integers encoded in a single packed block. */ + // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding + public static final int BLOCK_SIZE = 128; + + /** Creates {@code Lucene50PostingsFormat} with default settings. */ + public BWCLucene50PostingsFormat() { + super("Lucene50"); + } + + public BWCLucene50PostingsFormat(String name) { + super(name); + } + + @Override + public String toString() { + return getName() + "(blocksize=" + BLOCK_SIZE + ")"; + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + throw new UnsupportedOperationException("Old formats can't be used for writing"); + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postingsReader = new Lucene50PostingsReader(state); + boolean success = false; + try { + FieldsProducer ret = new Lucene40BlockTreeTermsReader(postingsReader, state); + success = true; + return ret; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(postingsReader); + } + } + } + + /** + * Holds all state required for {@link Lucene50PostingsReader} to produce a {@link + * org.apache.lucene.index.PostingsEnum} without re-seeking the terms dict. + * + * @lucene.internal + */ + public static final class IntBlockTermState extends BlockTermState { + /** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */ + public long docStartFP; + /** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */ + public long posStartFP; + /** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */ + public long payStartFP; + /** + * file offset for the start of the skip list, relative to docStartFP, if there are more than + * {@link #BLOCK_SIZE} docs; otherwise -1 + */ + public long skipOffset; + /** + * file offset for the last position in the last block, if there are more than {@link + * #BLOCK_SIZE} positions; otherwise -1 + */ + public long lastPosBlockOffset; + /** + * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly + * totalTermFreq in this case. + */ + public int singletonDocID; + + /** Sole constructor. 
*/ + public IntBlockTermState() { + skipOffset = -1; + lastPosBlockOffset = -1; + singletonDocID = -1; + } + + @Override + public IntBlockTermState clone() { + IntBlockTermState other = new IntBlockTermState(); + other.copyFrom(this); + return other; + } + + @Override + public void copyFrom(TermState _other) { + super.copyFrom(_other); + IntBlockTermState other = (IntBlockTermState) _other; + docStartFP = other.docStartFP; + posStartFP = other.posStartFP; + payStartFP = other.payStartFP; + lastPosBlockOffset = other.lastPosBlockOffset; + skipOffset = other.skipOffset; + singletonDocID = other.singletonDocID; + } + + @Override + public String toString() { + return super.toString() + + " docStartFP=" + + docStartFP + + " posStartFP=" + + posStartFP + + " payStartFP=" + + payStartFP + + " lastPosBlockOffset=" + + lastPosBlockOffset + + " singletonDocID=" + + singletonDocID; + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/ForUtil.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/ForUtil.java new file mode 100644 index 0000000000000..a567f25869407 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/ForUtil.java @@ -0,0 +1,235 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedInts.Decoder; +import org.apache.lucene.util.packed.PackedInts.FormatAndBits; + +import java.io.IOException; +import java.util.Arrays; + +import static org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE; + +/** + * Encode all values in normal area with fixed bit width, which is determined by the max value in + * this block. + */ +final class ForUtil { + + /** Special number of bits per value used whenever all values to encode are equal. */ + private static final int ALL_VALUES_EQUAL = 0; + + /** + * Upper limit of the number of bytes that might be required to stored BLOCK_SIZE + * encoded values. + */ + static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4; + + /** + * Upper limit of the number of values that might be decoded in a single call to {@link + * #readBlock(IndexInput, byte[], int[])}. 
Although values after BLOCK_SIZE are + * garbage, it is necessary to allocate value buffers whose size is {@code >= MAX_DATA_SIZE} to + * avoid {@link ArrayIndexOutOfBoundsException}s. + */ + static final int MAX_DATA_SIZE; + + static { + int maxDataSize = 0; + for (int version = PackedInts.VERSION_START; version <= PackedInts.VERSION_CURRENT; version++) { + for (PackedInts.Format format : PackedInts.Format.values()) { + for (int bpv = 1; bpv <= 32; ++bpv) { + if (format.isSupported(bpv) == false) { + continue; + } + final Decoder decoder = PackedInts.getDecoder(format, version, bpv); + final int iterations = computeIterations(decoder); + maxDataSize = Math.max(maxDataSize, iterations * decoder.byteValueCount()); + } + } + } + MAX_DATA_SIZE = maxDataSize; + } + + /** + * Compute the number of iterations required to decode BLOCK_SIZE values with the + * provided {@link Decoder}. + */ + private static int computeIterations(Decoder decoder) { + return (int) Math.ceil((float) BLOCK_SIZE / decoder.byteValueCount()); + } + + /** + * Compute the number of bytes required to encode a block of values that require + * bitsPerValue bits per value with format format. + */ + private static int encodedSize(PackedInts.Format format, int packedIntsVersion, int bitsPerValue) { + final long byteCount = format.byteCount(packedIntsVersion, BLOCK_SIZE, bitsPerValue); + assert byteCount >= 0 && byteCount <= Integer.MAX_VALUE : byteCount; + return (int) byteCount; + } + + private final int[] encodedSizes; + private final PackedInts.Encoder[] encoders; + private final Decoder[] decoders; + private final int[] iterations; + + /** Create a new {@link ForUtil} instance and save state into out. */ + ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException { + out.writeVInt(PackedInts.VERSION_CURRENT); + encodedSizes = new int[33]; + encoders = new PackedInts.Encoder[33]; + decoders = new Decoder[33]; + iterations = new int[33]; + + for (int bpv = 1; bpv <= 32; ++bpv) { + final FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv, acceptableOverheadRatio); + assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue); + assert formatAndBits.bitsPerValue <= 32; + encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); + encoders[bpv] = PackedInts.getEncoder(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); + decoders[bpv] = PackedInts.getDecoder(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); + iterations[bpv] = computeIterations(decoders[bpv]); + + out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1)); + } + } + + /** Restore a {@link ForUtil} from a {@link DataInput}. 
*/ + ForUtil(DataInput in) throws IOException { + int packedIntsVersion = in.readVInt(); + PackedInts.checkVersion(packedIntsVersion); + encodedSizes = new int[33]; + encoders = new PackedInts.Encoder[33]; + decoders = new Decoder[33]; + iterations = new int[33]; + + for (int bpv = 1; bpv <= 32; ++bpv) { + final int code = in.readVInt(); + final int formatId = code >>> 5; + final int bitsPerValue = (code & 31) + 1; + + final PackedInts.Format format = PackedInts.Format.byId(formatId); + assert format.isSupported(bitsPerValue); + encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue); + encoders[bpv] = PackedInts.getEncoder(format, packedIntsVersion, bitsPerValue); + decoders[bpv] = PackedInts.getDecoder(format, packedIntsVersion, bitsPerValue); + iterations[bpv] = computeIterations(decoders[bpv]); + } + } + + /** + * Write a block of data (For format). + * + * @param data the data to write + * @param encoded a buffer to use to encode data + * @param out the destination output + * @throws IOException If there is a low-level I/O error + */ + void writeBlock(int[] data, byte[] encoded, IndexOutput out) throws IOException { + if (isAllEqual(data)) { + out.writeByte((byte) ALL_VALUES_EQUAL); + out.writeVInt(data[0]); + return; + } + + final int numBits = bitsRequired(data); + assert numBits > 0 && numBits <= 32 : numBits; + final PackedInts.Encoder encoder = encoders[numBits]; + final int iters = iterations[numBits]; + assert iters * encoder.byteValueCount() >= BLOCK_SIZE; + final int encodedSize = encodedSizes[numBits]; + assert iters * encoder.byteBlockCount() >= encodedSize; + + out.writeByte((byte) numBits); + + encoder.encode(data, 0, encoded, 0, iters); + out.writeBytes(encoded, encodedSize); + } + + /** + * Read the next block of data (For format). + * + * @param in the input to use to read data + * @param encoded a buffer that can be used to store encoded data + * @param decoded where to write decoded data + * @throws IOException If there is a low-level I/O error + */ + void readBlock(IndexInput in, byte[] encoded, int[] decoded) throws IOException { + final int numBits = in.readByte(); + assert numBits <= 32 : numBits; + + if (numBits == ALL_VALUES_EQUAL) { + final int value = in.readVInt(); + Arrays.fill(decoded, 0, BLOCK_SIZE, value); + return; + } + + final int encodedSize = encodedSizes[numBits]; + in.readBytes(encoded, 0, encodedSize); + + final Decoder decoder = decoders[numBits]; + final int iters = iterations[numBits]; + assert iters * decoder.byteValueCount() >= BLOCK_SIZE; + + decoder.decode(encoded, 0, decoded, 0, iters); + } + + /** + * Skip the next block of data. + * + * @param in the input where to read data + * @throws IOException If there is a low-level I/O error + */ + void skipBlock(IndexInput in) throws IOException { + final int numBits = in.readByte(); + if (numBits == ALL_VALUES_EQUAL) { + in.readVInt(); + return; + } + assert numBits > 0 && numBits <= 32 : numBits; + final int encodedSize = encodedSizes[numBits]; + in.seek(in.getFilePointer() + encodedSize); + } + + private static boolean isAllEqual(final int[] data) { + final int v = data[0]; + for (int i = 1; i < BLOCK_SIZE; ++i) { + if (data[i] != v) { + return false; + } + } + return true; + } + + /** Compute the number of bits required to serialize any of the longs in data. 
*/ + private static int bitsRequired(final int[] data) { + long or = 0; + for (int i = 0; i < BLOCK_SIZE; ++i) { + assert data[i] >= 0; + or |= data[i]; + } + return PackedInts.bitsRequired(or); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java new file mode 100644 index 0000000000000..206f5e1ae943b --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java @@ -0,0 +1,1787 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.Impacts; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SlowImpactsEnum; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.IntBlockTermState; + +import java.io.IOException; +import java.util.Arrays; + +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.BLOCK_SIZE; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.DOC_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.MAX_SKIP_LEVELS; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.PAY_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.POS_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.TERMS_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.VERSION_CURRENT; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.VERSION_START; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.ForUtil.MAX_DATA_SIZE; +import 
static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE; + +/** + * Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format. + * + * @lucene.experimental + */ +public final class Lucene50PostingsReader extends PostingsReaderBase { + + private final IndexInput docIn; + private final IndexInput posIn; + private final IndexInput payIn; + + final ForUtil forUtil; + private int version; + + /** Sole constructor. */ + public Lucene50PostingsReader(SegmentReadState state) throws IOException { + boolean success = false; + IndexInput docIn = null; + IndexInput posIn = null; + IndexInput payIn = null; + + // NOTE: these data files are too costly to verify checksum against all the bytes on open, + // but for now we at least verify proper structure of the checksum footer: which looks + // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption + // such as file truncation. + + String docName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + BWCLucene50PostingsFormat.DOC_EXTENSION + ); + try { + docIn = EndiannessReverserUtil.openInput(state.directory, docName, state.context); + version = CodecUtil.checkIndexHeader( + docIn, + DOC_CODEC, + VERSION_START, + VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + forUtil = new ForUtil(docIn); + CodecUtil.retrieveChecksum(docIn); + + if (state.fieldInfos.hasProx()) { + String proxName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + BWCLucene50PostingsFormat.POS_EXTENSION + ); + posIn = EndiannessReverserUtil.openInput(state.directory, proxName, state.context); + CodecUtil.checkIndexHeader(posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix); + CodecUtil.retrieveChecksum(posIn); + + if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { + String payName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + BWCLucene50PostingsFormat.PAY_EXTENSION + ); + payIn = EndiannessReverserUtil.openInput(state.directory, payName, state.context); + CodecUtil.checkIndexHeader(payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix); + CodecUtil.retrieveChecksum(payIn); + } + } + + this.docIn = docIn; + this.posIn = posIn; + this.payIn = payIn; + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(docIn, posIn, payIn); + } + } + } + + @Override + public void init(IndexInput termsIn, SegmentReadState state) throws IOException { + // Make sure we are talking to the matching postings writer + CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + final int indexBlockSize = termsIn.readVInt(); + if (indexBlockSize != BLOCK_SIZE) { + throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")"); + } + } + + /** Read values that have been written using variable-length encoding instead of bit-packing. 
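+ * When the index stores freqs, each entry is a single vInt {@code code}: the doc delta is
+ * {@code code >>> 1}, and a set low bit means the freq is 1; otherwise the freq follows as its own vInt.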
*/ + static void readVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, boolean indexHasFreq) throws IOException { + if (indexHasFreq) { + for (int i = 0; i < num; i++) { + final int code = docIn.readVInt(); + docBuffer[i] = code >>> 1; + if ((code & 1) != 0) { + freqBuffer[i] = 1; + } else { + freqBuffer[i] = docIn.readVInt(); + } + } + } else { + for (int i = 0; i < num; i++) { + docBuffer[i] = docIn.readVInt(); + } + } + } + + @Override + public BlockTermState newTermState() { + return new IntBlockTermState(); + } + + @Override + public void close() throws IOException { + IOUtils.close(docIn, posIn, payIn); + } + + @Override + public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException { + final IntBlockTermState termState = (IntBlockTermState) _termState; + final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + final boolean fieldHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + final boolean fieldHasPayloads = fieldInfo.hasPayloads(); + + if (absolute) { + termState.docStartFP = 0; + termState.posStartFP = 0; + termState.payStartFP = 0; + } + + termState.docStartFP += in.readVLong(); + if (fieldHasPositions) { + termState.posStartFP += in.readVLong(); + if (fieldHasOffsets || fieldHasPayloads) { + termState.payStartFP += in.readVLong(); + } + } + if (termState.docFreq == 1) { + termState.singletonDocID = in.readVInt(); + } else { + termState.singletonDocID = -1; + } + if (fieldHasPositions) { + if (termState.totalTermFreq > BLOCK_SIZE) { + termState.lastPosBlockOffset = in.readVLong(); + } else { + termState.lastPosBlockOffset = -1; + } + } + if (termState.docFreq > BLOCK_SIZE) { + termState.skipOffset = in.readVLong(); + } else { + termState.skipOffset = -1; + } + } + + @Override + public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) throws IOException { + + boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + + if (indexHasPositions == false || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) { + BlockDocsEnum docsEnum; + if (reuse instanceof BlockDocsEnum) { + docsEnum = (BlockDocsEnum) reuse; + if (docsEnum.canReuse(docIn, fieldInfo) == false) { + docsEnum = new BlockDocsEnum(fieldInfo); + } + } else { + docsEnum = new BlockDocsEnum(fieldInfo); + } + return docsEnum.reset((IntBlockTermState) termState, flags); + } else { + EverythingEnum everythingEnum; + if (reuse instanceof EverythingEnum) { + everythingEnum = (EverythingEnum) reuse; + if (everythingEnum.canReuse(docIn, fieldInfo) == false) { + everythingEnum = new EverythingEnum(fieldInfo); + } + } else { + everythingEnum = new EverythingEnum(fieldInfo); + } + return everythingEnum.reset((IntBlockTermState) termState, flags); + } + } + + @Override + public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException { + if (state.docFreq <= BLOCK_SIZE || version < BWCLucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { + // no skip data + return new SlowImpactsEnum(postings(fieldInfo, state, null, flags)); + } + + final boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + final boolean indexHasOffsets = 
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + final boolean indexHasPayloads = fieldInfo.hasPayloads(); + + if (indexHasPositions + && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) + && (indexHasOffsets == false || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false) + && (indexHasPayloads == false || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) { + return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state); + } + + return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags); + } + + final class BlockDocsEnum extends PostingsEnum { + private final byte[] encoded; + + private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; + private final int[] freqBuffer = new int[MAX_DATA_SIZE]; + + private int docBufferUpto; + + private Lucene50SkipReader skipper; + private boolean skipped; + + final IndexInput startDocIn; + + IndexInput docIn; + final boolean indexHasFreq; + final boolean indexHasPos; + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // sum of freqs in this posting list (or docFreq when omitted) + private int docUpto; // how many docs we've read + private int doc; // doc we last read + private int accum; // accumulator for doc deltas + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's skip data starts (after + // docTermStartFP) in the .doc file (or -1 if there is + // no skip data for this term): + private long skipOffset; + + // docID for next skip point, we won't use skipper if + // target docID is not larger than this + private int nextSkipDoc; + + private boolean needsFreq; // true if the caller actually needs frequencies + // as we read freqs lazily, isFreqsRead shows if freqs are read for the current block + // always true when we don't have freqs (indexHasFreq=false) or don't need freqs + // (needsFreq=false) + private boolean isFreqsRead; + private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 + + BlockDocsEnum(FieldInfo fieldInfo) throws IOException { + this.startDocIn = Lucene50PostingsReader.this.docIn; + this.docIn = null; + indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + encoded = new byte[MAX_ENCODED_SIZE]; + } + + public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { + return docIn == startDocIn + && indexHasFreq == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) + && indexHasPos == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) + && indexHasPayloads == fieldInfo.hasPayloads(); + } + + public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException { + docFreq = termState.docFreq; + totalTermFreq = indexHasFreq ? 
termState.totalTermFreq : docFreq; + docTermStartFP = termState.docStartFP; + skipOffset = termState.skipOffset; + singletonDocID = termState.singletonDocID; + if (docFreq > 1) { + if (docIn == null) { + // lazy init + docIn = startDocIn.clone(); + } + docIn.seek(docTermStartFP); + } + + doc = -1; + this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS); + this.isFreqsRead = true; + if (indexHasFreq == false || needsFreq == false) { + Arrays.fill(freqBuffer, 1); + } + accum = 0; + docUpto = 0; + nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block + docBufferUpto = BLOCK_SIZE; + skipped = false; + return this; + } + + @Override + public int freq() throws IOException { + if (isFreqsRead == false) { + forUtil.readBlock(docIn, encoded, freqBuffer); // read freqs for this block + isFreqsRead = true; + } + return freqBuffer[docBufferUpto - 1]; + } + + @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override + public int startOffset() throws IOException { + return -1; + } + + @Override + public int endOffset() throws IOException { + return -1; + } + + @Override + public BytesRef getPayload() throws IOException { + return null; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + // Check if we skipped reading the previous block of freqs, and if yes, position docIn after + // it + if (isFreqsRead == false) { + forUtil.skipBlock(docIn); + isFreqsRead = true; + } + + final int left = docFreq - docUpto; + assert left > 0; + + if (left >= BLOCK_SIZE) { + forUtil.readBlock(docIn, encoded, docDeltaBuffer); + + if (indexHasFreq) { + if (needsFreq) { + isFreqsRead = false; + } else { + forUtil.skipBlock(docIn); // skip over freqs if we don't need them at all + } + } + } else if (docFreq == 1) { + docDeltaBuffer[0] = singletonDocID; + freqBuffer[0] = (int) totalTermFreq; + } else { + // Read vInts: + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq); + } + docBufferUpto = 0; + } + + @Override + public int nextDoc() throws IOException { + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); // we don't need to load freqs for now (will be loaded later if necessary) + } + + accum += docDeltaBuffer[docBufferUpto]; + docUpto++; + + doc = accum; + docBufferUpto++; + return doc; + } + + @Override + public int advance(int target) throws IOException { + // current skip docID < docIDs generated from current buffer <= next skip docID + // we don't need to skip if target is buffered already + if (docFreq > BLOCK_SIZE && target > nextSkipDoc) { + + if (skipper == null) { + // Lazy init: first time this enum has ever been used for skipping + skipper = new Lucene50SkipReader( + version, + docIn.clone(), + MAX_SKIP_LEVELS, + indexHasPos, + indexHasOffsets, + indexHasPayloads + ); + } + + if (skipped == false) { + assert skipOffset != -1; + // This is the first time this enum has skipped + // since reset() was called; load the skip data: + skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq); + skipped = true; + } + + // always plus one to fix the result, since skip position in Lucene50SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > docUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; + + // Force to read next block + docBufferUpto = 
BLOCK_SIZE; + accum = skipper.getDoc(); // actually, this is just lastSkipEntry + docIn.seek(skipper.getDocPointer()); // now point to the block we want to search + // even if freqs were not read from the previous block, we will mark them as read, + // as we don't need to skip the previous block freqs in refillDocs, + // as we have already positioned docIn where in needs to be. + isFreqsRead = true; + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } + + // Now scan... this is an inlined/pared down version + // of nextDoc(): + while (true) { + accum += docDeltaBuffer[docBufferUpto]; + docUpto++; + + if (accum >= target) { + break; + } + docBufferUpto++; + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + } + + docBufferUpto++; + return doc = accum; + } + + @Override + public long cost() { + return docFreq; + } + } + + // Also handles payloads + offsets + final class EverythingEnum extends PostingsEnum { + + private final byte[] encoded; + + private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; + private final int[] freqBuffer = new int[MAX_DATA_SIZE]; + private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE]; + + private final int[] payloadLengthBuffer; + private final int[] offsetStartDeltaBuffer; + private final int[] offsetLengthBuffer; + + private byte[] payloadBytes; + private int payloadByteUpto; + private int payloadLength; + + private int lastStartOffset; + private int startOffset; + private int endOffset; + + private int docBufferUpto; + private int posBufferUpto; + + private Lucene50SkipReader skipper; + private boolean skipped; + + final IndexInput startDocIn; + + IndexInput docIn; + final IndexInput posIn; + final IndexInput payIn; + final BytesRef payload; + + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // number of positions in this posting list + private int docUpto; // how many docs we've read + private int doc; // doc we last read + private int accum; // accumulator for doc deltas + private int freq; // freq we last read + private int position; // current position + + // how many positions "behind" we are; nextPosition must + // skip these to "catch up": + private int posPendingCount; + + // Lazy pos seek: if != -1 then we must seek to this FP + // before reading positions: + private long posPendingFP; + + // Lazy pay seek: if != -1 then we must seek to this FP + // before reading payloads/offsets: + private long payPendingFP; + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's postings start in the .pos file: + private long posTermStartFP; + + // Where this term's payloads/offsets start in the .pay + // file: + private long payTermStartFP; + + // File pointer where the last (vInt encoded) pos delta + // block is. 
We need this to know whether to bulk + // decode vs vInt decode the block: + private long lastPosBlockFP; + + // Where this term's skip data starts (after + // docTermStartFP) in the .doc file (or -1 if there is + // no skip data for this term): + private long skipOffset; + + private int nextSkipDoc; + + private boolean needsOffsets; // true if we actually need offsets + private boolean needsPayloads; // true if we actually need payloads + private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 + + EverythingEnum(FieldInfo fieldInfo) throws IOException { + indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + + this.startDocIn = Lucene50PostingsReader.this.docIn; + this.docIn = null; + this.posIn = Lucene50PostingsReader.this.posIn.clone(); + if (indexHasOffsets || indexHasPayloads) { + this.payIn = Lucene50PostingsReader.this.payIn.clone(); + } else { + this.payIn = null; + } + encoded = new byte[MAX_ENCODED_SIZE]; + if (indexHasOffsets) { + offsetStartDeltaBuffer = new int[MAX_DATA_SIZE]; + offsetLengthBuffer = new int[MAX_DATA_SIZE]; + } else { + offsetStartDeltaBuffer = null; + offsetLengthBuffer = null; + startOffset = -1; + endOffset = -1; + } + + if (indexHasPayloads) { + payloadLengthBuffer = new int[MAX_DATA_SIZE]; + payloadBytes = new byte[128]; + payload = new BytesRef(); + } else { + payloadLengthBuffer = null; + payloadBytes = null; + payload = null; + } + } + + public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { + return docIn == startDocIn + && indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) + && indexHasPayloads == fieldInfo.hasPayloads(); + } + + public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOException { + docFreq = termState.docFreq; + docTermStartFP = termState.docStartFP; + posTermStartFP = termState.posStartFP; + payTermStartFP = termState.payStartFP; + skipOffset = termState.skipOffset; + totalTermFreq = termState.totalTermFreq; + singletonDocID = termState.singletonDocID; + if (docFreq > 1) { + if (docIn == null) { + // lazy init + docIn = startDocIn.clone(); + } + docIn.seek(docTermStartFP); + } + posPendingFP = posTermStartFP; + payPendingFP = payTermStartFP; + posPendingCount = 0; + if (termState.totalTermFreq < BLOCK_SIZE) { + lastPosBlockFP = posTermStartFP; + } else if (termState.totalTermFreq == BLOCK_SIZE) { + lastPosBlockFP = -1; + } else { + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; + } + + this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); + this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); + + doc = -1; + accum = 0; + docUpto = 0; + if (docFreq > BLOCK_SIZE) { + nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block + } else { + nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping + } + docBufferUpto = BLOCK_SIZE; + skipped = false; + return this; + } + + @Override + public int freq() throws IOException { + return freq; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + final int left = docFreq - docUpto; + assert left > 0; + + if (left >= BLOCK_SIZE) { + forUtil.readBlock(docIn, encoded, docDeltaBuffer); + forUtil.readBlock(docIn, encoded, freqBuffer); + } else if (docFreq == 1) { + docDeltaBuffer[0] = singletonDocID; + 
freqBuffer[0] = (int) totalTermFreq; + } else { + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true); + } + docBufferUpto = 0; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + final int count = (int) (totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + int offsetLength = 0; + payloadByteUpto = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + payloadLengthBuffer[i] = payloadLength; + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + if (payloadByteUpto + payloadLength > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); + } + posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); + payloadByteUpto += payloadLength; + } + } else { + posDeltaBuffer[i] = code; + } + + if (indexHasOffsets) { + int deltaCode = posIn.readVInt(); + if ((deltaCode & 1) != 0) { + offsetLength = posIn.readVInt(); + } + offsetStartDeltaBuffer[i] = deltaCode >>> 1; + offsetLengthBuffer[i] = offsetLength; + } + } + payloadByteUpto = 0; + } else { + forUtil.readBlock(posIn, encoded, posDeltaBuffer); + + if (indexHasPayloads) { + if (needsPayloads) { + forUtil.readBlock(payIn, encoded, payloadLengthBuffer); + int numBytes = payIn.readVInt(); + + if (numBytes > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, numBytes); + } + payIn.readBytes(payloadBytes, 0, numBytes); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + forUtil.skipBlock(payIn); // skip over lengths + int numBytes = payIn.readVInt(); // read length of payloadBytes + payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes + } + payloadByteUpto = 0; + } + + if (indexHasOffsets) { + if (needsOffsets) { + forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer); + forUtil.readBlock(payIn, encoded, offsetLengthBuffer); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + forUtil.skipBlock(payIn); // skip over starts + forUtil.skipBlock(payIn); // skip over lengths + } + } + } + } + + @Override + public int nextDoc() throws IOException { + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } + + accum += docDeltaBuffer[docBufferUpto]; + freq = freqBuffer[docBufferUpto]; + posPendingCount += freq; + docBufferUpto++; + docUpto++; + + doc = accum; + position = 0; + lastStartOffset = 0; + return doc; + } + + @Override + public int advance(int target) throws IOException { + // TODO: make frq block load lazy/skippable + + if (target > nextSkipDoc) { + if (skipper == null) { + // Lazy init: first time this enum has ever been used for skipping + skipper = new Lucene50SkipReader(version, docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads); + } + + if (skipped == false) { + assert skipOffset != -1; + // This is the first time this enum has skipped + // since reset() was called; load the skip data: + skipper.init(docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); + skipped = true; + } + + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > docUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + 
accum = skipper.getDoc(); + docIn.seek(skipper.getDocPointer()); + posPendingFP = skipper.getPosPointer(); + payPendingFP = skipper.getPayPointer(); + posPendingCount = skipper.getPosBufferUpto(); + lastStartOffset = 0; // new document + payloadByteUpto = skipper.getPayloadByteUpto(); + } + nextSkipDoc = skipper.getNextSkipDoc(); + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } + + // Now scan: + while (true) { + accum += docDeltaBuffer[docBufferUpto]; + freq = freqBuffer[docBufferUpto]; + posPendingCount += freq; + docBufferUpto++; + docUpto++; + + if (accum >= target) { + break; + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + } + + position = 0; + lastStartOffset = 0; + return doc = accum; + } + + // TODO: in theory we could avoid loading frq block + // when not needed, ie, use skip data to load how far to + // seek the pos pointer ... instead of having to load frq + // blocks only to sum up how many positions to skip + private void skipPositions() throws IOException { + // Skip positions now: + int toSkip = posPendingCount - freq; + // if (DEBUG) { + // System.out.println(" FPR.skipPositions: toSkip=" + toSkip); + // } + + final int leftInBlock = BLOCK_SIZE - posBufferUpto; + if (toSkip < leftInBlock) { + int end = posBufferUpto + toSkip; + while (posBufferUpto < end) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } else { + toSkip -= leftInBlock; + while (toSkip >= BLOCK_SIZE) { + assert posIn.getFilePointer() != lastPosBlockFP; + forUtil.skipBlock(posIn); + + if (indexHasPayloads) { + // Skip payloadLength block: + forUtil.skipBlock(payIn); + + // Skip payloadBytes block: + int numBytes = payIn.readVInt(); + payIn.seek(payIn.getFilePointer() + numBytes); + } + + if (indexHasOffsets) { + forUtil.skipBlock(payIn); + forUtil.skipBlock(payIn); + } + toSkip -= BLOCK_SIZE; + } + refillPositions(); + payloadByteUpto = 0; + posBufferUpto = 0; + while (posBufferUpto < toSkip) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } + + position = 0; + lastStartOffset = 0; + } + + @Override + public int nextPosition() throws IOException { + assert posPendingCount > 0; + + if (posPendingFP != -1) { + posIn.seek(posPendingFP); + posPendingFP = -1; + + if (payPendingFP != -1 && payIn != null) { + payIn.seek(payPendingFP); + payPendingFP = -1; + } + + // Force buffer refill: + posBufferUpto = BLOCK_SIZE; + } + + if (posPendingCount > freq) { + skipPositions(); + posPendingCount = freq; + } + + if (posBufferUpto == BLOCK_SIZE) { + refillPositions(); + posBufferUpto = 0; + } + position += posDeltaBuffer[posBufferUpto]; + + if (indexHasPayloads) { + payloadLength = payloadLengthBuffer[posBufferUpto]; + payload.bytes = payloadBytes; + payload.offset = payloadByteUpto; + payload.length = payloadLength; + payloadByteUpto += payloadLength; + } + + if (indexHasOffsets) { + startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto]; + endOffset = startOffset + offsetLengthBuffer[posBufferUpto]; + lastStartOffset = startOffset; + } + + posBufferUpto++; + posPendingCount--; + return position; + } + + @Override + public int startOffset() { + return startOffset; + } + + @Override + public int endOffset() { + return endOffset; + } + + @Override + public BytesRef getPayload() { + if (payloadLength == 0) { + return null; + } else { + return payload; + } + } + + @Override + public long 
cost() { + return docFreq; + } + } + + final class BlockImpactsPostingsEnum extends ImpactsEnum { + + private final byte[] encoded; + + private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; + private final int[] freqBuffer = new int[MAX_DATA_SIZE]; + private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE]; + + private int docBufferUpto; + private int posBufferUpto; + + private final Lucene50ScoreSkipReader skipper; + + final IndexInput docIn; + final IndexInput posIn; + + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // number of positions in this posting list + private int docUpto; // how many docs we've read + private int doc; // doc we last read + private int accum; // accumulator for doc deltas + private int freq; // freq we last read + private int position; // current position + + // how many positions "behind" we are; nextPosition must + // skip these to "catch up": + private int posPendingCount; + + // Lazy pos seek: if != -1 then we must seek to this FP + // before reading positions: + private long posPendingFP; + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's postings start in the .pos file: + private long posTermStartFP; + + // Where this term's payloads/offsets start in the .pay + // file: + private long payTermStartFP; + + // File pointer where the last (vInt encoded) pos delta + // block is. We need this to know whether to bulk + // decode vs vInt decode the block: + private long lastPosBlockFP; + + private int nextSkipDoc = -1; + + private long seekTo = -1; + + BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState) throws IOException { + indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + + this.docIn = Lucene50PostingsReader.this.docIn.clone(); + + encoded = new byte[MAX_ENCODED_SIZE]; + + this.posIn = Lucene50PostingsReader.this.posIn.clone(); + + docFreq = termState.docFreq; + docTermStartFP = termState.docStartFP; + posTermStartFP = termState.posStartFP; + payTermStartFP = termState.payStartFP; + totalTermFreq = termState.totalTermFreq; + docIn.seek(docTermStartFP); + posPendingFP = posTermStartFP; + posPendingCount = 0; + if (termState.totalTermFreq < BLOCK_SIZE) { + lastPosBlockFP = posTermStartFP; + } else if (termState.totalTermFreq == BLOCK_SIZE) { + lastPosBlockFP = -1; + } else { + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; + } + + doc = -1; + accum = 0; + docUpto = 0; + docBufferUpto = BLOCK_SIZE; + + skipper = new Lucene50ScoreSkipReader(version, docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads); + skipper.init(docTermStartFP + termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); + } + + @Override + public int freq() throws IOException { + return freq; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + final int left = docFreq - docUpto; + assert left > 0; + + if (left >= BLOCK_SIZE) { + forUtil.readBlock(docIn, encoded, docDeltaBuffer); + forUtil.readBlock(docIn, encoded, freqBuffer); + } else { + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true); + } + docBufferUpto = 0; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + final int count = (int) 
(totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + posIn.seek(posIn.getFilePointer() + payloadLength); + } + } else { + posDeltaBuffer[i] = code; + } + if (indexHasOffsets) { + if ((posIn.readVInt() & 1) != 0) { + // offset length changed + posIn.readVInt(); + } + } + } + } else { + forUtil.readBlock(posIn, encoded, posDeltaBuffer); + } + } + + @Override + public void advanceShallow(int target) throws IOException { + if (target > nextSkipDoc) { + // always plus one to fix the result, since skip position in Lucene50SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > docUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + accum = skipper.getDoc(); + posPendingFP = skipper.getPosPointer(); + posPendingCount = skipper.getPosBufferUpto(); + seekTo = skipper.getDocPointer(); // delay the seek + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + assert nextSkipDoc >= target; + } + + @Override + public Impacts getImpacts() throws IOException { + advanceShallow(doc); + return skipper.getImpacts(); + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target > nextSkipDoc) { + advanceShallow(target); + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + if (seekTo >= 0) { + docIn.seek(seekTo); + seekTo = -1; + } + refillDocs(); + } + + // Now scan: + while (true) { + accum += docDeltaBuffer[docBufferUpto]; + freq = freqBuffer[docBufferUpto]; + posPendingCount += freq; + docBufferUpto++; + docUpto++; + + if (accum >= target) { + break; + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + } + position = 0; + + return doc = accum; + } + + // TODO: in theory we could avoid loading frq block + // when not needed, ie, use skip data to load how far to + // seek the pos pointer ... 
instead of having to load frq + // blocks only to sum up how many positions to skip + private void skipPositions() throws IOException { + // Skip positions now: + int toSkip = posPendingCount - freq; + + final int leftInBlock = BLOCK_SIZE - posBufferUpto; + if (toSkip < leftInBlock) { + posBufferUpto += toSkip; + } else { + toSkip -= leftInBlock; + while (toSkip >= BLOCK_SIZE) { + assert posIn.getFilePointer() != lastPosBlockFP; + forUtil.skipBlock(posIn); + toSkip -= BLOCK_SIZE; + } + refillPositions(); + posBufferUpto = toSkip; + } + + position = 0; + } + + @Override + public int nextPosition() throws IOException { + assert posPendingCount > 0; + + if (posPendingFP != -1) { + posIn.seek(posPendingFP); + posPendingFP = -1; + + // Force buffer refill: + posBufferUpto = BLOCK_SIZE; + } + + if (posPendingCount > freq) { + skipPositions(); + posPendingCount = freq; + } + + if (posBufferUpto == BLOCK_SIZE) { + refillPositions(); + posBufferUpto = 0; + } + position += posDeltaBuffer[posBufferUpto++]; + + posPendingCount--; + return position; + } + + @Override + public int startOffset() { + return -1; + } + + @Override + public int endOffset() { + return -1; + } + + @Override + public BytesRef getPayload() { + return null; + } + + @Override + public long cost() { + return docFreq; + } + } + + final class BlockImpactsEverythingEnum extends ImpactsEnum { + + private final byte[] encoded; + + private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; + private final int[] freqBuffer = new int[MAX_DATA_SIZE]; + private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE]; + + private final int[] payloadLengthBuffer; + private final int[] offsetStartDeltaBuffer; + private final int[] offsetLengthBuffer; + + private byte[] payloadBytes; + private int payloadByteUpto; + private int payloadLength; + + private int lastStartOffset; + private int startOffset = -1; + private int endOffset = -1; + + private int docBufferUpto; + private int posBufferUpto; + + private final Lucene50ScoreSkipReader skipper; + + final IndexInput docIn; + final IndexInput posIn; + final IndexInput payIn; + final BytesRef payload; + + final boolean indexHasFreq; + final boolean indexHasPos; + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // number of positions in this posting list + private int docUpto; // how many docs we've read + private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads + private int doc; // doc we last read + private int accum; // accumulator for doc deltas + private int position; // current position + + // how many positions "behind" we are; nextPosition must + // skip these to "catch up": + private int posPendingCount; + + // Lazy pos seek: if != -1 then we must seek to this FP + // before reading positions: + private long posPendingFP; + + // Lazy pay seek: if != -1 then we must seek to this FP + // before reading payloads/offsets: + private long payPendingFP; + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's postings start in the .pos file: + private long posTermStartFP; + + // Where this term's payloads/offsets start in the .pay + // file: + private long payTermStartFP; + + // File pointer where the last (vInt encoded) pos delta + // block is. 
We need this to know whether to bulk + // decode vs vInt decode the block: + private long lastPosBlockFP; + + private int nextSkipDoc = -1; + + private final boolean needsPositions; + private final boolean needsOffsets; // true if we actually need offsets + private final boolean needsPayloads; // true if we actually need payloads + + private boolean isFreqsRead; // shows if freqs for the current doc block are read into freqBuffer + + private long seekTo = -1; + + BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags) throws IOException { + indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + + needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS); + needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); + needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); + + this.docIn = Lucene50PostingsReader.this.docIn.clone(); + + encoded = new byte[MAX_ENCODED_SIZE]; + + if (indexHasPos && needsPositions) { + this.posIn = Lucene50PostingsReader.this.posIn.clone(); + } else { + this.posIn = null; + } + + if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) { + this.payIn = Lucene50PostingsReader.this.payIn.clone(); + } else { + this.payIn = null; + } + + if (indexHasOffsets) { + offsetStartDeltaBuffer = new int[MAX_DATA_SIZE]; + offsetLengthBuffer = new int[MAX_DATA_SIZE]; + } else { + offsetStartDeltaBuffer = null; + offsetLengthBuffer = null; + startOffset = -1; + endOffset = -1; + } + + if (indexHasPayloads) { + payloadLengthBuffer = new int[MAX_DATA_SIZE]; + payloadBytes = new byte[128]; + payload = new BytesRef(); + } else { + payloadLengthBuffer = null; + payloadBytes = null; + payload = null; + } + + docFreq = termState.docFreq; + docTermStartFP = termState.docStartFP; + posTermStartFP = termState.posStartFP; + payTermStartFP = termState.payStartFP; + totalTermFreq = termState.totalTermFreq; + docIn.seek(docTermStartFP); + posPendingFP = posTermStartFP; + payPendingFP = payTermStartFP; + posPendingCount = 0; + if (termState.totalTermFreq < BLOCK_SIZE) { + lastPosBlockFP = posTermStartFP; + } else if (termState.totalTermFreq == BLOCK_SIZE) { + lastPosBlockFP = -1; + } else { + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; + } + + doc = -1; + accum = 0; + docUpto = 0; + posDocUpTo = 0; + isFreqsRead = true; + docBufferUpto = BLOCK_SIZE; + + skipper = new Lucene50ScoreSkipReader(version, docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads); + skipper.init(docTermStartFP + termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); + + if (indexHasFreq == false) { + Arrays.fill(freqBuffer, 1); + } + } + + @Override + public int freq() throws IOException { + if (indexHasFreq && (isFreqsRead == false)) { + forUtil.readBlock(docIn, encoded, freqBuffer); // read freqs for this block + isFreqsRead = true; + } + return freqBuffer[docBufferUpto - 1]; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + if (indexHasFreq) { + if (isFreqsRead == false) { // previous freq block was not read + // check if we need to load the previous freq block to catch up on 
positions or we can + // skip it + if (indexHasPos && needsPositions && (posDocUpTo < docUpto)) { + forUtil.readBlock(docIn, encoded, freqBuffer); // load the previous freq block + } else { + forUtil.skipBlock(docIn); // skip it + } + isFreqsRead = true; + } + if (indexHasPos && needsPositions) { + while (posDocUpTo < docUpto) { // catch on positions, bring posPendingCount upto the current doc + posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)]; + posDocUpTo++; + } + } + } + + final int left = docFreq - docUpto; + assert left > 0; + + if (left >= BLOCK_SIZE) { + forUtil.readBlock(docIn, encoded, docDeltaBuffer); + if (indexHasFreq) { + isFreqsRead = false; // freq block will be loaded lazily when necessary, we don't load it here + } + } else { + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq); + } + docBufferUpto = 0; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + final int count = (int) (totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + int offsetLength = 0; + payloadByteUpto = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + payloadLengthBuffer[i] = payloadLength; + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + if (payloadByteUpto + payloadLength > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); + } + posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); + payloadByteUpto += payloadLength; + } + } else { + posDeltaBuffer[i] = code; + } + + if (indexHasOffsets) { + int deltaCode = posIn.readVInt(); + if ((deltaCode & 1) != 0) { + offsetLength = posIn.readVInt(); + } + offsetStartDeltaBuffer[i] = deltaCode >>> 1; + offsetLengthBuffer[i] = offsetLength; + } + } + payloadByteUpto = 0; + } else { + forUtil.readBlock(posIn, encoded, posDeltaBuffer); + + if (indexHasPayloads && payIn != null) { + if (needsPayloads) { + forUtil.readBlock(payIn, encoded, payloadLengthBuffer); + int numBytes = payIn.readVInt(); + + if (numBytes > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, numBytes); + } + payIn.readBytes(payloadBytes, 0, numBytes); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + forUtil.skipBlock(payIn); // skip over lengths + int numBytes = payIn.readVInt(); // read length of payloadBytes + payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes + } + payloadByteUpto = 0; + } + + if (indexHasOffsets && payIn != null) { + if (needsOffsets) { + forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer); + forUtil.readBlock(payIn, encoded, offsetLengthBuffer); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + forUtil.skipBlock(payIn); // skip over starts + forUtil.skipBlock(payIn); // skip over lengths + } + } + } + } + + @Override + public void advanceShallow(int target) throws IOException { + if (target > nextSkipDoc) { + // always plus one to fix the result, since skip position in Lucene50SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > docUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; + posDocUpTo = docUpto; + + // Force to read next block + docBufferUpto = 
BLOCK_SIZE; + accum = skipper.getDoc(); + posPendingFP = skipper.getPosPointer(); + payPendingFP = skipper.getPayPointer(); + posPendingCount = skipper.getPosBufferUpto(); + lastStartOffset = 0; // new document + payloadByteUpto = skipper.getPayloadByteUpto(); // actually, this is just lastSkipEntry + seekTo = skipper.getDocPointer(); // delay the seek + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + assert nextSkipDoc >= target; + } + + @Override + public Impacts getImpacts() throws IOException { + advanceShallow(doc); + return skipper.getImpacts(); + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target > nextSkipDoc) { + advanceShallow(target); + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + if (seekTo >= 0) { + docIn.seek(seekTo); + seekTo = -1; + isFreqsRead = true; // reset isFreqsRead + } + refillDocs(); + } + + // Now scan: + while (true) { + accum += docDeltaBuffer[docBufferUpto]; + docBufferUpto++; + docUpto++; + + if (accum >= target) { + break; + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + } + position = 0; + lastStartOffset = 0; + + return doc = accum; + } + + // TODO: in theory we could avoid loading frq block + // when not needed, ie, use skip data to load how far to + // seek the pos pointer ... instead of having to load frq + // blocks only to sum up how many positions to skip + private void skipPositions() throws IOException { + // Skip positions now: + int toSkip = posPendingCount - freqBuffer[docBufferUpto - 1]; + // if (DEBUG) { + // System.out.println(" FPR.skipPositions: toSkip=" + toSkip); + // } + + final int leftInBlock = BLOCK_SIZE - posBufferUpto; + if (toSkip < leftInBlock) { + int end = posBufferUpto + toSkip; + while (posBufferUpto < end) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } else { + toSkip -= leftInBlock; + while (toSkip >= BLOCK_SIZE) { + assert posIn.getFilePointer() != lastPosBlockFP; + forUtil.skipBlock(posIn); + + if (indexHasPayloads && payIn != null) { + // Skip payloadLength block: + forUtil.skipBlock(payIn); + + // Skip payloadBytes block: + int numBytes = payIn.readVInt(); + payIn.seek(payIn.getFilePointer() + numBytes); + } + + if (indexHasOffsets && payIn != null) { + forUtil.skipBlock(payIn); + forUtil.skipBlock(payIn); + } + toSkip -= BLOCK_SIZE; + } + refillPositions(); + payloadByteUpto = 0; + posBufferUpto = 0; + while (posBufferUpto < toSkip) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } + + position = 0; + lastStartOffset = 0; + } + + @Override + public int nextPosition() throws IOException { + if (indexHasPos == false || needsPositions == false) { + return -1; + } + + if (isFreqsRead == false) { + forUtil.readBlock(docIn, encoded, freqBuffer); // read freqs for this docs block + isFreqsRead = true; + } + while (posDocUpTo < docUpto) { // bring posPendingCount upto the current doc + posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)]; + posDocUpTo++; + } + + assert posPendingCount > 0; + + if (posPendingFP != -1) { + posIn.seek(posPendingFP); + posPendingFP = -1; + + if (payPendingFP != -1 && payIn != null) { + payIn.seek(payPendingFP); + payPendingFP = -1; + } + + // Force buffer refill: + 
posBufferUpto = BLOCK_SIZE; + } + + if (posPendingCount > freqBuffer[docBufferUpto - 1]) { + skipPositions(); + posPendingCount = freqBuffer[docBufferUpto - 1]; + } + + if (posBufferUpto == BLOCK_SIZE) { + refillPositions(); + posBufferUpto = 0; + } + position += posDeltaBuffer[posBufferUpto]; + + if (indexHasPayloads) { + payloadLength = payloadLengthBuffer[posBufferUpto]; + payload.bytes = payloadBytes; + payload.offset = payloadByteUpto; + payload.length = payloadLength; + payloadByteUpto += payloadLength; + } + + if (indexHasOffsets && needsOffsets) { + startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto]; + endOffset = startOffset + offsetLengthBuffer[posBufferUpto]; + lastStartOffset = startOffset; + } + + posBufferUpto++; + posPendingCount--; + return position; + } + + @Override + public int startOffset() { + return startOffset; + } + + @Override + public int endOffset() { + return endOffset; + } + + @Override + public BytesRef getPayload() { + if (payloadLength == 0) { + return null; + } else { + return payload; + } + } + + @Override + public long cost() { + return docFreq; + } + } + + @Override + public void checkIntegrity() throws IOException { + if (docIn != null) { + CodecUtil.checksumEntireFile(docIn); + } + if (posIn != null) { + CodecUtil.checksumEntireFile(posIn); + } + if (payIn != null) { + CodecUtil.checksumEntireFile(payIn); + } + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(positions=" + (posIn != null) + ",payloads=" + (payIn != null) + ")"; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50ScoreSkipReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50ScoreSkipReader.java new file mode 100644 index 0000000000000..e27e95f2601a2 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50ScoreSkipReader.java @@ -0,0 +1,167 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.index.Impact; +import org.apache.lucene.index.Impacts; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; + +import java.io.IOException; +import java.util.AbstractList; +import java.util.Arrays; +import java.util.List; +import java.util.RandomAccess; + +final class Lucene50ScoreSkipReader extends Lucene50SkipReader { + + private final byte[][] impactData; + private final int[] impactDataLength; + private final ByteArrayDataInput badi = new ByteArrayDataInput(); + private final Impacts impacts; + private int numLevels = 1; + private final MutableImpactList[] perLevelImpacts; + + Lucene50ScoreSkipReader( + int version, + IndexInput skipStream, + int maxSkipLevels, + boolean hasPos, + boolean hasOffsets, + boolean hasPayloads + ) { + super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads); + if (version < BWCLucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { + throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed"); + } + this.impactData = new byte[maxSkipLevels][]; + Arrays.fill(impactData, new byte[0]); + this.impactDataLength = new int[maxSkipLevels]; + this.perLevelImpacts = new MutableImpactList[maxSkipLevels]; + for (int i = 0; i < perLevelImpacts.length; ++i) { + perLevelImpacts[i] = new MutableImpactList(); + } + impacts = new Impacts() { + + @Override + public int numLevels() { + return numLevels; + } + + @Override + public int getDocIdUpTo(int level) { + return skipDoc[level]; + } + + @Override + public List getImpacts(int level) { + assert level < numLevels; + if (impactDataLength[level] > 0) { + badi.reset(impactData[level], 0, impactDataLength[level]); + perLevelImpacts[level] = readImpacts(badi, perLevelImpacts[level]); + impactDataLength[level] = 0; + } + return perLevelImpacts[level]; + } + }; + } + + @Override + public int skipTo(int target) throws IOException { + int result = super.skipTo(target); + if (numberOfSkipLevels > 0) { + numLevels = numberOfSkipLevels; + } else { + // End of postings don't have skip data anymore, so we fill with dummy data + // like SlowImpactsEnum. 
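+ // A single impact of (freq = Integer.MAX_VALUE, norm = 1) is the most competitive impact possible,
+ // so callers cannot skip any further documents based on it.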
+ numLevels = 1; + perLevelImpacts[0].length = 1; + perLevelImpacts[0].impacts[0].freq = Integer.MAX_VALUE; + perLevelImpacts[0].impacts[0].norm = 1L; + impactDataLength[0] = 0; + } + return result; + } + + Impacts getImpacts() { + return impacts; + } + + @Override + protected void readImpacts(int level, IndexInput skipStream) throws IOException { + int length = skipStream.readVInt(); + if (impactData[level].length < length) { + impactData[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)]; + } + skipStream.readBytes(impactData[level], 0, length); + impactDataLength[level] = length; + } + + static MutableImpactList readImpacts(ByteArrayDataInput in, MutableImpactList reuse) { + int maxNumImpacts = in.length(); // at most one impact per byte + if (reuse.impacts.length < maxNumImpacts) { + int oldLength = reuse.impacts.length; + reuse.impacts = ArrayUtil.grow(reuse.impacts, maxNumImpacts); + for (int i = oldLength; i < reuse.impacts.length; ++i) { + reuse.impacts[i] = new Impact(Integer.MAX_VALUE, 1L); + } + } + + int freq = 0; + long norm = 0; + int length = 0; + while (in.getPosition() < in.length()) { + int freqDelta = in.readVInt(); + if ((freqDelta & 0x01) != 0) { + freq += 1 + (freqDelta >>> 1); + try { + norm += 1 + in.readZLong(); + } catch (IOException e) { + throw new RuntimeException(e); // cannot happen on a BADI + } + } else { + freq += 1 + (freqDelta >>> 1); + norm++; + } + Impact impact = reuse.impacts[length]; + impact.freq = freq; + impact.norm = norm; + length++; + } + reuse.length = length; + return reuse; + } + + static class MutableImpactList extends AbstractList implements RandomAccess { + int length = 1; + Impact[] impacts = new Impact[] { new Impact(Integer.MAX_VALUE, 1L) }; + + @Override + public Impact get(int index) { + return impacts[index]; + } + + @Override + public int size() { + return length; + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipReader.java new file mode 100644 index 0000000000000..369d7e103d839 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipReader.java @@ -0,0 +1,210 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.codecs.MultiLevelSkipListReader; +import org.apache.lucene.store.IndexInput; + +import java.io.IOException; +import java.util.Arrays; + +import static org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE; + +/** + * Implements the skip list reader for block postings format that stores positions and payloads. + * + *

Although this skipper uses MultiLevelSkipListReader as an interface, its definition of skip + * position will be a little different. + * + *

For example, when skipInterval = blockSize = 3, df = 2*skipInterval = 6, + * + *

+ * 0 1 2 3 4 5
+ * d d d d d d    (posting list)
+ *     ^     ^    (skip point in MultiLevelSkipWriter)
+ *       ^        (skip point in Lucene50SkipWriter)
+ * 
+ * + * In this case, MultiLevelSkipListReader will use the last document as a skip point, while + * Lucene50SkipReader should assume no skip point will come. + * + *

If we use the interface directly in Lucene50SkipReader, it may naively try to read more skip + * data after the only skip point is loaded. + * + *

To illustrate this, we can call skipTo(d[5]): since skip point d[3] has a smaller docId and + * numSkipped + blockSize == df, the MultiLevelSkipListReader will assume the skip list isn't exhausted + * yet and try to load a non-existent skip point. + * + *
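To make the trimming that the next paragraph introduces concrete, here is a minimal standalone sketch (illustrative only, not part of the patch) mirroring the trim(int) method defined further below. It uses the hypothetical blockSize = 3, df = 6 example from this Javadoc; the class name and tiny BLOCK_SIZE constant are invented for the sketch, while the real format uses the much larger Lucene50PostingsFormat.BLOCK_SIZE.

class SkipTrimSketch {
    // Illustrative block size matching the example above; the real format uses a
    // far larger constant (Lucene50PostingsFormat.BLOCK_SIZE).
    static final int BLOCK_SIZE = 3;

    // Mirrors Lucene50SkipReader#trim(int): if df lands exactly on a block
    // boundary, drop one so MultiLevelSkipListReader does not look for a
    // skip point that was never written.
    static int trim(int df) {
        return df % BLOCK_SIZE == 0 ? df - 1 : df;
    }

    public static void main(String[] args) {
        System.out.println(trim(6)); // 5 -> the phantom skip point at d[5] is ignored
        System.out.println(trim(7)); // 7 -> df off a block boundary is left as-is
    }
}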

Therefore, we'll trim df before passing it to the interface. see trim(int) + */ +class Lucene50SkipReader extends MultiLevelSkipListReader { + private final int version; + private long[] docPointer; + private long[] posPointer; + private long[] payPointer; + private int[] posBufferUpto; + private int[] payloadByteUpto; + + private long lastPosPointer; + private long lastPayPointer; + private int lastPayloadByteUpto; + private long lastDocPointer; + private int lastPosBufferUpto; + + Lucene50SkipReader(int version, IndexInput skipStream, int maxSkipLevels, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { + super(skipStream, maxSkipLevels, BLOCK_SIZE, 8); + this.version = version; + docPointer = new long[maxSkipLevels]; + if (hasPos) { + posPointer = new long[maxSkipLevels]; + posBufferUpto = new int[maxSkipLevels]; + if (hasPayloads) { + payloadByteUpto = new int[maxSkipLevels]; + } else { + payloadByteUpto = null; + } + if (hasOffsets || hasPayloads) { + payPointer = new long[maxSkipLevels]; + } else { + payPointer = null; + } + } else { + posPointer = null; + } + } + + /** + * Trim original docFreq to tell skipReader read proper number of skip points. + * + *

Since our definition in Lucene50Skip* is a little different from MultiLevelSkip* This + * trimmed docFreq will prevent skipReader from: 1. silly reading a non-existed skip point after + * the last block boundary 2. moving into the vInt block + */ + protected int trim(int df) { + return df % BLOCK_SIZE == 0 ? df - 1 : df; + } + + public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException { + super.init(skipPointer, trim(df)); + lastDocPointer = docBasePointer; + lastPosPointer = posBasePointer; + lastPayPointer = payBasePointer; + + Arrays.fill(docPointer, docBasePointer); + if (posPointer != null) { + Arrays.fill(posPointer, posBasePointer); + if (payPointer != null) { + Arrays.fill(payPointer, payBasePointer); + } + } else { + assert posBasePointer == 0; + } + } + + /** + * Returns the doc pointer of the doc to which the last call of {@link + * MultiLevelSkipListReader#skipTo(int)} has skipped. + */ + public long getDocPointer() { + return lastDocPointer; + } + + public long getPosPointer() { + return lastPosPointer; + } + + public int getPosBufferUpto() { + return lastPosBufferUpto; + } + + public long getPayPointer() { + return lastPayPointer; + } + + public int getPayloadByteUpto() { + return lastPayloadByteUpto; + } + + public int getNextSkipDoc() { + return skipDoc[0]; + } + + @Override + protected void seekChild(int level) throws IOException { + super.seekChild(level); + docPointer[level] = lastDocPointer; + if (posPointer != null) { + posPointer[level] = lastPosPointer; + posBufferUpto[level] = lastPosBufferUpto; + if (payloadByteUpto != null) { + payloadByteUpto[level] = lastPayloadByteUpto; + } + if (payPointer != null) { + payPointer[level] = lastPayPointer; + } + } + } + + @Override + protected void setLastSkipData(int level) { + super.setLastSkipData(level); + lastDocPointer = docPointer[level]; + + if (posPointer != null) { + lastPosPointer = posPointer[level]; + lastPosBufferUpto = posBufferUpto[level]; + if (payPointer != null) { + lastPayPointer = payPointer[level]; + } + if (payloadByteUpto != null) { + lastPayloadByteUpto = payloadByteUpto[level]; + } + } + } + + @Override + protected int readSkipData(int level, IndexInput skipStream) throws IOException { + int delta = skipStream.readVInt(); + docPointer[level] += skipStream.readVLong(); + + if (posPointer != null) { + posPointer[level] += skipStream.readVLong(); + posBufferUpto[level] = skipStream.readVInt(); + + if (payloadByteUpto != null) { + payloadByteUpto[level] = skipStream.readVInt(); + } + + if (payPointer != null) { + payPointer[level] += skipStream.readVLong(); + } + } + readImpacts(level, skipStream); + return delta; + } + + // The default impl skips impacts + protected void readImpacts(int level, IndexInput skipStream) throws IOException { + if (version >= BWCLucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { + // The base implementation skips impacts, they are not used + skipStream.skipBytes(skipStream.readVInt()); + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java index 43a24574297c3..55fe5c3b98f64 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java +++ 
b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java @@ -27,10 +27,13 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; +import org.elasticsearch.xpack.lucene.bwc.codecs.LegacyAdaptingPerFieldPostingsFormat; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.Lucene50SegmentInfoFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat; @@ -55,6 +58,21 @@ public DocValuesFormat getDocValuesFormatForField(String field) { return defaultDocValuesFormat; } }; + private final PostingsFormat postingsFormat = new LegacyAdaptingPerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + throw new IllegalStateException("This codec should only be used for reading, not writing"); + } + + @Override + protected PostingsFormat getPostingsFormat(String formatName) { + if (formatName.equals("Lucene50")) { + return new BWCLucene50PostingsFormat(); + } else { + return super.getPostingsFormat(formatName); + } + } + }; /** * Instantiates a new codec. @@ -104,4 +122,8 @@ public DocValuesFormat docValuesFormat() { return docValuesFormat; } + @Override + public PostingsFormat postingsFormat() { + return postingsFormat; + } } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java index 2f805a4881744..e3317a1c00c8c 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java @@ -27,10 +27,13 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; +import org.elasticsearch.xpack.lucene.bwc.codecs.LegacyAdaptingPerFieldPostingsFormat; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat; import java.util.Objects; @@ -54,6 +57,21 @@ public DocValuesFormat getDocValuesFormatForField(String field) { return defaultDocValuesFormat; } }; + private final PostingsFormat postingsFormat = new LegacyAdaptingPerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + throw new IllegalStateException("This codec should only be used for reading, not writing"); + } + + @Override + protected PostingsFormat getPostingsFormat(String formatName) { + if (formatName.equals("Lucene50")) { + return new BWCLucene50PostingsFormat(); + 
} else { + return super.getPostingsFormat(formatName); + } + } + }; public Lucene62Codec() { this(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); @@ -93,4 +111,9 @@ public final CompoundFormat compoundFormat() { public DocValuesFormat docValuesFormat() { return docValuesFormat; } + + @Override + public PostingsFormat postingsFormat() { + return postingsFormat; + } } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java index bc9fa098476c1..90739206b5643 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java @@ -18,9 +18,11 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; public class BWCLucene70Codec extends BWCCodec { @@ -37,6 +39,12 @@ public DocValuesFormat getDocValuesFormatForField(String field) { return defaultDVFormat; } }; + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + throw new IllegalStateException("This codec should only be used for reading, not writing"); + } + }; public BWCLucene70Codec() { super("BWCLucene70Codec"); @@ -72,4 +80,9 @@ public CompoundFormat compoundFormat() { public final DocValuesFormat docValuesFormat() { return docValuesFormat; } + + @Override + public PostingsFormat postingsFormat() { + return postingsFormat; + } } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java new file mode 100644 index 0000000000000..728191932763c --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java @@ -0,0 +1,176 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import java.io.IOException; + +/** + * Static helper methods for {@link FST.Arc.BitTable}. + * + * @lucene.experimental + */ +class BitTableUtil { + + /** + * Returns whether the bit at given zero-based index is set.
+ * Example: bitIndex 10 means the third bit on the right of the second byte. + * + * @param bitIndex The bit zero-based index. It must be greater than or equal to 0, and strictly + * less than {@code number of bit-table bytes * Byte.SIZE}. + * @param reader The {@link FST.BytesReader} to read. It must be positioned at the beginning of + * the bit-table. + */ + static boolean isBitSet(int bitIndex, FST.BytesReader reader) throws IOException { + assert bitIndex >= 0 : "bitIndex=" + bitIndex; + reader.skipBytes(bitIndex >> 3); + return (readByte(reader) & (1L << (bitIndex & (Byte.SIZE - 1)))) != 0; + } + + /** + * Counts all bits set in the bit-table. + * + * @param bitTableBytes The number of bytes in the bit-table. + * @param reader The {@link FST.BytesReader} to read. It must be positioned at the beginning of + * the bit-table. + */ + static int countBits(int bitTableBytes, FST.BytesReader reader) throws IOException { + assert bitTableBytes >= 0 : "bitTableBytes=" + bitTableBytes; + int bitCount = 0; + for (int i = bitTableBytes >> 3; i > 0; i--) { + // Count the bits set for all plain longs. + bitCount += bitCount8Bytes(reader); + } + int numRemainingBytes; + if ((numRemainingBytes = bitTableBytes & (Long.BYTES - 1)) != 0) { + bitCount += Long.bitCount(readUpTo8Bytes(numRemainingBytes, reader)); + } + return bitCount; + } + + /** + * Counts the bits set up to the given bit zero-based index, exclusive.
+ * In other words, how many 1s there are up to the bit at the given index excluded.
+ * Example: bitIndex 10 means the third bit on the right of the second byte. + * + * @param bitIndex The bit zero-based index, exclusive. It must be greater than or equal to 0, and + * less than or equal to {@code number of bit-table bytes * Byte.SIZE}. + * @param reader The {@link FST.BytesReader} to read. It must be positioned at the beginning of + * the bit-table. + */ + static int countBitsUpTo(int bitIndex, FST.BytesReader reader) throws IOException { + assert bitIndex >= 0 : "bitIndex=" + bitIndex; + int bitCount = 0; + for (int i = bitIndex >> 6; i > 0; i--) { + // Count the bits set for all plain longs. + bitCount += bitCount8Bytes(reader); + } + int remainingBits; + if ((remainingBits = bitIndex & (Long.SIZE - 1)) != 0) { + int numRemainingBytes = (remainingBits + (Byte.SIZE - 1)) >> 3; + // Prepare a mask with 1s on the right up to bitIndex exclusive. + long mask = (1L << bitIndex) - 1L; // Shifts are mod 64. + // Count the bits set only within the mask part, so up to bitIndex exclusive. + bitCount += Long.bitCount(readUpTo8Bytes(numRemainingBytes, reader) & mask); + } + return bitCount; + } + + /** + * Returns the index of the next bit set following the given bit zero-based index.
+ * For example with bits 100011: the next bit set after index=-1 is at index=0; the next bit set + * after index=0 is at index=1; the next bit set after index=1 is at index=5; there is no next bit + * set after index=5. + * + * @param bitIndex The bit zero-based index. It must be greater than or equal to -1, and strictly + * less than {@code number of bit-table bytes * Byte.SIZE}. + * @param bitTableBytes The number of bytes in the bit-table. + * @param reader The {@link FST.BytesReader} to read. It must be positioned at the beginning of + * the bit-table. + * @return The zero-based index of the next bit set after the provided {@code bitIndex}; or -1 if + * none. + */ + static int nextBitSet(int bitIndex, int bitTableBytes, FST.BytesReader reader) throws IOException { + assert bitIndex >= -1 && bitIndex < bitTableBytes * Byte.SIZE : "bitIndex=" + bitIndex + " bitTableBytes=" + bitTableBytes; + int byteIndex = bitIndex / Byte.SIZE; + int mask = -1 << ((bitIndex + 1) & (Byte.SIZE - 1)); + int i; + if (mask == -1 && bitIndex != -1) { + reader.skipBytes(byteIndex + 1); + i = 0; + } else { + reader.skipBytes(byteIndex); + i = (reader.readByte() & 0xFF) & mask; + } + while (i == 0) { + if (++byteIndex == bitTableBytes) { + return -1; + } + i = reader.readByte() & 0xFF; + } + return Integer.numberOfTrailingZeros(i) + (byteIndex << 3); + } + + /** + * Returns the index of the previous bit set preceding the given bit zero-based index.
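The "bits 100011" examples in the nextBitSet Javadoc above and the previousBitSet Javadoc that continues below can be reproduced with plain long arithmetic. The sketch below is illustrative only and not part of the patch: it assumes the whole bit-table fits in one long and that indices stay well below 64, whereas the real helpers stream bytes from an FST.BytesReader.

class BitTableSketch {
    // Bit at bitIndex, e.g. bitIndex 10 is the third bit (from the right) of the second byte.
    static boolean isBitSet(long bits, int bitIndex) {
        return (bits >>> bitIndex & 1L) != 0;
    }

    // Next set bit strictly after bitIndex (bitIndex may be -1), or -1 if none.
    static int nextBitSet(long bits, int bitIndex) {
        long masked = bits & (-1L << (bitIndex + 1));
        return masked == 0 ? -1 : Long.numberOfTrailingZeros(masked);
    }

    // Previous set bit strictly before bitIndex, or -1 if none.
    static int previousBitSet(long bits, int bitIndex) {
        long masked = bits & ((1L << bitIndex) - 1);
        return masked == 0 ? -1 : 63 - Long.numberOfLeadingZeros(masked);
    }

    public static void main(String[] args) {
        long bits = 0b100011L; // bits set at indices 0, 1 and 5
        System.out.println(nextBitSet(bits, -1));    // 0
        System.out.println(nextBitSet(bits, 1));     // 5
        System.out.println(nextBitSet(bits, 5));     // -1
        System.out.println(previousBitSet(bits, 5)); // 1
        System.out.println(previousBitSet(bits, 0)); // -1
        System.out.println(isBitSet(bits, 1));       // true
    }
}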
+ * For example with bits 100011: there is no previous bit set before index=0. the previous bit set + * before index=1 is at index=0; the previous bit set before index=5 is at index=1; the previous + * bit set before index=64 is at index=5; + * + * @param bitIndex The bit zero-based index. It must be greater than or equal to 0, and less than + * or equal to {@code number of bit-table bytes * Byte.SIZE}. + * @param reader The {@link FST.BytesReader} to read. It must be positioned at the beginning of + * the bit-table. + * @return The zero-based index of the previous bit set before the provided {@code bitIndex}; or + * -1 if none. + */ + static int previousBitSet(int bitIndex, FST.BytesReader reader) throws IOException { + assert bitIndex >= 0 : "bitIndex=" + bitIndex; + int byteIndex = bitIndex >> 3; + reader.skipBytes(byteIndex); + int mask = (1 << (bitIndex & (Byte.SIZE - 1))) - 1; + int i = (reader.readByte() & 0xFF) & mask; + while (i == 0) { + if (byteIndex-- == 0) { + return -1; + } + reader.skipBytes(-2); // FST.BytesReader implementations support negative skip. + i = reader.readByte() & 0xFF; + } + return (Integer.SIZE - 1) - Integer.numberOfLeadingZeros(i) + (byteIndex << 3); + } + + private static long readByte(FST.BytesReader reader) throws IOException { + return reader.readByte() & 0xFFL; + } + + private static long readUpTo8Bytes(int numBytes, FST.BytesReader reader) throws IOException { + assert numBytes > 0 && numBytes <= 8 : "numBytes=" + numBytes; + long l = readByte(reader); + int shift = 0; + while (--numBytes != 0) { + l |= readByte(reader) << (shift += 8); + } + return l; + } + + private static int bitCount8Bytes(FST.BytesReader reader) throws IOException { + return Long.bitCount(reader.readLong()); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java new file mode 100644 index 0000000000000..7a58a350fcab1 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java @@ -0,0 +1,164 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.StringHelper; + +import java.io.IOException; + +/** + * An FST {@link Outputs} implementation where each output is a sequence of bytes. 
+ * + * @lucene.experimental + */ +public final class ByteSequenceOutputs extends Outputs { + + private static final BytesRef NO_OUTPUT = new BytesRef(); + private static final ByteSequenceOutputs singleton = new ByteSequenceOutputs(); + + private ByteSequenceOutputs() {} + + public static ByteSequenceOutputs getSingleton() { + return singleton; + } + + @Override + public BytesRef common(BytesRef output1, BytesRef output2) { + assert output1 != null; + assert output2 != null; + + int pos1 = output1.offset; + int pos2 = output2.offset; + int stopAt1 = pos1 + Math.min(output1.length, output2.length); + while (pos1 < stopAt1) { + if (output1.bytes[pos1] != output2.bytes[pos2]) { + break; + } + pos1++; + pos2++; + } + + if (pos1 == output1.offset) { + // no common prefix + return NO_OUTPUT; + } else if (pos1 == output1.offset + output1.length) { + // output1 is a prefix of output2 + return output1; + } else if (pos2 == output2.offset + output2.length) { + // output2 is a prefix of output1 + return output2; + } else { + return new BytesRef(output1.bytes, output1.offset, pos1 - output1.offset); + } + } + + @Override + public BytesRef subtract(BytesRef output, BytesRef inc) { + assert output != null; + assert inc != null; + if (inc == NO_OUTPUT) { + // no prefix removed + return output; + } else { + assert StringHelper.startsWith(output, inc); + if (inc.length == output.length) { + // entire output removed + return NO_OUTPUT; + } else { + assert inc.length < output.length : "inc.length=" + inc.length + " vs output.length=" + output.length; + assert inc.length > 0; + return new BytesRef(output.bytes, output.offset + inc.length, output.length - inc.length); + } + } + } + + @Override + public BytesRef add(BytesRef prefix, BytesRef output) { + assert prefix != null; + assert output != null; + if (prefix == NO_OUTPUT) { + return output; + } else if (output == NO_OUTPUT) { + return prefix; + } else { + assert prefix.length > 0; + assert output.length > 0; + BytesRef result = new BytesRef(prefix.length + output.length); + System.arraycopy(prefix.bytes, prefix.offset, result.bytes, 0, prefix.length); + System.arraycopy(output.bytes, output.offset, result.bytes, prefix.length, output.length); + result.length = prefix.length + output.length; + return result; + } + } + + @Override + public void write(BytesRef prefix, DataOutput out) throws IOException { + assert prefix != null; + out.writeVInt(prefix.length); + out.writeBytes(prefix.bytes, prefix.offset, prefix.length); + } + + @Override + public BytesRef read(DataInput in) throws IOException { + final int len = in.readVInt(); + if (len == 0) { + return NO_OUTPUT; + } else { + final BytesRef output = new BytesRef(len); + in.readBytes(output.bytes, 0, len); + output.length = len; + return output; + } + } + + @Override + public void skipOutput(DataInput in) throws IOException { + final int len = in.readVInt(); + if (len != 0) { + in.skipBytes(len); + } + } + + @Override + public BytesRef getNoOutput() { + return NO_OUTPUT; + } + + @Override + public String outputToString(BytesRef output) { + return output.toString(); + } + + private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT); + + @Override + public long ramBytesUsed(BytesRef output) { + return BASE_NUM_BYTES + RamUsageEstimator.sizeOf(output.bytes); + } + + @Override + public String toString() { + return "ByteSequenceOutputs"; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java 
b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java new file mode 100644 index 0000000000000..955327af17ba0 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java @@ -0,0 +1,129 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +/** + * Enumerates all input (BytesRef) + output pairs in an FST. + * + * @lucene.experimental + */ +public final class BytesRefFSTEnum extends FSTEnum { + private final BytesRef current = new BytesRef(10); + private final InputOutput result = new InputOutput<>(); + private BytesRef target; + + /** Holds a single input (BytesRef) + output pair. */ + public static class InputOutput { + public BytesRef input; + public T output; + } + + /** + * doFloor controls the behavior of advance: if it's true doFloor is true, advance positions to + * the biggest term before target. + */ + public BytesRefFSTEnum(FST fst) { + super(fst); + result.input = current; + current.offset = 1; + } + + public InputOutput current() { + return result; + } + + public InputOutput next() throws IOException { + // System.out.println(" enum.next"); + doNext(); + return setResult(); + } + + /** Seeks to smallest term that's >= target. */ + public InputOutput seekCeil(BytesRef target) throws IOException { + this.target = target; + targetLength = target.length; + super.doSeekCeil(); + return setResult(); + } + + /** Seeks to biggest term that's <= target. */ + public InputOutput seekFloor(BytesRef target) throws IOException { + this.target = target; + targetLength = target.length; + super.doSeekFloor(); + return setResult(); + } + + /** + * Seeks to exactly this term, returning null if the term doesn't exist. This is faster than using + * {@link #seekFloor} or {@link #seekCeil} because it short-circuits as soon the match is not + * found. 
+ */ + public InputOutput seekExact(BytesRef target) throws IOException { + this.target = target; + targetLength = target.length; + if (doSeekExact()) { + assert upto == 1 + target.length; + return setResult(); + } else { + return null; + } + } + + @Override + protected int getTargetLabel() { + if (upto - 1 == target.length) { + return FST.END_LABEL; + } else { + return target.bytes[target.offset + upto - 1] & 0xFF; + } + } + + @Override + protected int getCurrentLabel() { + // current.offset fixed at 1 + return current.bytes[upto] & 0xFF; + } + + @Override + protected void setCurrentLabel(int label) { + current.bytes[upto] = (byte) label; + } + + @Override + protected void grow() { + current.bytes = ArrayUtil.grow(current.bytes, upto + 1); + } + + private InputOutput setResult() { + if (upto == 0) { + return null; + } else { + current.length = upto - 1; + result.output = output[upto]; + return result; + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesStore.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesStore.java new file mode 100644 index 0000000000000..b2aaa9894466e --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesStore.java @@ -0,0 +1,520 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +// TODO: merge with PagedBytes, except PagedBytes doesn't +// let you read while writing which FST needs + +class BytesStore extends DataOutput implements Accountable { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BytesStore.class) + RamUsageEstimator + .shallowSizeOfInstance(ArrayList.class); + + private final List blocks = new ArrayList<>(); + + private final int blockSize; + private final int blockBits; + private final int blockMask; + + private byte[] current; + private int nextWrite; + + BytesStore(int blockBits) { + this.blockBits = blockBits; + blockSize = 1 << blockBits; + blockMask = blockSize - 1; + nextWrite = blockSize; + } + + /** Pulls bytes from the provided IndexInput. 
*/ + BytesStore(DataInput in, long numBytes, int maxBlockSize) throws IOException { + int blockSize = 2; + int blockBits = 1; + while (blockSize < numBytes && blockSize < maxBlockSize) { + blockSize *= 2; + blockBits++; + } + this.blockBits = blockBits; + this.blockSize = blockSize; + this.blockMask = blockSize - 1; + long left = numBytes; + while (left > 0) { + final int chunk = (int) Math.min(blockSize, left); + byte[] block = new byte[chunk]; + in.readBytes(block, 0, block.length); + blocks.add(block); + left -= chunk; + } + + // So .getPosition still works + nextWrite = blocks.get(blocks.size() - 1).length; + } + + /** Absolute write byte; you must ensure dest is < max position written so far. */ + public void writeByte(long dest, byte b) { + int blockIndex = (int) (dest >> blockBits); + byte[] block = blocks.get(blockIndex); + block[(int) (dest & blockMask)] = b; + } + + @Override + public void writeByte(byte b) { + if (nextWrite == blockSize) { + current = new byte[blockSize]; + blocks.add(current); + nextWrite = 0; + } + current[nextWrite++] = b; + } + + @Override + public void writeBytes(byte[] b, int offset, int len) { + while (len > 0) { + int chunk = blockSize - nextWrite; + if (len <= chunk) { + assert b != null; + assert current != null; + System.arraycopy(b, offset, current, nextWrite, len); + nextWrite += len; + break; + } else { + if (chunk > 0) { + System.arraycopy(b, offset, current, nextWrite, chunk); + offset += chunk; + len -= chunk; + } + current = new byte[blockSize]; + blocks.add(current); + nextWrite = 0; + } + } + } + + int getBlockBits() { + return blockBits; + } + + /** + * Absolute writeBytes without changing the current position. Note: this cannot "grow" the bytes, + * so you must only call it on already written parts. + */ + void writeBytes(long dest, byte[] b, int offset, int len) { + // System.out.println(" BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len); + assert dest + len <= getPosition() : "dest=" + dest + " pos=" + getPosition() + " len=" + len; + + // Note: weird: must go "backwards" because copyBytes + // calls us with overlapping src/dest. If we + // go forwards then we overwrite bytes before we can + // copy them: + + /* + int blockIndex = dest >> blockBits; + int upto = dest & blockMask; + byte[] block = blocks.get(blockIndex); + while (len > 0) { + int chunk = blockSize - upto; + System.out.println(" cycle chunk=" + chunk + " len=" + len); + if (len <= chunk) { + System.arraycopy(b, offset, block, upto, len); + break; + } else { + System.arraycopy(b, offset, block, upto, chunk); + offset += chunk; + len -= chunk; + blockIndex++; + block = blocks.get(blockIndex); + upto = 0; + } + } + */ + + final long end = dest + len; + int blockIndex = (int) (end >> blockBits); + int downTo = (int) (end & blockMask); + if (downTo == 0) { + blockIndex--; + downTo = blockSize; + } + byte[] block = blocks.get(blockIndex); + + while (len > 0) { + // System.out.println(" cycle downTo=" + downTo + " len=" + len); + if (len <= downTo) { + // System.out.println(" final: offset=" + offset + " len=" + len + " dest=" + + // (downTo-len)); + System.arraycopy(b, offset, block, downTo - len, len); + break; + } else { + len -= downTo; + // System.out.println(" partial: offset=" + (offset + len) + " len=" + downTo + " + // dest=0"); + System.arraycopy(b, offset + len, block, 0, downTo); + blockIndex--; + block = blocks.get(blockIndex); + downTo = blockSize; + } + } + } + + /** + * Absolute copy bytes self to self, without changing the position. 
Note: this cannot "grow" the + * bytes, so must only call it on already written parts. + */ + public void copyBytes(long src, long dest, int len) { + // System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len); + assert src < dest; + + // Note: weird: must go "backwards" because copyBytes + // calls us with overlapping src/dest. If we + // go forwards then we overwrite bytes before we can + // copy them: + + /* + int blockIndex = src >> blockBits; + int upto = src & blockMask; + byte[] block = blocks.get(blockIndex); + while (len > 0) { + int chunk = blockSize - upto; + System.out.println(" cycle: chunk=" + chunk + " len=" + len); + if (len <= chunk) { + writeBytes(dest, block, upto, len); + break; + } else { + writeBytes(dest, block, upto, chunk); + blockIndex++; + block = blocks.get(blockIndex); + upto = 0; + len -= chunk; + dest += chunk; + } + } + */ + + long end = src + len; + + int blockIndex = (int) (end >> blockBits); + int downTo = (int) (end & blockMask); + if (downTo == 0) { + blockIndex--; + downTo = blockSize; + } + byte[] block = blocks.get(blockIndex); + + while (len > 0) { + // System.out.println(" cycle downTo=" + downTo); + if (len <= downTo) { + // System.out.println(" finish"); + writeBytes(dest, block, downTo - len, len); + break; + } else { + // System.out.println(" partial"); + len -= downTo; + writeBytes(dest + len, block, 0, downTo); + blockIndex--; + block = blocks.get(blockIndex); + downTo = blockSize; + } + } + } + + /** Copies bytes from this store to a target byte array. */ + public void copyBytes(long src, byte[] dest, int offset, int len) { + int blockIndex = (int) (src >> blockBits); + int upto = (int) (src & blockMask); + byte[] block = blocks.get(blockIndex); + while (len > 0) { + int chunk = blockSize - upto; + if (len <= chunk) { + System.arraycopy(block, upto, dest, offset, len); + break; + } else { + System.arraycopy(block, upto, dest, offset, chunk); + blockIndex++; + block = blocks.get(blockIndex); + upto = 0; + len -= chunk; + offset += chunk; + } + } + } + + /** Writes an int at the absolute position without changing the current pointer. */ + public void writeInt(long pos, int value) { + int blockIndex = (int) (pos >> blockBits); + int upto = (int) (pos & blockMask); + byte[] block = blocks.get(blockIndex); + int shift = 24; + for (int i = 0; i < 4; i++) { + block[upto++] = (byte) (value >> shift); + shift -= 8; + if (upto == blockSize) { + upto = 0; + blockIndex++; + block = blocks.get(blockIndex); + } + } + } + + /** Reverse from srcPos, inclusive, to destPos, inclusive. 
*/ + public void reverse(long srcPos, long destPos) { + assert srcPos < destPos; + assert destPos < getPosition(); + // System.out.println("reverse src=" + srcPos + " dest=" + destPos); + + int srcBlockIndex = (int) (srcPos >> blockBits); + int src = (int) (srcPos & blockMask); + byte[] srcBlock = blocks.get(srcBlockIndex); + + int destBlockIndex = (int) (destPos >> blockBits); + int dest = (int) (destPos & blockMask); + byte[] destBlock = blocks.get(destBlockIndex); + // System.out.println(" srcBlock=" + srcBlockIndex + " destBlock=" + destBlockIndex); + + int limit = (int) (destPos - srcPos + 1) / 2; + for (int i = 0; i < limit; i++) { + // System.out.println(" cycle src=" + src + " dest=" + dest); + byte b = srcBlock[src]; + srcBlock[src] = destBlock[dest]; + destBlock[dest] = b; + src++; + if (src == blockSize) { + srcBlockIndex++; + srcBlock = blocks.get(srcBlockIndex); + // System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock); + src = 0; + } + + dest--; + if (dest == -1) { + destBlockIndex--; + destBlock = blocks.get(destBlockIndex); + // System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock); + dest = blockSize - 1; + } + } + } + + public void skipBytes(int len) { + while (len > 0) { + int chunk = blockSize - nextWrite; + if (len <= chunk) { + nextWrite += len; + break; + } else { + len -= chunk; + current = new byte[blockSize]; + blocks.add(current); + nextWrite = 0; + } + } + } + + public long getPosition() { + return ((long) blocks.size() - 1) * blockSize + nextWrite; + } + + /** + * Pos must be less than the max position written so far! Ie, you cannot "grow" the file with + * this! + */ + public void truncate(long newLen) { + assert newLen <= getPosition(); + assert newLen >= 0; + int blockIndex = (int) (newLen >> blockBits); + nextWrite = (int) (newLen & blockMask); + if (nextWrite == 0) { + blockIndex--; + nextWrite = blockSize; + } + blocks.subList(blockIndex + 1, blocks.size()).clear(); + if (newLen == 0) { + current = null; + } else { + current = blocks.get(blockIndex); + } + assert newLen == getPosition(); + } + + public void finish() { + if (current != null) { + byte[] lastBuffer = new byte[nextWrite]; + System.arraycopy(current, 0, lastBuffer, 0, nextWrite); + blocks.set(blocks.size() - 1, lastBuffer); + current = null; + } + } + + /** Writes all of our bytes to the target {@link DataOutput}. 
*/ + public void writeTo(DataOutput out) throws IOException { + for (byte[] block : blocks) { + out.writeBytes(block, 0, block.length); + } + } + + public FST.BytesReader getForwardReader() { + if (blocks.size() == 1) { + return new ForwardBytesReader(blocks.get(0)); + } + return new FST.BytesReader() { + private byte[] current; + private int nextBuffer; + private int nextRead = blockSize; + + @Override + public byte readByte() { + if (nextRead == blockSize) { + current = blocks.get(nextBuffer++); + nextRead = 0; + } + return current[nextRead++]; + } + + @Override + public void skipBytes(long count) { + setPosition(getPosition() + count); + } + + @Override + public void readBytes(byte[] b, int offset, int len) { + while (len > 0) { + int chunkLeft = blockSize - nextRead; + if (len <= chunkLeft) { + System.arraycopy(current, nextRead, b, offset, len); + nextRead += len; + break; + } else { + if (chunkLeft > 0) { + System.arraycopy(current, nextRead, b, offset, chunkLeft); + offset += chunkLeft; + len -= chunkLeft; + } + current = blocks.get(nextBuffer++); + nextRead = 0; + } + } + } + + @Override + public long getPosition() { + return ((long) nextBuffer - 1) * blockSize + nextRead; + } + + @Override + public void setPosition(long pos) { + int bufferIndex = (int) (pos >> blockBits); + if (nextBuffer != bufferIndex + 1) { + nextBuffer = bufferIndex + 1; + current = blocks.get(bufferIndex); + } + nextRead = (int) (pos & blockMask); + assert getPosition() == pos; + } + + @Override + public boolean reversed() { + return false; + } + }; + } + + public FST.BytesReader getReverseReader() { + return getReverseReader(true); + } + + FST.BytesReader getReverseReader(boolean allowSingle) { + if (allowSingle && blocks.size() == 1) { + return new ReverseBytesReader(blocks.get(0)); + } + return new FST.BytesReader() { + private byte[] current = blocks.size() == 0 ? null : blocks.get(0); + private int nextBuffer = -1; + private int nextRead = 0; + + @Override + public byte readByte() { + if (nextRead == -1) { + current = blocks.get(nextBuffer--); + nextRead = blockSize - 1; + } + return current[nextRead--]; + } + + @Override + public void skipBytes(long count) { + setPosition(getPosition() - count); + } + + @Override + public void readBytes(byte[] b, int offset, int len) { + for (int i = 0; i < len; i++) { + b[offset + i] = readByte(); + } + } + + @Override + public long getPosition() { + return ((long) nextBuffer + 1) * blockSize + nextRead; + } + + @Override + public void setPosition(long pos) { + // NOTE: a little weird because if you + // setPosition(0), the next byte you read is + // bytes[0] ... but I would expect bytes[-1] (ie, + // EOF)...? 
+ int bufferIndex = (int) (pos >> blockBits); + if (nextBuffer != bufferIndex - 1) { + nextBuffer = bufferIndex - 1; + current = blocks.get(bufferIndex); + } + nextRead = (int) (pos & blockMask); + assert getPosition() == pos : "pos=" + pos + " getPos()=" + getPosition(); + } + + @Override + public boolean reversed() { + return true; + } + }; + } + + @Override + public long ramBytesUsed() { + long size = BASE_RAM_BYTES_USED; + for (byte[] block : blocks) { + size += RamUsageEstimator.sizeOf(block); + } + return size; + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(numBlocks=" + blocks.size() + ")"; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java new file mode 100644 index 0000000000000..9fb73edb5a118 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java @@ -0,0 +1,1569 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.store.OutputStreamDataOutput; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +// TODO: break this into WritableFST and ReadOnlyFST.. then +// we can have subclasses of ReadOnlyFST to handle the +// different byte[] level encodings (packed or +// not)... and things like nodeCount, arcCount are read only + +// TODO: if FST is pure prefix trie we can do a more compact +// job, ie, once we are at a 'suffix only', just store the +// completion labels as a string not as a series of arcs. + +// NOTE: while the FST is able to represent a non-final +// dead-end state (NON_FINAL_END_NODE=0), the layers above +// (FSTEnum, Util) have problems with this!! + +/** + * Represents an finite state machine (FST), using a compact byte[] format. + * + *

The format is similar to what's used by Morfologik + * (https://github.com/morfologik/morfologik-stemming). + * + *
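As one such simple example (illustrative only, not part of the patch), the sketch below builds and queries a small FST mapping byte sequences to long outputs, written against the upstream org.apache.lucene.util.fst API roughly as it stood around Lucene 7/8 (Builder, PositiveIntOutputs, Util); the class and variable names are invented for the sketch, and the copy vendored by this patch is read-only, so it only needs the loading and traversal half of this API.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class FstUsageSketch {
    public static void main(String[] args) throws Exception {
        // Inputs must be added in sorted order; shared prefixes and suffixes are
        // what make the resulting byte[] representation compact.
        String[] keys = { "cat", "dog", "dogs" };
        long[] values = { 5, 7, 12 };

        PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
        Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
        IntsRefBuilder scratch = new IntsRefBuilder();
        for (int i = 0; i < keys.length; i++) {
            builder.add(Util.toIntsRef(new BytesRef(keys[i]), scratch), values[i]);
        }
        FST<Long> fst = builder.finish();

        System.out.println(Util.get(fst, new BytesRef("dogs"))); // 12
        System.out.println(Util.get(fst, new BytesRef("cow")));  // null (not in the FST)
    }
}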

See the {@link org.apache.lucene.util.fst package documentation} for some simple examples. + * + * @lucene.experimental + */ +public final class FST implements Accountable { + + /** Specifies allowed range of each int input label for this FST. */ + public enum INPUT_TYPE { + BYTE1, + BYTE2, + BYTE4 + } + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FST.class); + + private static final int BIT_FINAL_ARC = 1 << 0; + static final int BIT_LAST_ARC = 1 << 1; + static final int BIT_TARGET_NEXT = 1 << 2; + + // TODO: we can free up a bit if we can nuke this: + private static final int BIT_STOP_NODE = 1 << 3; + + /** This flag is set if the arc has an output. */ + public static final int BIT_ARC_HAS_OUTPUT = 1 << 4; + + private static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5; + + /** Value of the arc flags to declare a node with fixed length arcs designed for binary search. */ + // We use this as a marker because this one flag is illegal by itself. + public static final byte ARCS_FOR_BINARY_SEARCH = BIT_ARC_HAS_FINAL_OUTPUT; + + /** + * Value of the arc flags to declare a node with fixed length arcs and bit table designed for + * direct addressing. + */ + static final byte ARCS_FOR_DIRECT_ADDRESSING = 1 << 6; + + /** @see #shouldExpandNodeWithFixedLengthArcs */ + static final int FIXED_LENGTH_ARC_SHALLOW_DEPTH = 3; // 0 => only root node. + + /** @see #shouldExpandNodeWithFixedLengthArcs */ + static final int FIXED_LENGTH_ARC_SHALLOW_NUM_ARCS = 5; + + /** @see #shouldExpandNodeWithFixedLengthArcs */ + static final int FIXED_LENGTH_ARC_DEEP_NUM_ARCS = 10; + + /** + * Maximum oversizing factor allowed for direct addressing compared to binary search when + * expansion credits allow the oversizing. This factor prevents expansions that are obviously too + * costly even if there are sufficient credits. + * + * @see #shouldExpandNodeWithDirectAddressing + */ + private static final float DIRECT_ADDRESSING_MAX_OVERSIZE_WITH_CREDIT_FACTOR = 1.66f; + + // Increment version to change it + private static final String FILE_FORMAT_NAME = "FST"; + private static final int VERSION_START = 0; + /** Changed numBytesPerArc for array'd case from byte to int. */ + private static final int VERSION_INT_NUM_BYTES_PER_ARC = 1; + + /** Write BYTE2 labels as 2-byte short, not vInt. */ + private static final int VERSION_SHORT_BYTE2_LABELS = 2; + + /** Added optional packed format. */ + private static final int VERSION_PACKED = 3; + + /** Changed from int to vInt for encoding arc targets. + * Also changed maxBytesPerArc from int to vInt in the array case. 
*/ + private static final int VERSION_VINT_TARGET = 4; + + /** Don't store arcWithOutputCount anymore */ + private static final int VERSION_NO_NODE_ARC_COUNTS = 5; + + private static final int VERSION_PACKED_REMOVED = 6; + + private static final int VERSION_LITTLE_ENDIAN = 8; + private static final int VERSION_CURRENT = VERSION_LITTLE_ENDIAN; + + // Never serialized; just used to represent the virtual + // final node w/ no arcs: + private static final long FINAL_END_NODE = -1; + + // Never serialized; just used to represent the virtual + // non-final node w/ no arcs: + private static final long NON_FINAL_END_NODE = 0; + + /** If arc has this label then that arc is final/accepted */ + public static final int END_LABEL = -1; + + final INPUT_TYPE inputType; + + // if non-null, this FST accepts the empty string and + // produces this output + T emptyOutput; + + /** + * A {@link BytesStore}, used during building, or during reading when the FST is very large (more + * than 1 GB). If the FST is less than 1 GB then bytesArray is set instead. + */ + final BytesStore bytes; + + private final FSTStore fstStore; + + private long startNode = -1; + + public final Outputs outputs; + + private final int version; + + /** Represents a single arc. */ + public static final class Arc { + + // *** Arc fields. + + private int label; + + private T output; + + private long target; + + private byte flags; + + private T nextFinalOutput; + + private long nextArc; + + private byte nodeFlags; + + // *** Fields for arcs belonging to a node with fixed length arcs. + // So only valid when bytesPerArc != 0. + // nodeFlags == ARCS_FOR_BINARY_SEARCH || nodeFlags == ARCS_FOR_DIRECT_ADDRESSING. + + private int bytesPerArc; + + private long posArcsStart; + + private int arcIdx; + + private int numArcs; + + // *** Fields for a direct addressing node. nodeFlags == ARCS_FOR_DIRECT_ADDRESSING. + + /** + * Start position in the {@link BytesReader} of the presence bits for a direct addressing + * node, aka the bit-table + */ + private long bitTableStart; + + /** First label of a direct addressing node. */ + private int firstLabel; + + /** + * Index of the current label of a direct addressing node. While {@link #arcIdx} is the current + * index in the label range, {@link #presenceIndex} is its corresponding index in the list of + * actually present labels. It is equal to the number of bits set before the bit at {@link + * #arcIdx} in the bit-table. This field is a cache to avoid to count bits set repeatedly when + * iterating the next arcs. + */ + private int presenceIndex; + + /** Returns this */ + public Arc copyFrom(Arc other) { + label = other.label(); + target = other.target(); + flags = other.flags(); + output = other.output(); + nextFinalOutput = other.nextFinalOutput(); + nextArc = other.nextArc(); + nodeFlags = other.nodeFlags(); + bytesPerArc = other.bytesPerArc(); + + // Fields for arcs belonging to a node with fixed length arcs. + // We could avoid copying them if bytesPerArc() == 0 (this was the case with previous code, + // and the current code + // still supports that), but it may actually help external uses of FST to have consistent arc + // state, and debugging + // is easier. 
+ posArcsStart = other.posArcsStart(); + arcIdx = other.arcIdx(); + numArcs = other.numArcs(); + bitTableStart = other.bitTableStart; + firstLabel = other.firstLabel(); + presenceIndex = other.presenceIndex; + + return this; + } + + boolean flag(int flag) { + return FST.flag(flags, flag); + } + + public boolean isLast() { + return flag(BIT_LAST_ARC); + } + + public boolean isFinal() { + return flag(BIT_FINAL_ARC); + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + b.append(" target=").append(target()); + b.append(" label=0x").append(Integer.toHexString(label())); + if (flag(BIT_FINAL_ARC)) { + b.append(" final"); + } + if (flag(BIT_LAST_ARC)) { + b.append(" last"); + } + if (flag(BIT_TARGET_NEXT)) { + b.append(" targetNext"); + } + if (flag(BIT_STOP_NODE)) { + b.append(" stop"); + } + if (flag(BIT_ARC_HAS_OUTPUT)) { + b.append(" output=").append(output()); + } + if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) { + b.append(" nextFinalOutput=").append(nextFinalOutput()); + } + if (bytesPerArc() != 0) { + b.append(" arcArray(idx=") + .append(arcIdx()) + .append(" of ") + .append(numArcs()) + .append(")") + .append("(") + .append(nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING ? "da" : "bs") + .append(")"); + } + return b.toString(); + } + + public int label() { + return label; + } + + public T output() { + return output; + } + + /** Ord/address to target node. */ + public long target() { + return target; + } + + public byte flags() { + return flags; + } + + public T nextFinalOutput() { + return nextFinalOutput; + } + + /** + * Address (into the byte[]) of the next arc - only for list of variable length arc. Or + * ord/address to the next node if label == {@link #END_LABEL}. + */ + long nextArc() { + return nextArc; + } + + /** Where we are in the array; only valid if bytesPerArc != 0. */ + public int arcIdx() { + return arcIdx; + } + + /** + * Node header flags. Only meaningful to check if the value is either {@link + * #ARCS_FOR_BINARY_SEARCH} or {@link #ARCS_FOR_DIRECT_ADDRESSING} (other value when bytesPerArc + * == 0). + */ + public byte nodeFlags() { + return nodeFlags; + } + + /** Where the first arc in the array starts; only valid if bytesPerArc != 0 */ + public long posArcsStart() { + return posArcsStart; + } + + /** + * Non-zero if this arc is part of a node with fixed length arcs, which means all arcs for the + * node are encoded with a fixed number of bytes so that we binary search or direct address. We + * do when there are enough arcs leaving one node. It wastes some bytes but gives faster + * lookups. + */ + public int bytesPerArc() { + return bytesPerArc; + } + + /** + * How many arcs; only valid if bytesPerArc != 0 (fixed length arcs). For a node designed for + * binary search this is the array size. For a node designed for direct addressing, this is the + * label range. + */ + public int numArcs() { + return numArcs; + } + + /** + * First label of a direct addressing node. Only valid if nodeFlags == {@link + * #ARCS_FOR_DIRECT_ADDRESSING}. + */ + int firstLabel() { + return firstLabel; + } + + /** + * Helper methods to read the bit-table of a direct addressing node. Only valid for {@link Arc} + * with {@link Arc#nodeFlags()} == {@code ARCS_FOR_DIRECT_ADDRESSING}. + */ + static class BitTable { + + /** See {@link BitTableUtil#isBitSet(int, BytesReader)}. 
*/ + static boolean isBitSet(int bitIndex, Arc arc, BytesReader in) throws IOException { + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + in.setPosition(arc.bitTableStart); + return BitTableUtil.isBitSet(bitIndex, in); + } + + /** + * See {@link BitTableUtil#countBits(int, BytesReader)}. The count of bit set is the + * number of arcs of a direct addressing node. + */ + static int countBits(Arc arc, BytesReader in) throws IOException { + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + in.setPosition(arc.bitTableStart); + return BitTableUtil.countBits(getNumPresenceBytes(arc.numArcs()), in); + } + + /** See {@link BitTableUtil#countBitsUpTo(int, BytesReader)}. */ + static int countBitsUpTo(int bitIndex, Arc arc, BytesReader in) throws IOException { + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + in.setPosition(arc.bitTableStart); + return BitTableUtil.countBitsUpTo(bitIndex, in); + } + + /** See {@link BitTableUtil#nextBitSet(int, int, BytesReader)}. */ + static int nextBitSet(int bitIndex, Arc arc, BytesReader in) throws IOException { + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + in.setPosition(arc.bitTableStart); + return BitTableUtil.nextBitSet(bitIndex, getNumPresenceBytes(arc.numArcs()), in); + } + + /** See {@link BitTableUtil#previousBitSet(int, BytesReader)}. */ + static int previousBitSet(int bitIndex, Arc arc, BytesReader in) throws IOException { + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + in.setPosition(arc.bitTableStart); + return BitTableUtil.previousBitSet(bitIndex, in); + } + + /** Asserts the bit-table of the provided {@link Arc} is valid. */ + static boolean assertIsValid(Arc arc, BytesReader in) throws IOException { + assert arc.bytesPerArc() > 0; + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + // First bit must be set. + assert isBitSet(0, arc, in); + // Last bit must be set. + assert isBitSet(arc.numArcs() - 1, arc, in); + // No bit set after the last arc. + assert nextBitSet(arc.numArcs() - 1, arc, in) == -1; + return true; + } + } + } + + private static boolean flag(int flags, int bit) { + return (flags & bit) != 0; + } + + // make a new empty FST, for building; Builder invokes this + FST(INPUT_TYPE inputType, Outputs outputs, int bytesPageBits) { + this.inputType = inputType; + this.outputs = outputs; + fstStore = null; + bytes = new BytesStore(bytesPageBits); + // pad: ensure no node gets address 0 which is reserved to mean + // the stop state w/ no arcs + bytes.writeByte((byte) 0); + emptyOutput = null; + this.version = VERSION_CURRENT; + } + + private static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28; + + /** Load a previously saved FST. */ + public FST(DataInput metaIn, DataInput in, Outputs outputs) throws IOException { + this(metaIn, in, outputs, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS)); + } + + /** + * Load a previously saved FST; maxBlockBits allows you to control the size of the byte[] pages + * used to hold the FST bytes. 
+ */ + public FST(DataInput metaIn, DataInput in, Outputs outputs, FSTStore fstStore) throws IOException { + bytes = null; + this.fstStore = fstStore; + this.outputs = outputs; + + // NOTE: only reads formats VERSION_START up to VERSION_CURRENT; we don't have + // back-compat promise for FSTs (they are experimental), but we are sometimes able to offer it + this.version = CodecUtil.checkHeader(metaIn, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT); + if (version < VERSION_PACKED_REMOVED) { + if (in.readByte() == 1) { + throw new CorruptIndexException("Cannot read packed FSTs anymore", in); + } + } + if (metaIn.readByte() == 1) { + // accepts empty string + // 1 KB blocks: + BytesStore emptyBytes = new BytesStore(10); + int numBytes = metaIn.readVInt(); + emptyBytes.copyBytes(metaIn, numBytes); + + // De-serialize empty-string output: + BytesReader reader = emptyBytes.getReverseReader(); + // NoOutputs uses 0 bytes when writing its output, + // so we have to check here else BytesStore gets + // angry: + if (numBytes > 0) { + reader.setPosition(numBytes - 1); + } + emptyOutput = outputs.readFinalOutput(reader); + } else { + emptyOutput = null; + } + final byte t = metaIn.readByte(); + switch (t) { + case 0: + inputType = INPUT_TYPE.BYTE1; + break; + case 1: + inputType = INPUT_TYPE.BYTE2; + break; + case 2: + inputType = INPUT_TYPE.BYTE4; + break; + default: + throw new CorruptIndexException("invalid input type " + t, in); + } + startNode = metaIn.readVLong(); + if (version < VERSION_NO_NODE_ARC_COUNTS) { + metaIn.readVLong(); + metaIn.readVLong(); + metaIn.readVLong(); + } + + long numBytes = metaIn.readVLong(); + this.fstStore.init(in, numBytes); + } + + @Override + public long ramBytesUsed() { + long size = BASE_RAM_BYTES_USED; + if (this.fstStore != null) { + size += this.fstStore.ramBytesUsed(); + } else { + size += bytes.ramBytesUsed(); + } + + return size; + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs; + } + + void finish(long newStartNode) throws IOException { + assert newStartNode <= bytes.getPosition(); + if (startNode != -1) { + throw new IllegalStateException("already finished"); + } + if (newStartNode == FINAL_END_NODE && emptyOutput != null) { + newStartNode = 0; + } + startNode = newStartNode; + bytes.finish(); + } + + public T getEmptyOutput() { + return emptyOutput; + } + + void setEmptyOutput(T v) { + if (emptyOutput != null) { + emptyOutput = outputs.merge(emptyOutput, v); + } else { + emptyOutput = v; + } + } + + public void save(DataOutput metaOut, DataOutput out) throws IOException { + if (startNode == -1) { + throw new IllegalStateException("call finish first"); + } + CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT); + // TODO: really we should encode this as an arc, arriving + // to the root node, instead of special casing here: + if (emptyOutput != null) { + // Accepts empty string + metaOut.writeByte((byte) 1); + + // Serialize empty-string output: + ByteBuffersDataOutput ros = new ByteBuffersDataOutput(); + outputs.writeFinalOutput(emptyOutput, ros); + byte[] emptyOutputBytes = ros.toArrayCopy(); + int emptyLen = emptyOutputBytes.length; + + // reverse + final int stopAt = emptyLen / 2; + int upto = 0; + while (upto < stopAt) { + final byte b = emptyOutputBytes[upto]; + emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1]; + emptyOutputBytes[emptyLen - upto - 1] = b; + upto++; + } + metaOut.writeVInt(emptyLen); + metaOut.writeBytes(emptyOutputBytes, 0, 
emptyLen); + } else { + metaOut.writeByte((byte) 0); + } + final byte t; + if (inputType == FST.INPUT_TYPE.BYTE1) { + t = 0; + } else if (inputType == FST.INPUT_TYPE.BYTE2) { + t = 1; + } else { + t = 2; + } + metaOut.writeByte(t); + metaOut.writeVLong(startNode); + if (bytes != null) { + long numBytes = bytes.getPosition(); + metaOut.writeVLong(numBytes); + bytes.writeTo(out); + } else { + assert fstStore != null; + fstStore.writeTo(out); + } + } + + /** Writes an automaton to a file. */ + public void save(final Path path) throws IOException { + try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) { + DataOutput out = new OutputStreamDataOutput(os); + save(out, out); + } + } + + /** Reads an automaton from a file. */ + public static FST read(Path path, Outputs outputs) throws IOException { + try (InputStream is = Files.newInputStream(path)) { + DataInput in = new InputStreamDataInput(new BufferedInputStream(is)); + return new FST<>(in, in, outputs); + } + } + + private void writeLabel(DataOutput out, int v) throws IOException { + assert v >= 0 : "v=" + v; + if (inputType == FST.INPUT_TYPE.BYTE1) { + assert v <= 255 : "v=" + v; + out.writeByte((byte) v); + } else if (inputType == FST.INPUT_TYPE.BYTE2) { + assert v <= 65535 : "v=" + v; + out.writeShort((short) v); + } else { + out.writeVInt(v); + } + } + + /** Reads one BYTE1/2/4 label from the provided {@link DataInput}. */ + public int readLabel(DataInput in) throws IOException { + final int v; + if (inputType == INPUT_TYPE.BYTE1) { + // Unsigned byte: + v = in.readByte() & 0xFF; + } else if (inputType == INPUT_TYPE.BYTE2) { + // Unsigned short: + if (version < VERSION_LITTLE_ENDIAN) { + v = Short.reverseBytes(in.readShort()) & 0xFFFF; + } else { + v = in.readShort() & 0xFFFF; + } + } else { + v = in.readVInt(); + } + return v; + } + + /** returns true if the node at this address has any outgoing arcs */ + public static boolean targetHasArcs(Arc arc) { + return arc.target() > 0; + } + + // serializes new node by appending its bytes to the end + // of the current byte[] + long addNode(FSTCompiler fstCompiler, FSTCompiler.UnCompiledNode nodeIn) throws IOException { + T NO_OUTPUT = outputs.getNoOutput(); + + // System.out.println("FST.addNode pos=" + bytes.getPosition() + " numArcs=" + nodeIn.numArcs); + if (nodeIn.numArcs == 0) { + if (nodeIn.isFinal) { + return FINAL_END_NODE; + } else { + return NON_FINAL_END_NODE; + } + } + final long startAddress = fstCompiler.bytes.getPosition(); + // System.out.println(" startAddr=" + startAddress); + + final boolean doFixedLengthArcs = shouldExpandNodeWithFixedLengthArcs(fstCompiler, nodeIn); + if (doFixedLengthArcs) { + // System.out.println(" fixed length arcs"); + if (fstCompiler.numBytesPerArc.length < nodeIn.numArcs) { + fstCompiler.numBytesPerArc = new int[ArrayUtil.oversize(nodeIn.numArcs, Integer.BYTES)]; + fstCompiler.numLabelBytesPerArc = new int[fstCompiler.numBytesPerArc.length]; + } + } + + fstCompiler.arcCount += nodeIn.numArcs; + + final int lastArc = nodeIn.numArcs - 1; + + long lastArcStart = fstCompiler.bytes.getPosition(); + int maxBytesPerArc = 0; + int maxBytesPerArcWithoutLabel = 0; + for (int arcIdx = 0; arcIdx < nodeIn.numArcs; arcIdx++) { + final FSTCompiler.Arc arc = nodeIn.arcs[arcIdx]; + final FSTCompiler.CompiledNode target = (FSTCompiler.CompiledNode) arc.target; + int flags = 0; + // System.out.println(" arc " + arcIdx + " label=" + arc.label + " -> target=" + + // target.node); + + if (arcIdx == lastArc) { + flags += BIT_LAST_ARC; + } + + if 
(fstCompiler.lastFrozenNode == target.node && doFixedLengthArcs == false) { + // TODO: for better perf (but more RAM used) we + // could avoid this except when arc is "near" the + // last arc: + flags += BIT_TARGET_NEXT; + } + + if (arc.isFinal) { + flags += BIT_FINAL_ARC; + if (arc.nextFinalOutput != NO_OUTPUT) { + flags += BIT_ARC_HAS_FINAL_OUTPUT; + } + } else { + assert arc.nextFinalOutput == NO_OUTPUT; + } + + boolean targetHasArcs = target.node > 0; + + if (targetHasArcs == false) { + flags += BIT_STOP_NODE; + } + + if (arc.output != NO_OUTPUT) { + flags += BIT_ARC_HAS_OUTPUT; + } + + fstCompiler.bytes.writeByte((byte) flags); + long labelStart = fstCompiler.bytes.getPosition(); + writeLabel(fstCompiler.bytes, arc.label); + int numLabelBytes = (int) (fstCompiler.bytes.getPosition() - labelStart); + + // System.out.println(" write arc: label=" + (char) arc.label + " flags=" + flags + " + // target=" + target.node + " pos=" + bytes.getPosition() + " output=" + + // outputs.outputToString(arc.output)); + + if (arc.output != NO_OUTPUT) { + outputs.write(arc.output, fstCompiler.bytes); + // System.out.println(" write output"); + } + + if (arc.nextFinalOutput != NO_OUTPUT) { + // System.out.println(" write final output"); + outputs.writeFinalOutput(arc.nextFinalOutput, fstCompiler.bytes); + } + + if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0) { + assert target.node > 0; + // System.out.println(" write target"); + fstCompiler.bytes.writeVLong(target.node); + } + + // just write the arcs "like normal" on first pass, but record how many bytes each one took + // and max byte size: + if (doFixedLengthArcs) { + int numArcBytes = (int) (fstCompiler.bytes.getPosition() - lastArcStart); + fstCompiler.numBytesPerArc[arcIdx] = numArcBytes; + fstCompiler.numLabelBytesPerArc[arcIdx] = numLabelBytes; + lastArcStart = fstCompiler.bytes.getPosition(); + maxBytesPerArc = Math.max(maxBytesPerArc, numArcBytes); + maxBytesPerArcWithoutLabel = Math.max(maxBytesPerArcWithoutLabel, numArcBytes - numLabelBytes); + // System.out.println(" arcBytes=" + numArcBytes + " labelBytes=" + numLabelBytes); + } + } + + // TODO: try to avoid wasteful cases: disable doFixedLengthArcs in that case + /* + * + * LUCENE-4682: what is a fair heuristic here? + * It could involve some of these: + * 1. how "busy" the node is: nodeIn.inputCount relative to frontier[0].inputCount? + * 2. how much binSearch saves over scan: nodeIn.numArcs + * 3. waste: numBytes vs numBytesExpanded + * + * the one below just looks at #3 + if (doFixedLengthArcs) { + // rough heuristic: make this 1.25 "waste factor" a parameter to the phd ctor???? 
+ int numBytes = lastArcStart - startAddress; + int numBytesExpanded = maxBytesPerArc * nodeIn.numArcs; + if (numBytesExpanded > numBytes*1.25) { + doFixedLengthArcs = false; + } + } + */ + + if (doFixedLengthArcs) { + assert maxBytesPerArc > 0; + // 2nd pass just "expands" all arcs to take up a fixed byte size + + int labelRange = nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label + 1; + assert labelRange > 0; + if (shouldExpandNodeWithDirectAddressing(fstCompiler, nodeIn, maxBytesPerArc, maxBytesPerArcWithoutLabel, labelRange)) { + writeNodeForDirectAddressing(fstCompiler, nodeIn, startAddress, maxBytesPerArcWithoutLabel, labelRange); + fstCompiler.directAddressingNodeCount++; + } else { + writeNodeForBinarySearch(fstCompiler, nodeIn, startAddress, maxBytesPerArc); + fstCompiler.binarySearchNodeCount++; + } + } + + final long thisNodeAddress = fstCompiler.bytes.getPosition() - 1; + fstCompiler.bytes.reverse(startAddress, thisNodeAddress); + fstCompiler.nodeCount++; + return thisNodeAddress; + } + + /** + * Returns whether the given node should be expanded with fixed length arcs. Nodes will be + * expanded depending on their depth (distance from the root node) and their number of arcs. + * + *
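+ * (In practice this favors shallow nodes, which most lookups traverse, by expanding them at a
+ * smaller fan-out than deep nodes; see the FIXED_LENGTH_ARC_* thresholds used below.)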
<p>
Nodes with fixed length arcs use more space, because they encode all arcs with a fixed + * number of bytes, but they allow either binary search or direct addressing on the arcs (instead + * of linear scan) on lookup by arc label. + */ + private boolean shouldExpandNodeWithFixedLengthArcs(FSTCompiler fstCompiler, FSTCompiler.UnCompiledNode node) { + return fstCompiler.allowFixedLengthArcs + && ((node.depth <= FIXED_LENGTH_ARC_SHALLOW_DEPTH && node.numArcs >= FIXED_LENGTH_ARC_SHALLOW_NUM_ARCS) + || node.numArcs >= FIXED_LENGTH_ARC_DEEP_NUM_ARCS); + } + + /** + * Returns whether the given node should be expanded with direct addressing instead of binary + * search. + * + *
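+ * (Hypothetical example: a node with 20 arcs of at most 5 bytes each, one-byte labels and a label
+ * range of 64 costs 20 * 5 = 100 bytes for binary search, but only (64 + 7) / 8 = 8 presence bytes
+ * plus 1 first-label byte plus 20 * 4 = 80 arc bytes = 89 bytes with direct addressing, so direct
+ * addressing is chosen there.)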
<p>
Prefer direct addressing for performance if it does not oversize binary search byte size too + * much, so that the arcs can be directly addressed by label. + * + * @see FSTCompiler#getDirectAddressingMaxOversizingFactor() + */ + private boolean shouldExpandNodeWithDirectAddressing( + FSTCompiler fstCompiler, + FSTCompiler.UnCompiledNode nodeIn, + int numBytesPerArc, + int maxBytesPerArcWithoutLabel, + int labelRange + ) { + // Anticipate precisely the size of the encodings. + int sizeForBinarySearch = numBytesPerArc * nodeIn.numArcs; + int sizeForDirectAddressing = getNumPresenceBytes(labelRange) + fstCompiler.numLabelBytesPerArc[0] + maxBytesPerArcWithoutLabel + * nodeIn.numArcs; + + // Determine the allowed oversize compared to binary search. + // This is defined by a parameter of FST Builder (default 1: no oversize). + int allowedOversize = (int) (sizeForBinarySearch * fstCompiler.getDirectAddressingMaxOversizingFactor()); + int expansionCost = sizeForDirectAddressing - allowedOversize; + + // Select direct addressing if either: + // - Direct addressing size is smaller than binary search. + // In this case, increment the credit by the reduced size (to use it later). + // - Direct addressing size is larger than binary search, but the positive credit allows the + // oversizing. + // In this case, decrement the credit by the oversize. + // In addition, do not try to oversize to a clearly too large node size + // (this is the DIRECT_ADDRESSING_MAX_OVERSIZE_WITH_CREDIT_FACTOR parameter). + if (expansionCost <= 0 + || (fstCompiler.directAddressingExpansionCredit >= expansionCost + && sizeForDirectAddressing <= allowedOversize * DIRECT_ADDRESSING_MAX_OVERSIZE_WITH_CREDIT_FACTOR)) { + fstCompiler.directAddressingExpansionCredit -= expansionCost; + return true; + } + return false; + } + + private void writeNodeForBinarySearch( + FSTCompiler fstCompiler, + FSTCompiler.UnCompiledNode nodeIn, + long startAddress, + int maxBytesPerArc + ) { + // Build the header in a buffer. + // It is a false/special arc which is in fact a node header with node flags followed by node + // metadata. + fstCompiler.fixedLengthArcsBuffer.resetPosition() + .writeByte(ARCS_FOR_BINARY_SEARCH) + .writeVInt(nodeIn.numArcs) + .writeVInt(maxBytesPerArc); + int headerLen = fstCompiler.fixedLengthArcsBuffer.getPosition(); + + // Expand the arcs in place, backwards. + long srcPos = fstCompiler.bytes.getPosition(); + long destPos = startAddress + headerLen + nodeIn.numArcs * maxBytesPerArc; + assert destPos >= srcPos; + if (destPos > srcPos) { + fstCompiler.bytes.skipBytes((int) (destPos - srcPos)); + for (int arcIdx = nodeIn.numArcs - 1; arcIdx >= 0; arcIdx--) { + destPos -= maxBytesPerArc; + int arcLen = fstCompiler.numBytesPerArc[arcIdx]; + srcPos -= arcLen; + if (srcPos != destPos) { + assert destPos > srcPos + : "destPos=" + + destPos + + " srcPos=" + + srcPos + + " arcIdx=" + + arcIdx + + " maxBytesPerArc=" + + maxBytesPerArc + + " arcLen=" + + arcLen + + " nodeIn.numArcs=" + + nodeIn.numArcs; + fstCompiler.bytes.copyBytes(srcPos, destPos, arcLen); + } + } + } + + // Write the header. + fstCompiler.bytes.writeBytes(startAddress, fstCompiler.fixedLengthArcsBuffer.getBytes(), 0, headerLen); + } + + private void writeNodeForDirectAddressing( + FSTCompiler fstCompiler, + FSTCompiler.UnCompiledNode nodeIn, + long startAddress, + int maxBytesPerArcWithoutLabel, + int labelRange + ) { + // Expand the arcs backwards in a buffer because we remove the labels. + // So the obtained arcs might occupy less space. 
This is the reason why this + // whole method is more complex. + // Drop the label bytes since we can infer the label based on the arc index, + // the presence bits, and the first label. Keep the first label. + int headerMaxLen = 11; + int numPresenceBytes = getNumPresenceBytes(labelRange); + long srcPos = fstCompiler.bytes.getPosition(); + int totalArcBytes = fstCompiler.numLabelBytesPerArc[0] + nodeIn.numArcs * maxBytesPerArcWithoutLabel; + int bufferOffset = headerMaxLen + numPresenceBytes + totalArcBytes; + byte[] buffer = fstCompiler.fixedLengthArcsBuffer.ensureCapacity(bufferOffset).getBytes(); + // Copy the arcs to the buffer, dropping all labels except first one. + for (int arcIdx = nodeIn.numArcs - 1; arcIdx >= 0; arcIdx--) { + bufferOffset -= maxBytesPerArcWithoutLabel; + int srcArcLen = fstCompiler.numBytesPerArc[arcIdx]; + srcPos -= srcArcLen; + int labelLen = fstCompiler.numLabelBytesPerArc[arcIdx]; + // Copy the flags. + fstCompiler.bytes.copyBytes(srcPos, buffer, bufferOffset, 1); + // Skip the label, copy the remaining. + int remainingArcLen = srcArcLen - 1 - labelLen; + if (remainingArcLen != 0) { + fstCompiler.bytes.copyBytes(srcPos + 1 + labelLen, buffer, bufferOffset + 1, remainingArcLen); + } + if (arcIdx == 0) { + // Copy the label of the first arc only. + bufferOffset -= labelLen; + fstCompiler.bytes.copyBytes(srcPos + 1, buffer, bufferOffset, labelLen); + } + } + assert bufferOffset == headerMaxLen + numPresenceBytes; + + // Build the header in the buffer. + // It is a false/special arc which is in fact a node header with node flags followed by node + // metadata. + fstCompiler.fixedLengthArcsBuffer.resetPosition() + .writeByte(ARCS_FOR_DIRECT_ADDRESSING) + .writeVInt(labelRange) // labelRange instead of numArcs. + .writeVInt(maxBytesPerArcWithoutLabel); // maxBytesPerArcWithoutLabel instead of maxBytesPerArc. + int headerLen = fstCompiler.fixedLengthArcsBuffer.getPosition(); + + // Prepare the builder byte store. Enlarge or truncate if needed. + long nodeEnd = startAddress + headerLen + numPresenceBytes + totalArcBytes; + long currentPosition = fstCompiler.bytes.getPosition(); + if (nodeEnd >= currentPosition) { + fstCompiler.bytes.skipBytes((int) (nodeEnd - currentPosition)); + } else { + fstCompiler.bytes.truncate(nodeEnd); + } + assert fstCompiler.bytes.getPosition() == nodeEnd; + + // Write the header. + long writeOffset = startAddress; + fstCompiler.bytes.writeBytes(writeOffset, fstCompiler.fixedLengthArcsBuffer.getBytes(), 0, headerLen); + writeOffset += headerLen; + + // Write the presence bits + writePresenceBits(fstCompiler, nodeIn, writeOffset, numPresenceBytes); + writeOffset += numPresenceBytes; + + // Write the first label and the arcs. + fstCompiler.bytes.writeBytes(writeOffset, fstCompiler.fixedLengthArcsBuffer.getBytes(), bufferOffset, totalArcBytes); + } + + private void writePresenceBits(FSTCompiler fstCompiler, FSTCompiler.UnCompiledNode nodeIn, long dest, int numPresenceBytes) { + long bytePos = dest; + byte presenceBits = 1; // The first arc is always present. + int presenceIndex = 0; + int previousLabel = nodeIn.arcs[0].label; + for (int arcIdx = 1; arcIdx < nodeIn.numArcs; arcIdx++) { + int label = nodeIn.arcs[arcIdx].label; + assert label > previousLabel; + presenceIndex += label - previousLabel; + while (presenceIndex >= Byte.SIZE) { + fstCompiler.bytes.writeByte(bytePos++, presenceBits); + presenceBits = 0; + presenceIndex -= Byte.SIZE; + } + // Set the bit at presenceIndex to flag that the corresponding arc is present. 
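+ // (Hypothetical example: arcs with labels {10, 12, 15} span the label range [10..15], so bits
+ // 0, 2 and 5 get set and a single presence byte 0b00100101 is written; the absent labels
+ // 11, 13 and 14 stay 0.)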
+ presenceBits |= 1 << presenceIndex; + previousLabel = label; + } + assert presenceIndex == (nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label) % 8; + assert presenceBits != 0; // The last byte is not 0. + assert (presenceBits & (1 << presenceIndex)) != 0; // The last arc is always present. + fstCompiler.bytes.writeByte(bytePos++, presenceBits); + assert bytePos - dest == numPresenceBytes; + } + + /** + * Gets the number of bytes required to flag the presence of each arc in the given label range, + * one bit per arc. + */ + private static int getNumPresenceBytes(int labelRange) { + assert labelRange >= 0; + return (labelRange + 7) >> 3; + } + + /** + * Reads the presence bits of a direct-addressing node. Actually we don't read them here, we just + * keep the pointer to the bit-table start and we skip them. + */ + private void readPresenceBytes(Arc arc, BytesReader in) throws IOException { + assert arc.bytesPerArc() > 0; + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + arc.bitTableStart = in.getPosition(); + in.skipBytes(getNumPresenceBytes(arc.numArcs())); + } + + /** Fills virtual 'start' arc, ie, an empty incoming arc to the FST's start node */ + public Arc getFirstArc(Arc arc) { + T NO_OUTPUT = outputs.getNoOutput(); + + if (emptyOutput != null) { + arc.flags = BIT_FINAL_ARC | BIT_LAST_ARC; + arc.nextFinalOutput = emptyOutput; + if (emptyOutput != NO_OUTPUT) { + arc.flags = (byte) (arc.flags() | BIT_ARC_HAS_FINAL_OUTPUT); + } + } else { + arc.flags = BIT_LAST_ARC; + arc.nextFinalOutput = NO_OUTPUT; + } + arc.output = NO_OUTPUT; + + // If there are no nodes, ie, the FST only accepts the + // empty string, then startNode is 0 + arc.target = startNode; + return arc; + } + + /** + * Follows the follow arc and reads the last arc of its target; this changes the + * provided arc (2nd arg) in-place and returns it. + * + * @return Returns the second argument (arc). + */ + Arc readLastTargetArc(Arc follow, Arc arc, BytesReader in) throws IOException { + // System.out.println("readLast"); + if (targetHasArcs(follow) == false) { + // System.out.println(" end node"); + assert follow.isFinal(); + arc.label = END_LABEL; + arc.target = FINAL_END_NODE; + arc.output = follow.nextFinalOutput(); + arc.flags = BIT_LAST_ARC; + arc.nodeFlags = arc.flags; + return arc; + } else { + in.setPosition(follow.target()); + byte flags = arc.nodeFlags = in.readByte(); + if (flags == ARCS_FOR_BINARY_SEARCH || flags == ARCS_FOR_DIRECT_ADDRESSING) { + // Special arc which is actually a node header for fixed length arcs. + // Jump straight to end to find the last arc. 
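+ // For a binary-search node every arc occupies a fixed slot of bytesPerArc bytes starting at
+ // posArcsStart - arcIdx * bytesPerArc, so the last arc sits at posArcsStart - (numArcs - 1) * bytesPerArc;
+ // arcIdx is primed to numArcs - 2 below so that readNextRealArc advances onto it. For a direct
+ // addressing node the last present arc is located by counting the presence bits instead
+ // (readLastArcByDirectAddressing).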
+ arc.numArcs = in.readVInt(); + if (version >= VERSION_VINT_TARGET) { + arc.bytesPerArc = in.readVInt(); + } else { + arc.bytesPerArc = in.readInt(); + } + // System.out.println(" array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc); + if (flags == ARCS_FOR_DIRECT_ADDRESSING) { + readPresenceBytes(arc, in); + arc.firstLabel = readLabel(in); + arc.posArcsStart = in.getPosition(); + readLastArcByDirectAddressing(arc, in); + } else { + arc.arcIdx = arc.numArcs() - 2; + arc.posArcsStart = in.getPosition(); + readNextRealArc(arc, in); + } + } else { + arc.flags = flags; + // non-array: linear scan + arc.bytesPerArc = 0; + // System.out.println(" scan"); + while (arc.isLast() == false) { + // skip this arc: + readLabel(in); + if (arc.flag(BIT_ARC_HAS_OUTPUT)) { + outputs.skipOutput(in); + } + if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { + outputs.skipFinalOutput(in); + } + if (arc.flag(BIT_STOP_NODE)) {} else if (arc.flag(BIT_TARGET_NEXT)) {} else { + readUnpackedNodeTarget(in); + } + arc.flags = in.readByte(); + } + // Undo the byte flags we read: + in.skipBytes(-1); + arc.nextArc = in.getPosition(); + readNextRealArc(arc, in); + } + assert arc.isLast(); + return arc; + } + } + + private long readUnpackedNodeTarget(BytesReader in) throws IOException { + if (version < VERSION_VINT_TARGET) { + return in.readInt(); + } else { + return in.readVLong(); + } + } + + /** + * Follow the follow arc and read the first arc of its target; this changes the + * provided arc (2nd arg) in-place and returns it. + * + * @return Returns the second argument (arc). + */ + public Arc readFirstTargetArc(Arc follow, Arc arc, BytesReader in) throws IOException { + // int pos = address; + // System.out.println(" readFirstTarget follow.target=" + follow.target + " isFinal=" + + // follow.isFinal()); + if (follow.isFinal()) { + // Insert "fake" final first arc: + arc.label = END_LABEL; + arc.output = follow.nextFinalOutput(); + arc.flags = BIT_FINAL_ARC; + if (follow.target() <= 0) { + arc.flags |= BIT_LAST_ARC; + } else { + // NOTE: nextArc is a node (not an address!) in this case: + arc.nextArc = follow.target(); + } + arc.target = FINAL_END_NODE; + arc.nodeFlags = arc.flags; + // System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + + // arc.isLast() + " output=" + outputs.outputToString(arc.output)); + return arc; + } else { + return readFirstRealTargetArc(follow.target(), arc, in); + } + } + + public Arc readFirstRealTargetArc(long nodeAddress, Arc arc, final BytesReader in) throws IOException { + in.setPosition(nodeAddress); + // System.out.println(" flags=" + arc.flags); + + byte flags = arc.nodeFlags = in.readByte(); + if (flags == ARCS_FOR_BINARY_SEARCH || flags == ARCS_FOR_DIRECT_ADDRESSING) { + // System.out.println(" fixed length arc"); + // Special arc which is actually a node header for fixed length arcs. + arc.numArcs = in.readVInt(); + if (version >= VERSION_VINT_TARGET) { + arc.bytesPerArc = in.readVInt(); + } else { + arc.bytesPerArc = in.readInt(); + } + arc.arcIdx = -1; + if (flags == ARCS_FOR_DIRECT_ADDRESSING) { + readPresenceBytes(arc, in); + arc.firstLabel = readLabel(in); + arc.presenceIndex = -1; + } + arc.posArcsStart = in.getPosition(); + // System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " + // arcsStart=" + pos); + } else { + arc.nextArc = nodeAddress; + arc.bytesPerArc = 0; + } + + return readNextRealArc(arc, in); + } + + /** + * Returns whether arc's target points to a node in expanded format (fixed length + * arcs). 
+ */ + boolean isExpandedTarget(Arc follow, BytesReader in) throws IOException { + if (targetHasArcs(follow) == false) { + return false; + } else { + in.setPosition(follow.target()); + byte flags = in.readByte(); + return flags == ARCS_FOR_BINARY_SEARCH || flags == ARCS_FOR_DIRECT_ADDRESSING; + } + } + + /** In-place read; returns the arc. */ + public Arc readNextArc(Arc arc, BytesReader in) throws IOException { + if (arc.label() == END_LABEL) { + // This was a fake inserted "final" arc + if (arc.nextArc() <= 0) { + throw new IllegalArgumentException("cannot readNextArc when arc.isLast()=true"); + } + return readFirstRealTargetArc(arc.nextArc(), arc, in); + } else { + return readNextRealArc(arc, in); + } + } + + /** Peeks at next arc's label; does not alter arc. Do not call this if arc.isLast()! */ + int readNextArcLabel(Arc arc, BytesReader in) throws IOException { + assert arc.isLast() == false; + + if (arc.label() == END_LABEL) { + // System.out.println(" nextArc fake " + arc.nextArc); + // Next arc is the first arc of a node. + // Position to read the first arc label. + + in.setPosition(arc.nextArc()); + byte flags = in.readByte(); + if (flags == ARCS_FOR_BINARY_SEARCH || flags == ARCS_FOR_DIRECT_ADDRESSING) { + // System.out.println(" nextArc fixed length arc"); + // Special arc which is actually a node header for fixed length arcs. + int numArcs = in.readVInt(); + if (version >= VERSION_VINT_TARGET) { + in.readVInt(); // Skip bytesPerArc. + } else { + in.readInt(); // Skip bytesPerArc. + } + if (flags == ARCS_FOR_BINARY_SEARCH) { + in.readByte(); // Skip arc flags. + } else { + in.skipBytes(getNumPresenceBytes(numArcs)); + } + } + } else { + if (arc.bytesPerArc() != 0) { + // System.out.println(" nextArc real array"); + // Arcs have fixed length. + if (arc.nodeFlags() == ARCS_FOR_BINARY_SEARCH) { + // Point to next arc, -1 to skip arc flags. + in.setPosition(arc.posArcsStart() - (1 + arc.arcIdx()) * arc.bytesPerArc() - 1); + } else { + assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING; + // Direct addressing node. The label is not stored but rather inferred + // based on first label and arc index in the range. + assert Arc.BitTable.assertIsValid(arc, in); + assert Arc.BitTable.isBitSet(arc.arcIdx(), arc, in); + int nextIndex = Arc.BitTable.nextBitSet(arc.arcIdx(), arc, in); + assert nextIndex != -1; + return arc.firstLabel() + nextIndex; + } + } else { + // Arcs have variable length. + // System.out.println(" nextArc real list"); + // Position to next arc, -1 to skip flags. + in.setPosition(arc.nextArc() - 1); + } + } + return readLabel(in); + } + + public Arc readArcByIndex(Arc arc, final BytesReader in, int idx) throws IOException { + assert arc.bytesPerArc() > 0; + assert arc.nodeFlags() == ARCS_FOR_BINARY_SEARCH; + assert idx >= 0 && idx < arc.numArcs(); + in.setPosition(arc.posArcsStart() - idx * arc.bytesPerArc()); + arc.arcIdx = idx; + arc.flags = in.readByte(); + return readArc(arc, in); + } + + /** + * Reads a present direct addressing node arc, with the provided index in the label range. + * + * @param rangeIndex The index of the arc in the label range. It must be present. The real arc + * offset is computed based on the presence bits of the direct addressing node. 
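+ * (Hypothetical example: if the first label is 'a' and the present labels are {'a', 'c', 'f'},
+ * the arc for 'f' has rangeIndex 5 but presence index 2, i.e. it is the third encoded arc.)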
+ */ + public Arc readArcByDirectAddressing(Arc arc, final BytesReader in, int rangeIndex) throws IOException { + assert Arc.BitTable.assertIsValid(arc, in); + assert rangeIndex >= 0 && rangeIndex < arc.numArcs(); + assert Arc.BitTable.isBitSet(rangeIndex, arc, in); + int presenceIndex = Arc.BitTable.countBitsUpTo(rangeIndex, arc, in); + return readArcByDirectAddressing(arc, in, rangeIndex, presenceIndex); + } + + /** + * Reads a present direct addressing node arc, with the provided index in the label range and its + * corresponding presence index (which is the count of presence bits before it). + */ + private Arc readArcByDirectAddressing(Arc arc, final BytesReader in, int rangeIndex, int presenceIndex) throws IOException { + in.setPosition(arc.posArcsStart() - presenceIndex * arc.bytesPerArc()); + arc.arcIdx = rangeIndex; + arc.presenceIndex = presenceIndex; + arc.flags = in.readByte(); + return readArc(arc, in); + } + + /** + * Reads the last arc of a direct addressing node. This method is equivalent to call {@link + * #readArcByDirectAddressing(Arc, BytesReader, int)} with {@code rangeIndex} equal to {@code + * arc.numArcs() - 1}, but it is faster. + */ + public Arc readLastArcByDirectAddressing(Arc arc, final BytesReader in) throws IOException { + assert Arc.BitTable.assertIsValid(arc, in); + int presenceIndex = Arc.BitTable.countBits(arc, in) - 1; + return readArcByDirectAddressing(arc, in, arc.numArcs() - 1, presenceIndex); + } + + /** Never returns null, but you should never call this if arc.isLast() is true. */ + public Arc readNextRealArc(Arc arc, final BytesReader in) throws IOException { + + // TODO: can't assert this because we call from readFirstArc + // assert !flag(arc.flags, BIT_LAST_ARC); + + switch (arc.nodeFlags()) { + case ARCS_FOR_BINARY_SEARCH: + assert arc.bytesPerArc() > 0; + arc.arcIdx++; + assert arc.arcIdx() >= 0 && arc.arcIdx() < arc.numArcs(); + in.setPosition(arc.posArcsStart() - arc.arcIdx() * arc.bytesPerArc()); + arc.flags = in.readByte(); + break; + + case ARCS_FOR_DIRECT_ADDRESSING: + assert Arc.BitTable.assertIsValid(arc, in); + assert arc.arcIdx() == -1 || Arc.BitTable.isBitSet(arc.arcIdx(), arc, in); + int nextIndex = Arc.BitTable.nextBitSet(arc.arcIdx(), arc, in); + return readArcByDirectAddressing(arc, in, nextIndex, arc.presenceIndex + 1); + + default: + // Variable length arcs - linear search. + assert arc.bytesPerArc() == 0; + in.setPosition(arc.nextArc()); + arc.flags = in.readByte(); + } + return readArc(arc, in); + } + + /** + * Reads an arc.
+ * Precondition: The arc flags byte has already been read and set; the given BytesReader is + * positioned just after the arc flags byte. + */ + private Arc readArc(Arc arc, BytesReader in) throws IOException { + if (arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING) { + arc.label = arc.firstLabel() + arc.arcIdx(); + } else { + arc.label = readLabel(in); + } + + if (arc.flag(BIT_ARC_HAS_OUTPUT)) { + arc.output = outputs.read(in); + } else { + arc.output = outputs.getNoOutput(); + } + + if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { + arc.nextFinalOutput = outputs.readFinalOutput(in); + } else { + arc.nextFinalOutput = outputs.getNoOutput(); + } + + if (arc.flag(BIT_STOP_NODE)) { + if (arc.flag(BIT_FINAL_ARC)) { + arc.target = FINAL_END_NODE; + } else { + arc.target = NON_FINAL_END_NODE; + } + arc.nextArc = in.getPosition(); // Only useful for list. + } else if (arc.flag(BIT_TARGET_NEXT)) { + arc.nextArc = in.getPosition(); // Only useful for list. + // TODO: would be nice to make this lazy -- maybe + // caller doesn't need the target and is scanning arcs... + if (arc.flag(BIT_LAST_ARC) == false) { + if (arc.bytesPerArc() == 0) { + // must scan + seekToNextNode(in); + } else { + int numArcs = arc.nodeFlags == ARCS_FOR_DIRECT_ADDRESSING ? Arc.BitTable.countBits(arc, in) : arc.numArcs(); + in.setPosition(arc.posArcsStart() - arc.bytesPerArc() * numArcs); + } + } + arc.target = in.getPosition(); + } else { + arc.target = readUnpackedNodeTarget(in); + arc.nextArc = in.getPosition(); // Only useful for list. + } + return arc; + } + + static Arc readEndArc(Arc follow, Arc arc) { + if (follow.isFinal()) { + if (follow.target() <= 0) { + arc.flags = FST.BIT_LAST_ARC; + } else { + arc.flags = 0; + // NOTE: nextArc is a node (not an address!) in this case: + arc.nextArc = follow.target(); + } + arc.output = follow.nextFinalOutput(); + arc.label = FST.END_LABEL; + return arc; + } else { + return null; + } + } + + // TODO: could we somehow [partially] tableize arc lookups + // like automaton? + + /** + * Finds an arc leaving the incoming arc, replacing the arc in place. This returns null if the arc + * was not found, else the incoming arc. + */ + public Arc findTargetArc(int labelToMatch, Arc follow, Arc arc, BytesReader in) throws IOException { + + if (labelToMatch == END_LABEL) { + if (follow.isFinal()) { + if (follow.target() <= 0) { + arc.flags = BIT_LAST_ARC; + } else { + arc.flags = 0; + // NOTE: nextArc is a node (not an address!) in this case: + arc.nextArc = follow.target(); + } + arc.output = follow.nextFinalOutput(); + arc.label = END_LABEL; + arc.nodeFlags = arc.flags; + return arc; + } else { + return null; + } + } + + if (targetHasArcs(follow) == false) { + return null; + } + + in.setPosition(follow.target()); + + // System.out.println("fta label=" + (char) labelToMatch); + + byte flags = arc.nodeFlags = in.readByte(); + if (flags == ARCS_FOR_DIRECT_ADDRESSING) { + arc.numArcs = in.readVInt(); // This is in fact the label range. + if (version >= VERSION_VINT_TARGET) { + arc.bytesPerArc = in.readVInt(); + } else { + arc.bytesPerArc = in.readInt(); + } + readPresenceBytes(arc, in); + arc.firstLabel = readLabel(in); + arc.posArcsStart = in.getPosition(); + + int arcIndex = labelToMatch - arc.firstLabel(); + if (arcIndex < 0 || arcIndex >= arc.numArcs()) { + return null; // Before or after label range. + } else if (Arc.BitTable.isBitSet(arcIndex, arc, in) == false) { + return null; // Arc missing in the range. 
+ } + return readArcByDirectAddressing(arc, in, arcIndex); + } else if (flags == ARCS_FOR_BINARY_SEARCH) { + arc.numArcs = in.readVInt(); + if (version >= VERSION_VINT_TARGET) { + arc.bytesPerArc = in.readVInt(); + } else { + arc.bytesPerArc = in.readInt(); + } + arc.posArcsStart = in.getPosition(); + + // Array is sparse; do binary search: + int low = 0; + int high = arc.numArcs() - 1; + while (low <= high) { + // System.out.println(" cycle"); + int mid = (low + high) >>> 1; + // +1 to skip over flags + in.setPosition(arc.posArcsStart() - (arc.bytesPerArc() * mid + 1)); + int midLabel = readLabel(in); + final int cmp = midLabel - labelToMatch; + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + arc.arcIdx = mid - 1; + // System.out.println(" found!"); + return readNextRealArc(arc, in); + } + } + return null; + } + + // Linear scan + readFirstRealTargetArc(follow.target(), arc, in); + + while (true) { + // System.out.println(" non-bs cycle"); + // TODO: we should fix this code to not have to create + // object for the output of every arc we scan... only + // for the matching arc, if found + if (arc.label() == labelToMatch) { + // System.out.println(" found!"); + return arc; + } else if (arc.label() > labelToMatch) { + return null; + } else if (arc.isLast()) { + return null; + } else { + readNextRealArc(arc, in); + } + } + } + + private void seekToNextNode(BytesReader in) throws IOException { + + while (true) { + + final int flags = in.readByte(); + readLabel(in); + + if (flag(flags, BIT_ARC_HAS_OUTPUT)) { + outputs.skipOutput(in); + } + + if (flag(flags, BIT_ARC_HAS_FINAL_OUTPUT)) { + outputs.skipFinalOutput(in); + } + + if (flag(flags, BIT_STOP_NODE) == false && flag(flags, BIT_TARGET_NEXT) == false) { + readUnpackedNodeTarget(in); + } + + if (flag(flags, BIT_LAST_ARC)) { + return; + } + } + } + + /** Returns a {@link BytesReader} for this FST, positioned at position 0. */ + public BytesReader getBytesReader() { + if (this.fstStore != null) { + return this.fstStore.getReverseBytesReader(); + } else { + return bytes.getReverseReader(); + } + } + + /** Reads bytes stored in an FST. */ + public abstract static class BytesReader extends DataInput { + /** Get current read position. */ + public abstract long getPosition(); + + /** Set current read position. */ + public abstract void setPosition(long pos); + + /** Returns true if this reader uses reversed bytes under-the-hood. */ + public abstract boolean reversed(); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java new file mode 100644 index 0000000000000..7ee6eaa5f7ba4 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java @@ -0,0 +1,804 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.IntsRefBuilder; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST.INPUT_TYPE; + +import java.io.IOException; + +// TODO: could we somehow stream an FST to disk while we +// build it? + +/** + * Builds a minimal FST (maps an IntsRef term to an arbitrary output) from pre-sorted terms with + * outputs. The FST becomes an FSA if you use NoOutputs. The FST is written on-the-fly into a + * compact serialized format byte array, which can be saved to / loaded from a Directory or used + * directly for traversal. The FST is always finite (no cycles). + * + *
<p>
NOTE: The algorithm is described at + * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698 + * + *
<p>
The parameterized type T is the output type. See the subclasses of {@link Outputs}. + * + *
<p>
FSTs larger than 2.1GB are now possible (as of Lucene 4.2). FSTs containing more than 2.1B + * nodes are also now possible, however they cannot be packed. + * + * @lucene.experimental + */ +public class FSTCompiler { + + static final float DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR = 1f; + + private final NodeHash dedupHash; + final FST fst; + private final T NO_OUTPUT; + + // private static final boolean DEBUG = true; + + // simplistic pruning: we prune node (and all following + // nodes) if less than this number of terms go through it: + private final int minSuffixCount1; + + // better pruning: we prune node (and all following + // nodes) if the prior node has less than this number of + // terms go through it: + private final int minSuffixCount2; + + private final boolean doShareNonSingletonNodes; + private final int shareMaxTailLength; + + private final IntsRefBuilder lastInput = new IntsRefBuilder(); + + // NOTE: cutting this over to ArrayList instead loses ~6% + // in build performance on 9.8M Wikipedia terms; so we + // left this as an array: + // current "frontier" + private UnCompiledNode[] frontier; + + // Used for the BIT_TARGET_NEXT optimization (whereby + // instead of storing the address of the target node for + // a given arc, we mark a single bit noting that the next + // node in the byte[] is the target node): + long lastFrozenNode; + + // Reused temporarily while building the FST: + int[] numBytesPerArc = new int[4]; + int[] numLabelBytesPerArc = new int[numBytesPerArc.length]; + final FixedLengthArcsBuffer fixedLengthArcsBuffer = new FixedLengthArcsBuffer(); + + long arcCount; + long nodeCount; + long binarySearchNodeCount; + long directAddressingNodeCount; + + final boolean allowFixedLengthArcs; + final float directAddressingMaxOversizingFactor; + long directAddressingExpansionCredit; + + final BytesStore bytes; + + /** + * Instantiates an FST/FSA builder with default settings and pruning options turned off. For more + * tuning and tweaking, see {@link Builder}. + */ + public FSTCompiler(INPUT_TYPE inputType, Outputs outputs) { + this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15, 1f); + } + + private FSTCompiler( + INPUT_TYPE inputType, + int minSuffixCount1, + int minSuffixCount2, + boolean doShareSuffix, + boolean doShareNonSingletonNodes, + int shareMaxTailLength, + Outputs outputs, + boolean allowFixedLengthArcs, + int bytesPageBits, + float directAddressingMaxOversizingFactor + ) { + this.minSuffixCount1 = minSuffixCount1; + this.minSuffixCount2 = minSuffixCount2; + this.doShareNonSingletonNodes = doShareNonSingletonNodes; + this.shareMaxTailLength = shareMaxTailLength; + this.allowFixedLengthArcs = allowFixedLengthArcs; + this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor; + fst = new FST<>(inputType, outputs, bytesPageBits); + bytes = fst.bytes; + assert bytes != null; + if (doShareSuffix) { + dedupHash = new NodeHash<>(fst, bytes.getReverseReader(false)); + } else { + dedupHash = null; + } + NO_OUTPUT = outputs.getNoOutput(); + + @SuppressWarnings({ "rawtypes", "unchecked" }) + final UnCompiledNode[] f = (UnCompiledNode[]) new UnCompiledNode[10]; + frontier = f; + for (int idx = 0; idx < frontier.length; idx++) { + frontier[idx] = new UnCompiledNode<>(this, idx); + } + } + + /** + * Fluent-style constructor for FST {@link FSTCompiler}. + * + *
<p>
Creates an FST/FSA builder with all the possible tuning and construction tweaks. Read + * parameter documentation carefully. + */ + public static class Builder { + + private final INPUT_TYPE inputType; + private final Outputs outputs; + private int minSuffixCount1; + private int minSuffixCount2; + private boolean shouldShareSuffix = true; + private boolean shouldShareNonSingletonNodes = true; + private int shareMaxTailLength = Integer.MAX_VALUE; + private boolean allowFixedLengthArcs = true; + private int bytesPageBits = 15; + private float directAddressingMaxOversizingFactor = DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR; + + /** + * @param inputType The input type (transition labels). Can be anything from {@link INPUT_TYPE} + * enumeration. Shorter types will consume less memory. Strings (character sequences) are + * represented as {@link INPUT_TYPE#BYTE4} (full unicode codepoints). + * @param outputs The output type for each input sequence. Applies only if building an FST. + */ + public Builder(INPUT_TYPE inputType, Outputs outputs) { + this.inputType = inputType; + this.outputs = outputs; + } + + /** + * If pruning the input graph during construction, this threshold is used for telling if a node + * is kept or pruned. If transition_count(node) >= minSuffixCount1, the node is kept. + * + *
<p>
Default = 0. + */ + public Builder minSuffixCount1(int minSuffixCount1) { + this.minSuffixCount1 = minSuffixCount1; + return this; + } + + /** + * Better pruning: we prune node (and all following nodes) if the prior node has less than this + * number of terms go through it. + * + *
<p>
Default = 0. + */ + public Builder minSuffixCount2(int minSuffixCount2) { + this.minSuffixCount2 = minSuffixCount2; + return this; + } + + /** + * If {@code true}, the shared suffixes will be compacted into unique paths. This requires an + * additional RAM-intensive hash map for lookups in memory. Setting this parameter to {@code + * false} creates a single suffix path for all input sequences. This will result in a larger + * FST, but requires substantially less memory and CPU during building. + * + *
<p>
Default = {@code true}. + */ + public Builder shouldShareSuffix(boolean shouldShareSuffix) { + this.shouldShareSuffix = shouldShareSuffix; + return this; + } + + /** + * Only used if {@code shouldShareSuffix} is true. Set this to true to ensure FST is fully + * minimal, at cost of more CPU and more RAM during building. + * + *
<p>
Default = {@code true}. + */ + public Builder shouldShareNonSingletonNodes(boolean shouldShareNonSingletonNodes) { + this.shouldShareNonSingletonNodes = shouldShareNonSingletonNodes; + return this; + } + + /** + * Only used if {@code shouldShareSuffix} is true. Set this to Integer.MAX_VALUE to ensure FST + * is fully minimal, at cost of more CPU and more RAM during building. + * + *
<p>
Default = {@link Integer#MAX_VALUE}. + */ + public Builder shareMaxTailLength(int shareMaxTailLength) { + this.shareMaxTailLength = shareMaxTailLength; + return this; + } + + /** + * Pass {@code false} to disable the fixed length arc optimization (binary search or direct + * addressing) while building the FST; this will make the resulting FST smaller but slower to + * traverse. + * + *
<p>
Default = {@code true}. + */ + public Builder allowFixedLengthArcs(boolean allowFixedLengthArcs) { + this.allowFixedLengthArcs = allowFixedLengthArcs; + return this; + } + + /** + * How many bits wide to make each byte[] block in the BytesStore; if you know the FST will be + * large then make this larger. For example 15 bits = 32768 byte pages. + * + *
<p>
Default = 15. + */ + public Builder bytesPageBits(int bytesPageBits) { + this.bytesPageBits = bytesPageBits; + return this; + } + + /** + * Overrides the default the maximum oversizing of fixed array allowed to enable direct + * addressing of arcs instead of binary search. + * + *
<p>
Setting this factor to a negative value (e.g. -1) effectively disables direct addressing, + * only binary search nodes will be created. + * + *
<p>
This factor does not determine whether to encode a node with a list of variable length + * arcs or with fixed length arcs. It only determines the effective encoding of a node that is + * already known to be encoded with fixed length arcs. + * + *
<p>
Default = 1. + */ + public Builder directAddressingMaxOversizingFactor(float factor) { + this.directAddressingMaxOversizingFactor = factor; + return this; + } + + /** Creates a new {@link FSTCompiler}. */ + public FSTCompiler build() { + FSTCompiler fstCompiler = new FSTCompiler<>( + inputType, + minSuffixCount1, + minSuffixCount2, + shouldShareSuffix, + shouldShareNonSingletonNodes, + shareMaxTailLength, + outputs, + allowFixedLengthArcs, + bytesPageBits, + directAddressingMaxOversizingFactor + ); + return fstCompiler; + } + } + + public float getDirectAddressingMaxOversizingFactor() { + return directAddressingMaxOversizingFactor; + } + + public long getTermCount() { + return frontier[0].inputCount; + } + + public long getNodeCount() { + // 1+ in order to count the -1 implicit final node + return 1 + nodeCount; + } + + public long getArcCount() { + return arcCount; + } + + public long getMappedStateCount() { + return dedupHash == null ? 0 : nodeCount; + } + + private CompiledNode compileNode(UnCompiledNode nodeIn, int tailLength) throws IOException { + final long node; + long bytesPosStart = bytes.getPosition(); + if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) { + if (nodeIn.numArcs == 0) { + node = fst.addNode(this, nodeIn); + lastFrozenNode = node; + } else { + node = dedupHash.add(this, nodeIn); + } + } else { + node = fst.addNode(this, nodeIn); + } + assert node != -2; + + long bytesPosEnd = bytes.getPosition(); + if (bytesPosEnd != bytesPosStart) { + // The FST added a new node: + assert bytesPosEnd > bytesPosStart; + lastFrozenNode = node; + } + + nodeIn.clear(); + + final CompiledNode fn = new CompiledNode(); + fn.node = node; + return fn; + } + + private void freezeTail(int prefixLenPlus1) throws IOException { + // System.out.println(" compileTail " + prefixLenPlus1); + final int downTo = Math.max(1, prefixLenPlus1); + for (int idx = lastInput.length(); idx >= downTo; idx--) { + + boolean doPrune = false; + boolean doCompile = false; + + final UnCompiledNode node = frontier[idx]; + final UnCompiledNode parent = frontier[idx - 1]; + + if (node.inputCount < minSuffixCount1) { + doPrune = true; + doCompile = true; + } else if (idx > prefixLenPlus1) { + // prune if parent's inputCount is less than suffixMinCount2 + if (parent.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && parent.inputCount == 1 && idx > 1)) { + // my parent, about to be compiled, doesn't make the cut, so + // I'm definitely pruned + + // if minSuffixCount2 is 1, we keep only up + // until the 'distinguished edge', ie we keep only the + // 'divergent' part of the FST. if my parent, about to be + // compiled, has inputCount 1 then we are already past the + // distinguished edge. NOTE: this only works if + // the FST outputs are not "compressible" (simple + // ords ARE compressible). 
+ doPrune = true; + } else { + // my parent, about to be compiled, does make the cut, so + // I'm definitely not pruned + doPrune = false; + } + doCompile = true; + } else { + // if pruning is disabled (count is 0) we can always + // compile current node + doCompile = minSuffixCount2 == 0; + } + + // System.out.println(" label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + // + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + // + doPrune); + + if (node.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && node.inputCount == 1 && idx > 1)) { + // drop all arcs + for (int arcIdx = 0; arcIdx < node.numArcs; arcIdx++) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final UnCompiledNode target = (UnCompiledNode) node.arcs[arcIdx].target; + target.clear(); + } + node.numArcs = 0; + } + + if (doPrune) { + // this node doesn't make it -- deref it + node.clear(); + parent.deleteLast(lastInput.intAt(idx - 1), node); + } else { + + if (minSuffixCount2 != 0) { + compileAllTargets(node, lastInput.length() - idx); + } + final T nextFinalOutput = node.output; + + // We "fake" the node as being final if it has no + // outgoing arcs; in theory we could leave it + // as non-final (the FST can represent this), but + // FSTEnum, Util, etc., have trouble w/ non-final + // dead-end states: + final boolean isFinal = node.isFinal || node.numArcs == 0; + + if (doCompile) { + // this node makes it and we now compile it. first, + // compile any targets that were previously + // undecided: + parent.replaceLast(lastInput.intAt(idx - 1), compileNode(node, 1 + lastInput.length() - idx), nextFinalOutput, isFinal); + } else { + // replaceLast just to install + // nextFinalOutput/isFinal onto the arc + parent.replaceLast(lastInput.intAt(idx - 1), node, nextFinalOutput, isFinal); + // this node will stay in play for now, since we are + // undecided on whether to prune it. later, it + // will be either compiled or pruned, so we must + // allocate a new node: + frontier[idx] = new UnCompiledNode<>(this, idx); + } + } + } + } + + // for debugging + /* + private String toString(BytesRef b) { + try { + return b.utf8ToString() + " " + b; + } catch (Throwable t) { + return b.toString(); + } + } + */ + + /** + * Add the next input/output pair. The provided input must be sorted after the previous one + * according to {@link IntsRef#compareTo}. It's also OK to add the same input twice in a row with + * different outputs, as long as {@link Outputs} implements the {@link Outputs#merge} method. Note + * that input is fully consumed after this method is returned (so caller is free to reuse), but + * output is not. So if your outputs are changeable (eg {@link ByteSequenceOutputs}) + * then you cannot reuse across calls. + */ + public void add(IntsRef input, T output) throws IOException { + /* + if (DEBUG) { + BytesRef b = new BytesRef(input.length); + for(int x=0;x n = (UnCompiledNode) arc.target; + if (n.numArcs == 0) { + // System.out.println("seg=" + segment + " FORCE final arc=" + (char) arc.label); + arc.isFinal = n.isFinal = true; + } + arc.target = compileNode(n, tailLength - 1); + } + } + } + + /** Expert: holds a pending (seen but not yet serialized) arc. 
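+ * Unlike {@link FST.Arc}, its {@code target} still points at a builder {@link Node}; a byte
+ * address only exists once the target has been frozen via {@code compileNode}/{@code FST#addNode}.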
*/ + static class Arc { + int label; // really an "unsigned" byte + Node target; + boolean isFinal; + T output; + T nextFinalOutput; + } + + // NOTE: not many instances of Node or CompiledNode are in + // memory while the FST is being built; it's only the + // current "frontier": + + interface Node { + boolean isCompiled(); + } + + public long fstRamBytesUsed() { + return fst.ramBytesUsed(); + } + + static final class CompiledNode implements Node { + long node; + + @Override + public boolean isCompiled() { + return true; + } + } + + /** Expert: holds a pending (seen but not yet serialized) Node. */ + static final class UnCompiledNode implements Node { + final FSTCompiler owner; + int numArcs; + Arc[] arcs; + // TODO: instead of recording isFinal/output on the + // node, maybe we should use -1 arc to mean "end" (like + // we do when reading the FST). Would simplify much + // code here... + T output; + boolean isFinal; + long inputCount; + + /** This node's depth, starting from the automaton root. */ + final int depth; + + /** + * @param depth The node's depth starting from the automaton root. Needed for LUCENE-2934 (node + * expansion based on conditions other than the fanout size). + */ + @SuppressWarnings({ "rawtypes", "unchecked" }) + UnCompiledNode(FSTCompiler owner, int depth) { + this.owner = owner; + arcs = (Arc[]) new Arc[1]; + arcs[0] = new Arc<>(); + output = owner.NO_OUTPUT; + this.depth = depth; + } + + @Override + public boolean isCompiled() { + return false; + } + + void clear() { + numArcs = 0; + isFinal = false; + output = owner.NO_OUTPUT; + inputCount = 0; + + // We don't clear the depth here because it never changes + // for nodes on the frontier (even when reused). + } + + T getLastOutput(int labelToMatch) { + assert numArcs > 0; + assert arcs[numArcs - 1].label == labelToMatch; + return arcs[numArcs - 1].output; + } + + void addArc(int label, Node target) { + assert label >= 0; + assert numArcs == 0 || label > arcs[numArcs - 1].label + : "arc[numArcs-1].label=" + arcs[numArcs - 1].label + " new label=" + label + " numArcs=" + numArcs; + if (numArcs == arcs.length) { + final Arc[] newArcs = ArrayUtil.grow(arcs); + for (int arcIdx = numArcs; arcIdx < newArcs.length; arcIdx++) { + newArcs[arcIdx] = new Arc<>(); + } + arcs = newArcs; + } + final Arc arc = arcs[numArcs++]; + arc.label = label; + arc.target = target; + arc.output = arc.nextFinalOutput = owner.NO_OUTPUT; + arc.isFinal = false; + } + + void replaceLast(int labelToMatch, Node target, T nextFinalOutput, boolean isFinal) { + assert numArcs > 0; + final Arc arc = arcs[numArcs - 1]; + assert arc.label == labelToMatch : "arc.label=" + arc.label + " vs " + labelToMatch; + arc.target = target; + // assert target.node != -2; + arc.nextFinalOutput = nextFinalOutput; + arc.isFinal = isFinal; + } + + void deleteLast(int label, Node target) { + assert numArcs > 0; + assert label == arcs[numArcs - 1].label; + assert target == arcs[numArcs - 1].target; + numArcs--; + } + + void setLastOutput(int labelToMatch, T newOutput) { + assert owner.validOutput(newOutput); + assert numArcs > 0; + final Arc arc = arcs[numArcs - 1]; + assert arc.label == labelToMatch; + arc.output = newOutput; + } + + // pushes an output prefix forward onto all arcs + void prependOutput(T outputPrefix) { + assert owner.validOutput(outputPrefix); + + for (int arcIdx = 0; arcIdx < numArcs; arcIdx++) { + arcs[arcIdx].output = owner.fst.outputs.add(outputPrefix, arcs[arcIdx].output); + assert owner.validOutput(arcs[arcIdx].output); + } + + if (isFinal) { + 
output = owner.fst.outputs.add(outputPrefix, output); + assert owner.validOutput(output); + } + } + } + + /** + * Reusable buffer for building nodes with fixed length arcs (binary search or direct addressing). + */ + static class FixedLengthArcsBuffer { + + // Initial capacity is the max length required for the header of a node with fixed length arcs: + // header(byte) + numArcs(vint) + numBytes(vint) + private byte[] bytes = new byte[11]; + private final ByteArrayDataOutput bado = new ByteArrayDataOutput(bytes); + + /** Ensures the capacity of the internal byte array. Enlarges it if needed. */ + FixedLengthArcsBuffer ensureCapacity(int capacity) { + if (bytes.length < capacity) { + bytes = new byte[ArrayUtil.oversize(capacity, Byte.BYTES)]; + bado.reset(bytes); + } + return this; + } + + FixedLengthArcsBuffer resetPosition() { + bado.reset(bytes); + return this; + } + + FixedLengthArcsBuffer writeByte(byte b) { + bado.writeByte(b); + return this; + } + + FixedLengthArcsBuffer writeVInt(int i) { + try { + bado.writeVInt(i); + } catch (IOException e) { // Never thrown. + throw new RuntimeException(e); + } + return this; + } + + int getPosition() { + return bado.getPosition(); + } + + /** Gets the internal byte array. */ + byte[] getBytes() { + return bytes; + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java new file mode 100644 index 0000000000000..789c216df6f95 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java @@ -0,0 +1,660 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST.Arc.BitTable; + +import java.io.IOException; + +/** + * Can next() and advance() through the terms in an FST + * + * @lucene.experimental + */ +abstract class FSTEnum { + protected final FST fst; + + @SuppressWarnings({ "rawtypes", "unchecked" }) + protected FST.Arc[] arcs = new FST.Arc[10]; + // outputs are cumulative + @SuppressWarnings({ "rawtypes", "unchecked" }) + protected T[] output = (T[]) new Object[10]; + + protected final T NO_OUTPUT; + protected final FST.BytesReader fstReader; + + protected int upto; + int targetLength; + + /** + * doFloor controls the behavior of advance: if it's true doFloor is true, advance positions to + * the biggest term before target. 
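+ * The enum keeps one {@link FST.Arc} per input position in {@code arcs[]}, and {@code output[]}
+ * holds the cumulative output up to that position.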
+ */ + FSTEnum(FST fst) { + this.fst = fst; + fstReader = fst.getBytesReader(); + NO_OUTPUT = fst.outputs.getNoOutput(); + fst.getFirstArc(getArc(0)); + output[0] = NO_OUTPUT; + } + + protected abstract int getTargetLabel(); + + protected abstract int getCurrentLabel(); + + protected abstract void setCurrentLabel(int label); + + protected abstract void grow(); + + /** Rewinds enum state to match the shared prefix between current term and target term */ + private void rewindPrefix() throws IOException { + if (upto == 0) { + // System.out.println(" init"); + upto = 1; + fst.readFirstTargetArc(getArc(0), getArc(1), fstReader); + return; + } + // System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength); + + final int currentLimit = upto; + upto = 1; + while (upto < currentLimit && upto <= targetLength + 1) { + final int cmp = getCurrentLabel() - getTargetLabel(); + if (cmp < 0) { + // seek forward + // System.out.println(" seek fwd"); + break; + } else if (cmp > 0) { + // seek backwards -- reset this arc to the first arc + final FST.Arc arc = getArc(upto); + fst.readFirstTargetArc(getArc(upto - 1), arc, fstReader); + // System.out.println(" seek first arc"); + break; + } + upto++; + } + // System.out.println(" fall through upto=" + upto); + } + + protected void doNext() throws IOException { + // System.out.println("FE: next upto=" + upto); + if (upto == 0) { + // System.out.println(" init"); + upto = 1; + fst.readFirstTargetArc(getArc(0), getArc(1), fstReader); + } else { + // pop + // System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + + // arcs[upto].label + " isLast?=" + arcs[upto].isLast()); + while (arcs[upto].isLast()) { + upto--; + if (upto == 0) { + // System.out.println(" eof"); + return; + } + } + fst.readNextArc(arcs[upto], fstReader); + } + + pushFirst(); + } + + // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND / + // SEEK_END)? saves the eq check above? + + /** Seeks to smallest term that's >= target. */ + protected void doSeekCeil() throws IOException { + + // System.out.println(" advance len=" + target.length + " curlen=" + current.length); + + // TODO: possibly caller could/should provide common + // prefix length? 
ie this work may be redundant if + // caller is in fact intersecting against its own + // automaton + + // System.out.println("FE.seekCeil upto=" + upto); + + // Save time by starting at the end of the shared prefix + // b/w our current term & the target: + rewindPrefix(); + // System.out.println(" after rewind upto=" + upto); + + FST.Arc arc = getArc(upto); + // System.out.println(" init targetLabel=" + targetLabel); + + // Now scan forward, matching the new suffix of the target + while (arc != null) { + int targetLabel = getTargetLabel(); + // System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) + // arc.label + ") vs targetLabel=" + targetLabel); + if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) { + // Arcs are in an array + final FST.BytesReader in = fst.getBytesReader(); + if (arc.nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) { + arc = doSeekCeilArrayDirectAddressing(arc, targetLabel, in); + } else { + assert arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH; + arc = doSeekCeilArrayPacked(arc, targetLabel, in); + } + } else { + arc = doSeekCeilList(arc, targetLabel); + } + } + } + + private FST.Arc doSeekCeilArrayDirectAddressing(final FST.Arc arc, final int targetLabel, final FST.BytesReader in) + throws IOException { + // The array is addressed directly by label, with presence bits to compute the actual arc + // offset. + + int targetIndex = targetLabel - arc.firstLabel(); + if (targetIndex >= arc.numArcs()) { + // Target is beyond the last arc, out of label range. + // Dead end (target is after the last arc); + // rollback to last fork then push + upto--; + while (true) { + if (upto == 0) { + return null; + } + final FST.Arc prevArc = getArc(upto); + // System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " + // isLast?=" + prevArc.isLast()); + if (prevArc.isLast() == false) { + fst.readNextArc(prevArc, fstReader); + pushFirst(); + return null; + } + upto--; + } + } else { + if (targetIndex < 0) { + targetIndex = -1; + } else if (BitTable.isBitSet(targetIndex, arc, in)) { + fst.readArcByDirectAddressing(arc, in, targetIndex); + assert arc.label() == targetLabel; + // found -- copy pasta from below + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (targetLabel == FST.END_LABEL) { + return null; + } + setCurrentLabel(arc.label()); + incr(); + return fst.readFirstTargetArc(arc, getArc(upto), fstReader); + } + // Not found, return the next arc (ceil). + int ceilIndex = BitTable.nextBitSet(targetIndex, arc, in); + assert ceilIndex != -1; + fst.readArcByDirectAddressing(arc, in, ceilIndex); + assert arc.label() > targetLabel; + pushFirst(); + return null; + } + } + + private FST.Arc doSeekCeilArrayPacked(final FST.Arc arc, final int targetLabel, final FST.BytesReader in) throws IOException { + // The array is packed -- use binary search to find the target. 
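A minimal standalone sketch of the two fixed-length-arc lookups used here (the direct-addressing branch above and the binary-search branch that follows); plain arrays and a java.util.BitSet stand in for the encoded arcs, so names and layout are illustrative only, not the codec's byte format:

import java.util.Arrays;
import java.util.BitSet;

final class ArcLookupSketch {
    /** Binary-search node: labels stored sorted; returns the smallest label >= target, or -1. */
    static int ceilPacked(int[] sortedLabels, int targetLabel) {
        int idx = Arrays.binarySearch(sortedLabels, targetLabel);
        if (idx >= 0) {
            return sortedLabels[idx];              // exact hit
        }
        int insertion = -1 - idx;                  // first label greater than target
        return insertion == sortedLabels.length ? -1 : sortedLabels[insertion];
    }

    /** Direct-addressing node: slot = label - firstLabel; a presence bit marks real arcs. */
    static int ceilDirectAddressing(int firstLabel, int numSlots, BitSet presence, int targetLabel) {
        int targetIndex = targetLabel - firstLabel;
        if (targetIndex >= numSlots) {
            return -1;                             // past the last arc: dead end
        }
        int ceilIndex = presence.nextSetBit(Math.max(targetIndex, 0));
        return ceilIndex == -1 ? -1 : firstLabel + ceilIndex;
    }
}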
+ int idx = Util.binarySearch(fst, arc, targetLabel); + if (idx >= 0) { + // Match + fst.readArcByIndex(arc, in, idx); + assert arc.arcIdx() == idx; + assert arc.label() == targetLabel : "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + idx; + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (targetLabel == FST.END_LABEL) { + return null; + } + setCurrentLabel(arc.label()); + incr(); + return fst.readFirstTargetArc(arc, getArc(upto), fstReader); + } + idx = -1 - idx; + if (idx == arc.numArcs()) { + // Dead end + fst.readArcByIndex(arc, in, idx - 1); + assert arc.isLast(); + // Dead end (target is after the last arc); + // rollback to last fork then push + upto--; + while (true) { + if (upto == 0) { + return null; + } + final FST.Arc prevArc = getArc(upto); + // System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " + // isLast?=" + prevArc.isLast()); + if (prevArc.isLast() == false) { + fst.readNextArc(prevArc, fstReader); + pushFirst(); + return null; + } + upto--; + } + } else { + // Ceiling - arc with least higher label + fst.readArcByIndex(arc, in, idx); + assert arc.label() > targetLabel; + pushFirst(); + return null; + } + } + + private FST.Arc doSeekCeilList(final FST.Arc arc, final int targetLabel) throws IOException { + // Arcs are not array'd -- must do linear scan: + if (arc.label() == targetLabel) { + // recurse + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (targetLabel == FST.END_LABEL) { + return null; + } + setCurrentLabel(arc.label()); + incr(); + return fst.readFirstTargetArc(arc, getArc(upto), fstReader); + } else if (arc.label() > targetLabel) { + pushFirst(); + return null; + } else if (arc.isLast()) { + // Dead end (target is after the last arc); + // rollback to last fork then push + upto--; + while (true) { + if (upto == 0) { + return null; + } + final FST.Arc prevArc = getArc(upto); + // System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " + // isLast?=" + prevArc.isLast()); + if (prevArc.isLast() == false) { + fst.readNextArc(prevArc, fstReader); + pushFirst(); + return null; + } + upto--; + } + } else { + // keep scanning + // System.out.println(" next scan"); + fst.readNextArc(arc, fstReader); + } + return arc; + } + + // Todo: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND / + // SEEK_END)? saves the eq check above? + /** Seeks to largest term that's <= target. */ + void doSeekFloor() throws IOException { + + // TODO: possibly caller could/should provide common + // prefix length? 
ie this work may be redundant if + // caller is in fact intersecting against its own + // automaton + // System.out.println("FE: seek floor upto=" + upto); + + // Save CPU by starting at the end of the shared prefix + // b/w our current term & the target: + rewindPrefix(); + + // System.out.println("FE: after rewind upto=" + upto); + + FST.Arc arc = getArc(upto); + + // System.out.println("FE: init targetLabel=" + targetLabel); + + // Now scan forward, matching the new suffix of the target + while (arc != null) { + // System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) + // arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast() + " bba=" + + // arc.bytesPerArc); + int targetLabel = getTargetLabel(); + + if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) { + // Arcs are in an array + final FST.BytesReader in = fst.getBytesReader(); + if (arc.nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) { + arc = doSeekFloorArrayDirectAddressing(arc, targetLabel, in); + } else { + assert arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH; + arc = doSeekFloorArrayPacked(arc, targetLabel, in); + } + } else { + arc = doSeekFloorList(arc, targetLabel); + } + } + } + + private FST.Arc doSeekFloorArrayDirectAddressing(FST.Arc arc, int targetLabel, FST.BytesReader in) throws IOException { + // The array is addressed directly by label, with presence bits to compute the actual arc + // offset. + + int targetIndex = targetLabel - arc.firstLabel(); + if (targetIndex < 0) { + // Before first arc. + return backtrackToFloorArc(arc, targetLabel, in); + } else if (targetIndex >= arc.numArcs()) { + // After last arc. + fst.readLastArcByDirectAddressing(arc, in); + assert arc.label() < targetLabel; + assert arc.isLast(); + pushLast(); + return null; + } else { + // Within label range. + if (BitTable.isBitSet(targetIndex, arc, in)) { + fst.readArcByDirectAddressing(arc, in, targetIndex); + assert arc.label() == targetLabel; + // found -- copy pasta from below + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (targetLabel == FST.END_LABEL) { + return null; + } + setCurrentLabel(arc.label()); + incr(); + return fst.readFirstTargetArc(arc, getArc(upto), fstReader); + } + // Scan backwards to find a floor arc. + int floorIndex = BitTable.previousBitSet(targetIndex, arc, in); + assert floorIndex != -1; + fst.readArcByDirectAddressing(arc, in, floorIndex); + assert arc.label() < targetLabel; + assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel; + pushLast(); + return null; + } + } + + /** + * Backtracks until it finds a node which first arc is before our target label.` Then on the node, + * finds the arc just before the targetLabel. + * + * @return null to continue the seek floor recursion loop. + */ + private FST.Arc backtrackToFloorArc(FST.Arc arc, int targetLabel, final FST.BytesReader in) throws IOException { + while (true) { + // First, walk backwards until we find a node which first arc is before our target label. + fst.readFirstTargetArc(getArc(upto - 1), arc, fstReader); + if (arc.label() < targetLabel) { + // Then on this node, find the arc just before the targetLabel. 
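For intuition only, seekCeil/seekFloor/seekExact over the FST follow the same contract as NavigableSet's ceiling/floor/contains; a tiny analogy on plain strings, with no FST classes involved:

import java.util.TreeSet;

final class SeekSemanticsSketch {
    public static void main(String[] args) {
        TreeSet<String> terms = new TreeSet<>(java.util.List.of("bar", "baz", "foo"));
        System.out.println(terms.ceiling("bb"));   // "foo"  -> smallest term >= target (doSeekCeil)
        System.out.println(terms.floor("bb"));     // "baz"  -> largest term <= target (doSeekFloor)
        System.out.println(terms.contains("baz")); // true   -> exact match (doSeekExact)
    }
}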
+ if (arc.isLast() == false) { + if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) { + if (arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) { + findNextFloorArcBinarySearch(arc, targetLabel, in); + } else { + assert arc.nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING; + findNextFloorArcDirectAddressing(arc, targetLabel, in); + } + } else { + while (arc.isLast() == false && fst.readNextArcLabel(arc, in) < targetLabel) { + fst.readNextArc(arc, fstReader); + } + } + } + assert arc.label() < targetLabel; + assert arc.isLast() || fst.readNextArcLabel(arc, in) >= targetLabel; + pushLast(); + return null; + } + upto--; + if (upto == 0) { + return null; + } + targetLabel = getTargetLabel(); + arc = getArc(upto); + } + } + + /** + * Finds and reads an arc on the current node which label is strictly less than the given label. + * Skips the first arc, finds next floor arc; or none if the floor arc is the first arc itself (in + * this case it has already been read). + * + *

Precondition: the given arc is the first arc of the node. + */ + private void findNextFloorArcDirectAddressing(FST.Arc arc, int targetLabel, final FST.BytesReader in) throws IOException { + assert arc.nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING; + assert arc.label() != FST.END_LABEL; + assert arc.label() == arc.firstLabel(); + if (arc.numArcs() > 1) { + int targetIndex = targetLabel - arc.firstLabel(); + assert targetIndex >= 0; + if (targetIndex >= arc.numArcs()) { + // Beyond last arc. Take last arc. + fst.readLastArcByDirectAddressing(arc, in); + } else { + // Take the preceding arc, even if the target is present. + int floorIndex = BitTable.previousBitSet(targetIndex, arc, in); + if (floorIndex > 0) { + fst.readArcByDirectAddressing(arc, in, floorIndex); + } + } + } + } + + /** Same as {@link #findNextFloorArcDirectAddressing} for binary search node. */ + private void findNextFloorArcBinarySearch(FST.Arc arc, int targetLabel, FST.BytesReader in) throws IOException { + assert arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH; + assert arc.label() != FST.END_LABEL; + assert arc.arcIdx() == 0; + if (arc.numArcs() > 1) { + int idx = Util.binarySearch(fst, arc, targetLabel); + assert idx != -1; + if (idx > 1) { + fst.readArcByIndex(arc, in, idx - 1); + } else if (idx < -2) { + fst.readArcByIndex(arc, in, -2 - idx); + } + } + } + + private FST.Arc doSeekFloorArrayPacked(FST.Arc arc, int targetLabel, final FST.BytesReader in) throws IOException { + // Arcs are fixed array -- use binary search to find the target. + int idx = Util.binarySearch(fst, arc, targetLabel); + + if (idx >= 0) { + // Match -- recurse + // System.out.println(" match! arcIdx=" + idx); + fst.readArcByIndex(arc, in, idx); + assert arc.arcIdx() == idx; + assert arc.label() == targetLabel : "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + idx; + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (targetLabel == FST.END_LABEL) { + return null; + } + setCurrentLabel(arc.label()); + incr(); + return fst.readFirstTargetArc(arc, getArc(upto), fstReader); + } else if (idx == -1) { + // Before first arc. + return backtrackToFloorArc(arc, targetLabel, in); + } else { + // There is a floor arc; idx will be (-1 - (floor + 1)). + fst.readArcByIndex(arc, in, -2 - idx); + assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel; + assert arc.label() < targetLabel : "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel; + pushLast(); + return null; + } + } + + private FST.Arc doSeekFloorList(FST.Arc arc, int targetLabel) throws IOException { + if (arc.label() == targetLabel) { + // Match -- recurse + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (targetLabel == FST.END_LABEL) { + return null; + } + setCurrentLabel(arc.label()); + incr(); + return fst.readFirstTargetArc(arc, getArc(upto), fstReader); + } else if (arc.label() > targetLabel) { + // TODO: if each arc could somehow read the arc just + // before, we can save this re-scan. 
The ceil case + // doesn't need this because it reads the next arc + // instead: + while (true) { + // First, walk backwards until we find a first arc + // that's before our target label: + fst.readFirstTargetArc(getArc(upto - 1), arc, fstReader); + if (arc.label() < targetLabel) { + // Then, scan forwards to the arc just before + // the targetLabel: + while (arc.isLast() == false && fst.readNextArcLabel(arc, fstReader) < targetLabel) { + fst.readNextArc(arc, fstReader); + } + pushLast(); + return null; + } + upto--; + if (upto == 0) { + return null; + } + targetLabel = getTargetLabel(); + arc = getArc(upto); + } + } else if (arc.isLast() == false) { + // System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) + // fst.readNextArcLabel(arc) + ")"); + if (fst.readNextArcLabel(arc, fstReader) > targetLabel) { + pushLast(); + return null; + } else { + // keep scanning + return fst.readNextArc(arc, fstReader); + } + } else { + pushLast(); + return null; + } + } + + /** Seeks to exactly target term. */ + boolean doSeekExact() throws IOException { + + // TODO: possibly caller could/should provide common + // prefix length? ie this work may be redundant if + // caller is in fact intersecting against its own + // automaton + + // System.out.println("FE: seek exact upto=" + upto); + + // Save time by starting at the end of the shared prefix + // b/w our current term & the target: + rewindPrefix(); + + // System.out.println("FE: after rewind upto=" + upto); + FST.Arc arc = getArc(upto - 1); + int targetLabel = getTargetLabel(); + + final FST.BytesReader fstReader = fst.getBytesReader(); + + while (true) { + // System.out.println(" cycle target=" + (targetLabel == -1 ? "-1" : (char) targetLabel)); + final FST.Arc nextArc = fst.findTargetArc(targetLabel, arc, getArc(upto), fstReader); + if (nextArc == null) { + // short circuit + // upto--; + // upto = 0; + fst.readFirstTargetArc(arc, getArc(upto), fstReader); + // System.out.println(" no match upto=" + upto); + return false; + } + // Match -- recurse: + output[upto] = fst.outputs.add(output[upto - 1], nextArc.output()); + if (targetLabel == FST.END_LABEL) { + // System.out.println(" return found; upto=" + upto + " output=" + output[upto] + " + // nextArc=" + nextArc.isLast()); + return true; + } + setCurrentLabel(targetLabel); + incr(); + targetLabel = getTargetLabel(); + arc = nextArc; + } + } + + private void incr() { + upto++; + grow(); + if (arcs.length <= upto) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final FST.Arc[] newArcs = new FST.Arc[ArrayUtil.oversize(1 + upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(arcs, 0, newArcs, 0, arcs.length); + arcs = newArcs; + } + if (output.length <= upto) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final T[] newOutput = (T[]) new Object[ArrayUtil.oversize(1 + upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(output, 0, newOutput, 0, output.length); + output = newOutput; + } + } + + // Appends current arc, and then recurses from its target, + // appending first arc all the way to the final node + private void pushFirst() throws IOException { + + FST.Arc arc = arcs[upto]; + assert arc != null; + + while (true) { + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (arc.label() == FST.END_LABEL) { + // Final node + break; + } + // System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + + // fst.outputs.outputToString(output[upto])); + setCurrentLabel(arc.label()); + incr(); + 
+ final FST.Arc nextArc = getArc(upto); + fst.readFirstTargetArc(arc, nextArc, fstReader); + arc = nextArc; + } + } + + // Recurses from current arc, appending last arc all the + // way to the first final node + private void pushLast() throws IOException { + + FST.Arc arc = arcs[upto]; + assert arc != null; + + while (true) { + setCurrentLabel(arc.label()); + output[upto] = fst.outputs.add(output[upto - 1], arc.output()); + if (arc.label() == FST.END_LABEL) { + // Final node + break; + } + incr(); + + arc = fst.readLastTargetArc(arc, getArc(upto), fstReader); + } + } + + private FST.Arc getArc(int idx) { + if (arcs[idx] == null) { + arcs[idx] = new FST.Arc<>(); + } + return arcs[idx]; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTStore.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTStore.java new file mode 100644 index 0000000000000..c0648ac6b6a83 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTStore.java @@ -0,0 +1,37 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.Accountable; + +import java.io.IOException; + +/** Abstraction for reading/writing bytes necessary for FST. */ +public interface FSTStore extends Accountable { + void init(DataInput in, long numBytes) throws IOException; + + long size(); + + FST.BytesReader getReverseBytesReader(); + + void writeTo(DataOutput out) throws IOException; +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ForwardBytesReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ForwardBytesReader.java new file mode 100644 index 0000000000000..dcabb3a4d68f2 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ForwardBytesReader.java @@ -0,0 +1,64 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +// TODO: can we use just ByteArrayDataInput...? need to +// add a .skipBytes to DataInput.. hmm and .setPosition + +/** Reads from a single byte[]. */ +final class ForwardBytesReader extends FST.BytesReader { + private final byte[] bytes; + private int pos; + + ForwardBytesReader(byte[] bytes) { + this.bytes = bytes; + } + + @Override + public byte readByte() { + return bytes[pos++]; + } + + @Override + public void readBytes(byte[] b, int offset, int len) { + System.arraycopy(bytes, pos, b, offset, len); + pos += len; + } + + @Override + public void skipBytes(long count) { + pos += count; + } + + @Override + public long getPosition() { + return pos; + } + + @Override + public void setPosition(long pos) { + this.pos = (int) pos; + } + + @Override + public boolean reversed() { + return false; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/NodeHash.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/NodeHash.java new file mode 100644 index 0000000000000..f0b8364e9f1bf --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/NodeHash.java @@ -0,0 +1,192 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PagedGrowableWriter; + +import java.io.IOException; + +// Used to dedup states (lookup already-frozen states) +final class NodeHash { + + private PagedGrowableWriter table; + private long count; + private long mask; + private final FST fst; + private final FST.Arc scratchArc = new FST.Arc<>(); + private final FST.BytesReader in; + + NodeHash(FST fst, FST.BytesReader in) { + table = new PagedGrowableWriter(16, 1 << 27, 8, PackedInts.COMPACT); + mask = 15; + this.fst = fst; + this.in = in; + } + + private boolean nodesEqual(FSTCompiler.UnCompiledNode node, long address) throws IOException { + fst.readFirstRealTargetArc(address, scratchArc, in); + + // Fail fast for a node with fixed length arcs. + if (scratchArc.bytesPerArc() != 0) { + if (scratchArc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) { + if (node.numArcs != scratchArc.numArcs()) { + return false; + } + } else { + assert scratchArc.nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING; + if ((node.arcs[node.numArcs - 1].label - node.arcs[0].label + 1) != scratchArc.numArcs() + || node.numArcs != FST.Arc.BitTable.countBits(scratchArc, in)) { + return false; + } + } + } + + for (int arcUpto = 0; arcUpto < node.numArcs; arcUpto++) { + final FSTCompiler.Arc arc = node.arcs[arcUpto]; + if (arc.label != scratchArc.label() + || arc.output.equals(scratchArc.output()) == false + || ((FSTCompiler.CompiledNode) arc.target).node != scratchArc.target() + || arc.nextFinalOutput.equals(scratchArc.nextFinalOutput()) == false + || arc.isFinal != scratchArc.isFinal()) { + return false; + } + + if (scratchArc.isLast()) { + if (arcUpto == node.numArcs - 1) { + return true; + } else { + return false; + } + } + fst.readNextRealArc(scratchArc, in); + } + + return false; + } + + // hash code for an unfrozen node. This must be identical + // to the frozen case (below)!! + private long hash(FSTCompiler.UnCompiledNode node) { + final int PRIME = 31; + // System.out.println("hash unfrozen"); + long h = 0; + // TODO: maybe if number of arcs is high we can safely subsample? 
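The add()/rehash() logic below dedups frozen nodes with open addressing: quadratic probing over a power-of-two table and a rehash at roughly 2/3 occupancy. A compact standalone model of just that probing scheme, using a plain long[] instead of the PagedGrowableWriter and letting the value double as its own hash:

final class ProbingSketch {
    private long[] table = new long[16];    // 0 means "empty slot"
    private long mask = table.length - 1;
    private long count;

    /** Inserts a non-zero value unless an equal value is already present. */
    long addIfAbsent(long value) {
        long pos = value & mask;
        int c = 0;
        while (true) {
            long v = table[(int) pos];
            if (v == 0) {
                table[(int) pos] = value;
                if (++count > 2L * table.length / 3) {
                    rehash();               // keep occupancy below ~2/3
                }
                return value;
            } else if (v == value) {
                return v;                   // already present
            }
            pos = (pos + (++c)) & mask;     // quadratic probe
        }
    }

    private void rehash() {
        long[] old = table;
        table = new long[old.length * 2];
        mask = table.length - 1;
        for (long v : old) {
            if (v != 0) {
                long pos = v & mask;
                int c = 0;
                while (table[(int) pos] != 0) {
                    pos = (pos + (++c)) & mask;
                }
                table[(int) pos] = v;
            }
        }
    }
}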
+ for (int arcIdx = 0; arcIdx < node.numArcs; arcIdx++) { + final FSTCompiler.Arc arc = node.arcs[arcIdx]; + // System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) + // arc.target).node + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " + // isFinal?=" + arc.isFinal); + h = PRIME * h + arc.label; + long n = ((FSTCompiler.CompiledNode) arc.target).node; + h = PRIME * h + (int) (n ^ (n >> 32)); + h = PRIME * h + arc.output.hashCode(); + h = PRIME * h + arc.nextFinalOutput.hashCode(); + if (arc.isFinal) { + h += 17; + } + } + // System.out.println(" ret " + (h&Integer.MAX_VALUE)); + return h & Long.MAX_VALUE; + } + + // hash code for a frozen node + private long hash(long node) throws IOException { + final int PRIME = 31; + // System.out.println("hash frozen node=" + node); + long h = 0; + fst.readFirstRealTargetArc(node, scratchArc, in); + while (true) { + // System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + + // h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + + // scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition()); + h = PRIME * h + scratchArc.label(); + h = PRIME * h + (int) (scratchArc.target() ^ (scratchArc.target() >> 32)); + h = PRIME * h + scratchArc.output().hashCode(); + h = PRIME * h + scratchArc.nextFinalOutput().hashCode(); + if (scratchArc.isFinal()) { + h += 17; + } + if (scratchArc.isLast()) { + break; + } + fst.readNextRealArc(scratchArc, in); + } + // System.out.println(" ret " + (h&Integer.MAX_VALUE)); + return h & Long.MAX_VALUE; + } + + public long add(FSTCompiler fstCompiler, FSTCompiler.UnCompiledNode nodeIn) throws IOException { + // System.out.println("hash: add count=" + count + " vs " + table.size() + " mask=" + mask); + final long h = hash(nodeIn); + long pos = h & mask; + int c = 0; + while (true) { + final long v = table.get(pos); + if (v == 0) { + // freeze & add + final long node = fst.addNode(fstCompiler, nodeIn); + // System.out.println(" now freeze node=" + node); + assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h; + count++; + table.set(pos, node); + // Rehash at 2/3 occupancy: + if (count > 2 * table.size() / 3) { + rehash(); + } + return node; + } else if (nodesEqual(nodeIn, v)) { + // same node is already here + return v; + } + + // quadratic probe + pos = (pos + (++c)) & mask; + } + } + + // called only by rehash + private void addNew(long address) throws IOException { + long pos = hash(address) & mask; + int c = 0; + while (true) { + if (table.get(pos) == 0) { + table.set(pos, address); + break; + } + + // quadratic probe + pos = (pos + (++c)) & mask; + } + } + + private void rehash() throws IOException { + final PagedGrowableWriter oldTable = table; + + table = new PagedGrowableWriter(2 * oldTable.size(), 1 << 30, PackedInts.bitsRequired(count), PackedInts.COMPACT); + mask = table.size() - 1; + for (long idx = 0; idx < oldTable.size(); idx++) { + final long address = oldTable.get(idx); + if (address != 0) { + addNew(address); + } + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java new file mode 100644 index 0000000000000..f0246cbf5c862 --- /dev/null +++ 
b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java @@ -0,0 +1,79 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; + +/** + * Provides off heap storage of finite state machine (FST), using underlying index input instead of + * byte store on heap + * + * @lucene.experimental + */ +public final class OffHeapFSTStore implements FSTStore { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OffHeapFSTStore.class); + + private IndexInput in; + private long offset; + private long numBytes; + + @Override + public void init(DataInput in, long numBytes) throws IOException { + if (in instanceof IndexInput) { + this.in = (IndexInput) in; + this.numBytes = numBytes; + this.offset = this.in.getFilePointer(); + } else { + throw new IllegalArgumentException( + "parameter:in should be an instance of IndexInput for using OffHeapFSTStore, not a " + in.getClass().getName() + ); + } + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED; + } + + @Override + public long size() { + return numBytes; + } + + @Override + public FST.BytesReader getReverseBytesReader() { + try { + return new ReverseRandomAccessReader(in.randomAccessSlice(offset, numBytes)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void writeTo(DataOutput out) throws IOException { + throw new UnsupportedOperationException("writeToOutput operation is not supported for OffHeapFSTStore"); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java new file mode 100644 index 0000000000000..646e56f095d9a --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java @@ -0,0 +1,103 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; + +/** + * Provides storage of finite state machine (FST), using byte array or byte store allocated on heap. + * + * @lucene.experimental + */ +public final class OnHeapFSTStore implements FSTStore { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OnHeapFSTStore.class); + + /** + * A {@link BytesStore}, used during building, or during reading when the FST is very large (more + * than 1 GB). If the FST is less than 1 GB then bytesArray is set instead. + */ + private BytesStore bytes; + + /** Used at read time when the FST fits into a single byte[]. */ + private byte[] bytesArray; + + private final int maxBlockBits; + + public OnHeapFSTStore(int maxBlockBits) { + if (maxBlockBits < 1 || maxBlockBits > 30) { + throw new IllegalArgumentException("maxBlockBits should be 1 .. 30; got " + maxBlockBits); + } + + this.maxBlockBits = maxBlockBits; + } + + @Override + public void init(DataInput in, long numBytes) throws IOException { + if (numBytes > 1 << this.maxBlockBits) { + // FST is big: we need multiple pages + bytes = new BytesStore(in, numBytes, 1 << this.maxBlockBits); + } else { + // FST fits into a single block: use ByteArrayBytesStoreReader for less overhead + bytesArray = new byte[(int) numBytes]; + in.readBytes(bytesArray, 0, bytesArray.length); + } + } + + @Override + public long size() { + if (bytesArray != null) { + return bytesArray.length; + } else { + return bytes.ramBytesUsed(); + } + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + size(); + } + + @Override + public FST.BytesReader getReverseBytesReader() { + if (bytesArray != null) { + return new ReverseBytesReader(bytesArray); + } else { + return bytes.getReverseReader(); + } + } + + @Override + public void writeTo(DataOutput out) throws IOException { + if (bytes != null) { + long numBytes = bytes.getPosition(); + out.writeVLong(numBytes); + bytes.writeTo(out); + } else { + assert bytesArray != null; + out.writeVLong(bytesArray.length); + out.writeBytes(bytesArray, 0, bytesArray.length); + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java new file mode 100644 index 0000000000000..a7c5ed8933fed --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java @@ -0,0 +1,108 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
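A hedged sketch of choosing between the two FSTStore implementations above and loading the FST body through their shared init contract; the DataInput, the byte count, the 2^15 block size, and the assumption that these types are accessible from the caller's package are all illustrative, not values or guarantees mandated by the codec:

import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FSTStore;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.OffHeapFSTStore;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.OnHeapFSTStore;

final class FstStoreSketch {
    /** Loads numBytes of FST body either on heap (paged at 2^15 bytes) or off heap. */
    static FST.BytesReader load(DataInput in, long numBytes, boolean offHeap) throws IOException {
        FSTStore store = offHeap ? new OffHeapFSTStore() : new OnHeapFSTStore(15);
        store.init(in, numBytes);             // OffHeapFSTStore rejects anything but an IndexInput
        return store.getReverseBytesReader(); // FST bytes are consumed in reverse
    }
}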
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.Accountable; + +import java.io.IOException; + +/** + * Represents the outputs for an FST, providing the basic algebra required for building and + * traversing the FST. + * + *

Note that any operation that returns NO_OUTPUT must return the same singleton object from + * {@link #getNoOutput}. + * + * @lucene.experimental + */ +public abstract class Outputs { + + // TODO: maybe change this API to allow for re-use of the + // output instances -- this is an insane amount of garbage + // (new object per byte/char/int) if eg used during + // analysis + + /** Eg common("foobar", "food") -> "foo" */ + public abstract T common(T output1, T output2); + + /** Eg subtract("foobar", "foo") -> "bar" */ + public abstract T subtract(T output, T inc); + + /** Eg add("foo", "bar") -> "foobar" */ + public abstract T add(T prefix, T output); + + /** Encode an output value into a {@link DataOutput}. */ + public abstract void write(T output, DataOutput out) throws IOException; + + /** + * Encode an final node output value into a {@link DataOutput}. By default this just calls {@link + * #write(Object, DataOutput)}. + */ + public void writeFinalOutput(T output, DataOutput out) throws IOException { + write(output, out); + } + + /** Decode an output value previously written with {@link #write(Object, DataOutput)}. */ + public abstract T read(DataInput in) throws IOException; + + /** Skip the output; defaults to just calling {@link #read} and discarding the result. */ + public void skipOutput(DataInput in) throws IOException { + read(in); + } + + /** + * Decode an output value previously written with {@link #writeFinalOutput(Object, DataOutput)}. + * By default this just calls {@link #read(DataInput)}. + */ + public T readFinalOutput(DataInput in) throws IOException { + return read(in); + } + + /** + * Skip the output previously written with {@link #writeFinalOutput}; defaults to just calling + * {@link #readFinalOutput} and discarding the result. + */ + public void skipFinalOutput(DataInput in) throws IOException { + skipOutput(in); + } + + /** + * NOTE: this output is compared with == so you must ensure that all methods return the single + * object if it's really no output + */ + public abstract T getNoOutput(); + + public abstract String outputToString(T output); + + // TODO: maybe make valid(T output) public...? for asserts + + public T merge(T first, T second) { + throw new UnsupportedOperationException(); + } + + /** + * Return memory usage for the provided output. + * + * @see Accountable + */ + public abstract long ramBytesUsed(T output); +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseBytesReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseBytesReader.java new file mode 100644 index 0000000000000..8d22cc77694dd --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseBytesReader.java @@ -0,0 +1,62 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
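The common/subtract/add contract above can be pictured with plain strings, reusing the "foobar"/"food" examples from the javadoc; this sketch shows only the output algebra, not the serialization half that a real Outputs implementation must also provide:

final class StringOutputsSketch {
    static String common(String a, String b) {            // longest shared prefix
        int i = 0;
        while (i < a.length() && i < b.length() && a.charAt(i) == b.charAt(i)) {
            i++;
        }
        return a.substring(0, i);
    }

    static String subtract(String output, String prefix) { // strip a known prefix
        assert output.startsWith(prefix);
        return output.substring(prefix.length());
    }

    static String add(String prefix, String suffix) {      // re-attach a prefix
        return prefix + suffix;
    }

    public static void main(String[] args) {
        String c = common("foobar", "food");               // "foo"
        System.out.println(c);
        System.out.println(subtract("foobar", c));         // "bar"
        System.out.println(add(c, "bar"));                 // "foobar"
    }
}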
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +/** Reads in reverse from a single byte[]. */ +final class ReverseBytesReader extends FST.BytesReader { + private final byte[] bytes; + private int pos; + + ReverseBytesReader(byte[] bytes) { + this.bytes = bytes; + } + + @Override + public byte readByte() { + return bytes[pos--]; + } + + @Override + public void readBytes(byte[] b, int offset, int len) { + for (int i = 0; i < len; i++) { + b[offset + i] = bytes[pos--]; + } + } + + @Override + public void skipBytes(long count) { + pos -= count; + } + + @Override + public long getPosition() { + return pos; + } + + @Override + public void setPosition(long pos) { + this.pos = (int) pos; + } + + @Override + public boolean reversed() { + return true; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseRandomAccessReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseRandomAccessReader.java new file mode 100644 index 0000000000000..55eca99aaeb1e --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ReverseRandomAccessReader.java @@ -0,0 +1,67 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.store.RandomAccessInput; + +import java.io.IOException; + +/** Implements reverse read from a RandomAccessInput. 
*/ +final class ReverseRandomAccessReader extends FST.BytesReader { + private final RandomAccessInput in; + private long pos; + + ReverseRandomAccessReader(RandomAccessInput in) { + this.in = in; + } + + @Override + public byte readByte() throws IOException { + return in.readByte(pos--); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + int i = offset, end = offset + len; + while (i < end) { + b[i++] = in.readByte(pos--); + } + } + + @Override + public void skipBytes(long count) { + pos -= count; + } + + @Override + public long getPosition() { + return pos; + } + + @Override + public void setPosition(long pos) { + this.pos = pos; + } + + @Override + public boolean reversed() { + return true; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java new file mode 100644 index 0000000000000..ce2ac82d478b6 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java @@ -0,0 +1,903 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.IntsRefBuilder; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST.Arc; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST.Arc.BitTable; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST.BytesReader; + +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.TreeSet; + +/** + * Static helper methods. + * + * @lucene.experimental + */ +public final class Util { + private Util() {} + + /** Looks up the output for this input, or null if the input is not accepted. 
*/ + public static T get(FST fst, IntsRef input) throws IOException { + + // TODO: would be nice not to alloc this on every lookup + final Arc arc = fst.getFirstArc(new Arc<>()); + + final BytesReader fstReader = fst.getBytesReader(); + + // Accumulate output as we go + T output = fst.outputs.getNoOutput(); + for (int i = 0; i < input.length; i++) { + if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) { + return null; + } + output = fst.outputs.add(output, arc.output()); + } + + if (arc.isFinal()) { + return fst.outputs.add(output, arc.nextFinalOutput()); + } else { + return null; + } + } + + // TODO: maybe a CharsRef version for BYTE2 + + /** Looks up the output for this input, or null if the input is not accepted */ + public static T get(FST fst, BytesRef input) throws IOException { + assert fst.inputType == FST.INPUT_TYPE.BYTE1; + + final BytesReader fstReader = fst.getBytesReader(); + + // TODO: would be nice not to alloc this on every lookup + final Arc arc = fst.getFirstArc(new Arc<>()); + + // Accumulate output as we go + T output = fst.outputs.getNoOutput(); + for (int i = 0; i < input.length; i++) { + if (fst.findTargetArc(input.bytes[i + input.offset] & 0xFF, arc, arc, fstReader) == null) { + return null; + } + output = fst.outputs.add(output, arc.output()); + } + + if (arc.isFinal()) { + return fst.outputs.add(output, arc.nextFinalOutput()); + } else { + return null; + } + } + + /** + * Represents a path in TopNSearcher. + * + * @lucene.experimental + */ + public static class FSTPath { + /** Holds the last arc appended to this path */ + public Arc arc; + /** Holds cost plus any usage-specific output: */ + public T output; + + public final IntsRefBuilder input; + public final float boost; + public final CharSequence context; + + // Custom int payload for consumers; the NRT suggester uses this to record if this path has + // already enumerated a surface form + public int payload; + + FSTPath(T output, Arc arc, IntsRefBuilder input, float boost, CharSequence context, int payload) { + this.arc = new Arc().copyFrom(arc); + this.output = output; + this.input = input; + this.boost = boost; + this.context = context; + this.payload = payload; + } + + FSTPath newPath(T output, IntsRefBuilder input) { + return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload); + } + + @Override + public String toString() { + return "input=" + input.get() + " output=" + output + " context=" + context + " boost=" + boost + " payload=" + payload; + } + } + + /** Compares first by the provided comparator, and then tie breaks by path.input. */ + private static class TieBreakByInputComparator implements Comparator> { + private final Comparator comparator; + + TieBreakByInputComparator(Comparator comparator) { + this.comparator = comparator; + } + + @Override + public int compare(FSTPath a, FSTPath b) { + int cmp = comparator.compare(a.output, b.output); + if (cmp == 0) { + return a.input.get().compareTo(b.input.get()); + } else { + return cmp; + } + } + } + + /** Utility class to find top N shortest paths from start point(s). 
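A hedged usage sketch for the lookups above: given an already-opened FST<Long> (how it is loaded is outside this snippet), Util.get returns the accumulated output for a term, or null if the term is not accepted. The fst parameter and the Long output type are assumptions for illustration:

import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Util;

final class FstLookupSketch {
    /** Returns the output stored for term, or null if the FST does not accept it. */
    static Long lookup(FST<Long> fst, String term) throws IOException {
        return Util.get(fst, new BytesRef(term));
    }
}

For inputs that are not BYTE1, the IntsRef overload of Util.get above serves the same purpose.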
*/ + public static class TopNSearcher { + + private final FST fst; + private final BytesReader bytesReader; + private final int topN; + private final int maxQueueDepth; + + private final Arc scratchArc = new Arc<>(); + + private final Comparator comparator; + private final Comparator> pathComparator; + + TreeSet> queue; + + /** + * Creates an unbounded TopNSearcher + * + * @param fst the {@link FST} to search on + * @param topN the number of top scoring entries to retrieve + * @param maxQueueDepth the maximum size of the queue of possible top entries + * @param comparator the comparator to select the top N + */ + public TopNSearcher(FST fst, int topN, int maxQueueDepth, Comparator comparator) { + this(fst, topN, maxQueueDepth, comparator, new TieBreakByInputComparator<>(comparator)); + } + + public TopNSearcher(FST fst, int topN, int maxQueueDepth, Comparator comparator, Comparator> pathComparator) { + this.fst = fst; + this.bytesReader = fst.getBytesReader(); + this.topN = topN; + this.maxQueueDepth = maxQueueDepth; + this.comparator = comparator; + this.pathComparator = pathComparator; + queue = new TreeSet<>(pathComparator); + } + + // If back plus this arc is competitive then add to queue: + protected void addIfCompetitive(FSTPath path) { + + assert queue != null; + + T output = fst.outputs.add(path.output, path.arc.output()); + + if (queue.size() == maxQueueDepth) { + FSTPath bottom = queue.last(); + int comp = pathComparator.compare(path, bottom); + if (comp > 0) { + // Doesn't compete + return; + } else if (comp == 0) { + // Tie break by alpha sort on the input: + path.input.append(path.arc.label()); + final int cmp = bottom.input.get().compareTo(path.input.get()); + path.input.setLength(path.input.length() - 1); + + // We should never see dups: + assert cmp != 0; + + if (cmp < 0) { + // Doesn't compete + return; + } + } + // Competes + } + // else ... Queue isn't full yet, so any path we hit competes: + + // copy over the current input to the new input + // and add the arc.label to the end + IntsRefBuilder newInput = new IntsRefBuilder(); + newInput.copyInts(path.input.get()); + newInput.append(path.arc.label()); + + FSTPath newPath = path.newPath(output, newInput); + if (acceptPartialPath(newPath)) { + queue.add(newPath); + if (queue.size() == maxQueueDepth + 1) { + queue.pollLast(); + } + } + } + + public void addStartPaths(Arc node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException { + addStartPaths(node, startOutput, allowEmptyString, input, 0, null, -1); + } + + /** + * Adds all leaving arcs, including 'finished' arc, if the node is final, from this node into + * the queue. 
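A hedged sketch of driving TopNSearcher from the root arc, using the four-argument addStartPaths above and then search(); the FST<Long>, its weight-like outputs, the noOutput start value, and the natural-order comparator are illustrative assumptions:

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.IntsRefBuilder;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Util;

final class TopNSketch {
    /** Collects the n smallest-output paths starting from the FST root. */
    static List<Util.Result<Long>> topN(FST<Long> fst, Long noOutput, int n) throws IOException {
        Util.TopNSearcher<Long> searcher = new Util.TopNSearcher<>(fst, n, n, Comparator.<Long>naturalOrder());
        searcher.addStartPaths(fst.getFirstArc(new FST.Arc<>()), noOutput, true, new IntsRefBuilder());
        List<Util.Result<Long>> results = new ArrayList<>();
        for (Util.Result<Long> result : searcher.search()) {
            results.add(result);
        }
        return results;
    }
}

Util.shortestPaths, defined further below in this file, wraps the same setup with maxQueueDepth equal to topN so the pruning stays admissible.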
+ */ + public void addStartPaths( + Arc node, + T startOutput, + boolean allowEmptyString, + IntsRefBuilder input, + float boost, + CharSequence context, + int payload + ) throws IOException { + + // De-dup NO_OUTPUT since it must be a singleton: + if (startOutput.equals(fst.outputs.getNoOutput())) { + startOutput = fst.outputs.getNoOutput(); + } + + FSTPath path = new FSTPath<>(startOutput, node, input, boost, context, payload); + fst.readFirstTargetArc(node, path.arc, bytesReader); + + // Bootstrap: find the min starting arc + while (true) { + if (allowEmptyString || path.arc.label() != FST.END_LABEL) { + addIfCompetitive(path); + } + if (path.arc.isLast()) { + break; + } + fst.readNextArc(path.arc, bytesReader); + } + } + + public TopResults search() throws IOException { + + final List> results = new ArrayList<>(); + + final BytesReader fstReader = fst.getBytesReader(); + final T NO_OUTPUT = fst.outputs.getNoOutput(); + + // TODO: we could enable FST to sorting arcs by weight + // as it freezes... can easily do this on first pass + // (w/o requiring rewrite) + + // TODO: maybe we should make an FST.INPUT_TYPE.BYTE0.5!? + // (nibbles) + int rejectCount = 0; + + // For each top N path: + while (results.size() < topN) { + + FSTPath path; + + if (queue == null) { + // Ran out of paths + break; + } + + // Remove top path since we are now going to + // pursue it: + path = queue.pollFirst(); + + if (path == null) { + // There were less than topN paths available: + break; + } + // System.out.println("pop path=" + path + " arc=" + path.arc.output); + + if (acceptPartialPath(path) == false) { + continue; + } + + if (path.arc.label() == FST.END_LABEL) { + // Empty string! + path.input.setLength(path.input.length() - 1); + results.add(new Result<>(path.input.get(), path.output)); + continue; + } + + if (results.size() == topN - 1 && maxQueueDepth == topN) { + // Last path -- don't bother w/ queue anymore: + queue = null; + } + + // We take path and find its "0 output completion", + // ie, just keep traversing the first arc with + // NO_OUTPUT that we can find, since this must lead + // to the minimum path that completes from + // path.arc. 
+ + // For each input letter: + while (true) { + + fst.readFirstTargetArc(path.arc, path.arc, fstReader); + + // For each arc leaving this node: + boolean foundZero = false; + boolean arcCopyIsPending = false; + while (true) { + // tricky: instead of comparing output == 0, we must + // express it via the comparator compare(output, 0) == 0 + if (comparator.compare(NO_OUTPUT, path.arc.output()) == 0) { + if (queue == null) { + foundZero = true; + break; + } else if (foundZero == false) { + arcCopyIsPending = true; + foundZero = true; + } else { + addIfCompetitive(path); + } + } else if (queue != null) { + addIfCompetitive(path); + } + if (path.arc.isLast()) { + break; + } + if (arcCopyIsPending) { + scratchArc.copyFrom(path.arc); + arcCopyIsPending = false; + } + fst.readNextArc(path.arc, fstReader); + } + + assert foundZero; + + if (queue != null && arcCopyIsPending == false) { + path.arc.copyFrom(scratchArc); + } + + if (path.arc.label() == FST.END_LABEL) { + // Add final output: + path.output = fst.outputs.add(path.output, path.arc.output()); + if (acceptResult(path)) { + results.add(new Result<>(path.input.get(), path.output)); + } else { + rejectCount++; + } + break; + } else { + path.input.append(path.arc.label()); + path.output = fst.outputs.add(path.output, path.arc.output()); + if (acceptPartialPath(path) == false) { + break; + } + } + } + } + return new TopResults<>(rejectCount + topN <= maxQueueDepth, results); + } + + protected boolean acceptResult(FSTPath path) { + return acceptResult(path.input.get(), path.output); + } + + /** Override this to prevent considering a path before it's complete */ + protected boolean acceptPartialPath(FSTPath path) { + return true; + } + + protected boolean acceptResult(IntsRef input, T output) { + return true; + } + } + + /** + * Holds a single input (IntsRef) + output, returned by {@link #shortestPaths shortestPaths()}. + */ + public static final class Result { + public final IntsRef input; + public final T output; + + public Result(IntsRef input, T output) { + this.input = input; + this.output = output; + } + } + + /** Holds the results for a top N search using {@link TopNSearcher} */ + public static final class TopResults implements Iterable> { + + /** + * true iff this is a complete result ie. if the specified queue size was large + * enough to find the complete list of results. This might be false if the {@link + * TopNSearcher} rejected too many results. + */ + public final boolean isComplete; + /** The top results */ + public final List> topN; + + TopResults(boolean isComplete, List> topN) { + this.topN = topN; + this.isComplete = isComplete; + } + + @Override + public Iterator> iterator() { + return topN.iterator(); + } + } + + /** Starting from node, find the top N min cost completions to a final node. */ + public static TopResults shortestPaths( + FST fst, + Arc fromNode, + T startOutput, + Comparator comparator, + int topN, + boolean allowEmptyString + ) throws IOException { + + // All paths are kept, so we can pass topN for + // maxQueueDepth and the pruning is admissible: + TopNSearcher searcher = new TopNSearcher<>(fst, topN, topN, comparator); + + // since this search is initialized with a single start node + // it is okay to start with an empty input path here + searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder()); + return searcher.search(); + } + + /** + * Dumps an {@link FST} to a GraphViz's dot language description for visualization. + * Example of use: + * + *

+     * <pre>
+     * PrintWriter pw = new PrintWriter("out.dot");
+     * Util.toDot(fst, pw, true, true);
+     * pw.close();
+     * </pre>
+     *
+     * and then, from command line:
+     *
+     * <pre>
+     * dot -Tpng -o out.png out.dot
+     * </pre>
+     *

Note: larger FSTs (a few thousand nodes) won't even render, don't bother. + * + * @param sameRank If true, the resulting dot file will try to order + * states in layers of breadth-first traversal. This may mess up arcs, but makes the output + * FST's structure a bit clearer. + * @param labelStates If true states will have labels equal to their offsets in their + * binary format. Expands the graph considerably. + * @see graphviz project + */ + public static void toDot(FST fst, Writer out, boolean sameRank, boolean labelStates) throws IOException { + final String expandedNodeColor = "blue"; + + // This is the start arc in the automaton (from the epsilon state to the first state + // with outgoing transitions. + final Arc startArc = fst.getFirstArc(new Arc<>()); + + // A queue of transitions to consider for the next level. + final List> thisLevelQueue = new ArrayList<>(); + + // A queue of transitions to consider when processing the next level. + final List> nextLevelQueue = new ArrayList<>(); + nextLevelQueue.add(startArc); + // System.out.println("toDot: startArc: " + startArc); + + // A list of states on the same level (for ranking). + final List sameLevelStates = new ArrayList<>(); + + // A bitset of already seen states (target offset). + final BitSet seen = new BitSet(); + seen.set((int) startArc.target()); + + // Shape for states. + final String stateShape = "circle"; + final String finalStateShape = "doublecircle"; + + // Emit DOT prologue. + out.write("digraph FST {\n"); + out.write(" rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n"); + + if (labelStates == false) { + out.write(" node [shape=circle, width=.2, height=.2, style=filled]\n"); + } + + emitDotState(out, "initial", "point", "white", ""); + + final T NO_OUTPUT = fst.outputs.getNoOutput(); + final BytesReader r = fst.getBytesReader(); + + // final FST.Arc scratchArc = new FST.Arc<>(); + + { + final String stateColor; + if (fst.isExpandedTarget(startArc, r)) { + stateColor = expandedNodeColor; + } else { + stateColor = null; + } + + final boolean isFinal; + final T finalOutput; + if (startArc.isFinal()) { + isFinal = true; + finalOutput = startArc.nextFinalOutput() == NO_OUTPUT ? null : startArc.nextFinalOutput(); + } else { + isFinal = false; + finalOutput = null; + } + + emitDotState( + out, + Long.toString(startArc.target()), + isFinal ? finalStateShape : stateShape, + stateColor, + finalOutput == null ? "" : fst.outputs.outputToString(finalOutput) + ); + } + + out.write(" initial -> " + startArc.target() + "\n"); + + int level = 0; + + while (nextLevelQueue.isEmpty() == false) { + // we could double buffer here, but it doesn't matter probably. + // System.out.println("next level=" + level); + thisLevelQueue.addAll(nextLevelQueue); + nextLevelQueue.clear(); + + level++; + out.write("\n // Transitions and states at level: " + level + "\n"); + while (thisLevelQueue.isEmpty() == false) { + final Arc arc = thisLevelQueue.remove(thisLevelQueue.size() - 1); + // System.out.println(" pop: " + arc); + if (FST.targetHasArcs(arc)) { + // scan all target arcs + // System.out.println(" readFirstTarget..."); + + final long node = arc.target(); + + fst.readFirstRealTargetArc(arc.target(), arc, r); + + // System.out.println(" firstTarget: " + arc); + + while (true) { + + // System.out.println(" cycle arc=" + arc); + // Emit the unseen state and add it to the queue for the next level. 
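+ // The "seen" bitset is keyed by target offset so each state is emitted only once;
+ // newly discovered states go onto nextLevelQueue and into sameLevelStates, which is
+ // used below to pin the states of one BFS level to the same rank when sameRank is set.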
+ if (arc.target() >= 0 && seen.get((int) arc.target()) == false) { + + /* + boolean isFinal = false; + T finalOutput = null; + fst.readFirstTargetArc(arc, scratchArc); + if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) { + // target is final + isFinal = true; + finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output; + System.out.println("dot hit final label=" + (char) scratchArc.label); + } + */ + final String stateColor; + if (fst.isExpandedTarget(arc, r)) { + stateColor = expandedNodeColor; + } else { + stateColor = null; + } + + final String finalOutput; + if (arc.nextFinalOutput() != null && arc.nextFinalOutput() != NO_OUTPUT) { + finalOutput = fst.outputs.outputToString(arc.nextFinalOutput()); + } else { + finalOutput = ""; + } + + emitDotState(out, Long.toString(arc.target()), stateShape, stateColor, finalOutput); + // To see the node address, use this instead: + // emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, + // String.valueOf(arc.target)); + seen.set((int) arc.target()); + nextLevelQueue.add(new Arc().copyFrom(arc)); + sameLevelStates.add((int) arc.target()); + } + + String outs; + if (arc.output() != NO_OUTPUT) { + outs = "/" + fst.outputs.outputToString(arc.output()); + } else { + outs = ""; + } + + if (FST.targetHasArcs(arc) == false && arc.isFinal() && arc.nextFinalOutput() != NO_OUTPUT) { + // Tricky special case: sometimes, due to + // pruning, the builder can [sillily] produce + // an FST with an arc into the final end state + // (-1) but also with a next final output; in + // this case we pull that output up onto this + // arc + outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput()) + "]"; + } + + final String arcColor; + if (arc.flag(FST.BIT_TARGET_NEXT)) { + arcColor = "red"; + } else { + arcColor = "black"; + } + + assert arc.label() != FST.END_LABEL; + out.write( + " " + + node + + " -> " + + arc.target() + + " [label=\"" + + printableLabel(arc.label()) + + outs + + "\"" + + (arc.isFinal() ? " style=\"bold\"" : "") + + " color=\"" + + arcColor + + "\"]\n" + ); + + // Break the loop if we're on the last arc of this state. + if (arc.isLast()) { + // System.out.println(" break"); + break; + } + fst.readNextRealArc(arc, r); + } + } + } + + // Emit state ranking information. + if (sameRank && sameLevelStates.size() > 1) { + out.write(" {rank=same; "); + for (int state : sameLevelStates) { + out.write(state + "; "); + } + out.write(" }\n"); + } + sameLevelStates.clear(); + } + + // Emit terminating state (always there anyway). + out.write(" -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n"); + out.write(" {rank=sink; -1 }\n"); + + out.write("}\n"); + out.flush(); + } + + /** Emit a single state in the dot language. */ + private static void emitDotState(Writer out, String name, String shape, String color, String label) throws IOException { + out.write( + " " + + name + + " [" + + (shape != null ? "shape=" + shape : "") + + " " + + (color != null ? "color=" + color : "") + + " " + + (label != null ? "label=\"" + label + "\"" : "label=\"\"") + + " " + + "]\n" + ); + } + + /** Ensures an arc's label is indeed printable (dot uses US-ASCII). 
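+ * Labels outside the range 0x20-0x7d, as well as the quote and backslash characters, are rendered as hex codes instead.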
*/ + private static String printableLabel(int label) { + // Any ordinary ascii character, except for " or \, are + // printed as the character; else, as a hex string: + if (label >= 0x20 && label <= 0x7d && label != 0x22 && label != 0x5c) { // " OR \ + return Character.toString((char) label); + } + return "0x" + Integer.toHexString(label); + } + + /** Just maps each UTF16 unit (char) to the ints in an IntsRef. */ + public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) { + final int charLimit = s.length(); + scratch.setLength(charLimit); + scratch.grow(charLimit); + for (int idx = 0; idx < charLimit; idx++) { + scratch.setIntAt(idx, s.charAt(idx)); + } + return scratch.get(); + } + + /** + * Decodes the Unicode codepoints from the provided CharSequence and places them in the provided + * scratch IntsRef, which must not be null, returning it. + */ + public static IntsRef toUTF32(CharSequence s, IntsRefBuilder scratch) { + int charIdx = 0; + int intIdx = 0; + final int charLimit = s.length(); + while (charIdx < charLimit) { + scratch.grow(intIdx + 1); + final int utf32 = Character.codePointAt(s, charIdx); + scratch.setIntAt(intIdx, utf32); + charIdx += Character.charCount(utf32); + intIdx++; + } + scratch.setLength(intIdx); + return scratch.get(); + } + + /** + * Decodes the Unicode codepoints from the provided char[] and places them in the provided scratch + * IntsRef, which must not be null, returning it. + */ + public static IntsRef toUTF32(char[] s, int offset, int length, IntsRefBuilder scratch) { + int charIdx = offset; + int intIdx = 0; + final int charLimit = offset + length; + while (charIdx < charLimit) { + scratch.grow(intIdx + 1); + final int utf32 = Character.codePointAt(s, charIdx, charLimit); + scratch.setIntAt(intIdx, utf32); + charIdx += Character.charCount(utf32); + intIdx++; + } + scratch.setLength(intIdx); + return scratch.get(); + } + + /** Just takes unsigned byte values from the BytesRef and converts into an IntsRef. */ + public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) { + scratch.clear(); + for (int i = 0; i < input.length; i++) { + scratch.append(input.bytes[i + input.offset] & 0xFF); + } + return scratch.get(); + } + + /** Just converts IntsRef to BytesRef; you must ensure the int values fit into a byte. */ + public static BytesRef toBytesRef(IntsRef input, BytesRefBuilder scratch) { + scratch.grow(input.length); + for (int i = 0; i < input.length; i++) { + int value = input.ints[i + input.offset]; + // NOTE: we allow -128 to 255 + assert value >= Byte.MIN_VALUE && value <= 255 : "value " + value + " doesn't fit into byte"; + scratch.setByteAt(i, (byte) value); + } + scratch.setLength(input.length); + return scratch.get(); + } + + // Uncomment for debugging: + + /* + public static void dotToFile(FST fst, String filePath) throws IOException { + Writer w = new OutputStreamWriter(new FileOutputStream(filePath)); + toDot(fst, w, true, true); + w.close(); + } + */ + + /** + * Reads the first arc greater or equal than the given label into the provided arc in place and + * returns it iff found, otherwise return null. 
+ * + * @param label the label to ceil on + * @param fst the fst to operate on + * @param follow the arc to follow reading the label from + * @param arc the arc to read into in place + * @param in the fst's {@link BytesReader} + */ + public static Arc readCeilArc(int label, FST fst, Arc follow, Arc arc, BytesReader in) throws IOException { + if (label == FST.END_LABEL) { + return FST.readEndArc(follow, arc); + } + if (FST.targetHasArcs(follow) == false) { + return null; + } + fst.readFirstTargetArc(follow, arc, in); + if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) { + if (arc.nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) { + // Fixed length arcs in a direct addressing node. + int targetIndex = label - arc.label(); + if (targetIndex >= arc.numArcs()) { + return null; + } else if (targetIndex < 0) { + return arc; + } else { + if (BitTable.isBitSet(targetIndex, arc, in)) { + fst.readArcByDirectAddressing(arc, in, targetIndex); + assert arc.label() == label; + } else { + int ceilIndex = BitTable.nextBitSet(targetIndex, arc, in); + assert ceilIndex != -1; + fst.readArcByDirectAddressing(arc, in, ceilIndex); + assert arc.label() > label; + } + return arc; + } + } + // Fixed length arcs in a binary search node. + int idx = binarySearch(fst, arc, label); + if (idx >= 0) { + return fst.readArcByIndex(arc, in, idx); + } + idx = -1 - idx; + if (idx == arc.numArcs()) { + // DEAD END! + return null; + } + return fst.readArcByIndex(arc, in, idx); + } + + // Variable length arcs in a linear scan list, + // or special arc with label == FST.END_LABEL. + fst.readFirstRealTargetArc(follow.target(), arc, in); + + while (true) { + // System.out.println(" non-bs cycle"); + if (arc.label() >= label) { + // System.out.println(" found!"); + return arc; + } else if (arc.isLast()) { + return null; + } else { + fst.readNextRealArc(arc, in); + } + } + } + + /** + * Perform a binary search of Arcs encoded as a packed array + * + * @param fst the FST from which to read + * @param arc the starting arc; sibling arcs greater than this will be searched. Usually the first + * arc in the array. + * @param targetLabel the label to search for + * @param the output type of the FST + * @return the index of the Arc having the target label, or if no Arc has the matching label, + * {@code -1 - idx)}, where {@code idx} is the index of the Arc with the next highest label, + * or the total number of arcs if the target label exceeds the maximum. 
+ * @throws IOException when the FST reader does + */ + static int binarySearch(FST fst, Arc arc, int targetLabel) throws IOException { + assert arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH + : "Arc is not encoded as packed array for binary search (nodeFlags=" + arc.nodeFlags() + ")"; + BytesReader in = fst.getBytesReader(); + int low = arc.arcIdx(); + int mid; + int high = arc.numArcs() - 1; + while (low <= high) { + mid = (low + high) >>> 1; + in.setPosition(arc.posArcsStart()); + in.skipBytes((long) arc.bytesPerArc() * mid + 1); + final int midLabel = fst.readLabel(in); + final int cmp = midLabel - targetLabel; + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + return mid; + } + } + return -1 - low; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java new file mode 100644 index 0000000000000..e6435dae4c12b --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java @@ -0,0 +1,1124 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree; + +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IntsRefBuilder; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.compress.LZ4; +import org.apache.lucene.util.compress.LowercaseAsciiCompression; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.ByteSequenceOutputs; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.BytesRefFSTEnum; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FST; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.FSTCompiler; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Util; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/* + TODO: + + - Currently there is a one-to-one mapping of indexed + term to term block, but we could decouple the two, ie, + put more terms into the index than there are blocks. + The index would take up more RAM but then it'd be able + to avoid seeking more often and could make PK/FuzzyQ + faster if the additional indexed terms could store + the offset into the terms block. + + - The blocks are not written in true depth-first + order, meaning if you just next() the file pointer will + sometimes jump backwards. For example, block foo* will + be written before block f* because it finished before. + This could possibly hurt performance if the terms dict is + not hot, since OSs anticipate sequential file access. We + could fix the writer to re-order the blocks as a 2nd + pass. + + - Each block encodes the term suffixes packed + sequentially using a separate vInt per term, which is + 1) wasteful and 2) slow (must linear scan to find a + particular suffix). We should instead 1) make + random-access array so we can directly access the Nth + suffix, and 2) bulk-encode this array using bulk int[] + codecs; then at search time we can binary search when + we seek a particular term. +*/ + +/** + * Block-based terms index and dictionary writer. + * + *

+ * Writes terms dict and index, block-encoding (column stride) each term's metadata for each set
+ * of terms between two index terms.
+ *
+ * Files:
+ *
+ *   • .tim: Term Dictionary
+ *   • .tip: Term Index
+ *
+ * Term Dictionary
+ *
+ * The .tim file contains the list of terms in each field along with per-term statistics (such as
+ * docfreq) and per-term metadata (typically pointers to the postings list for that term in the
+ * inverted index).
+ *
+ * The .tim is arranged in blocks: with blocks containing a variable number of entries (by
+ * default 25-48), where each entry is either a term or a reference to a sub-block.
+ *
+ * NOTE: The term dictionary can plug into different postings implementations: the postings
+ * writer/reader are actually responsible for encoding and decoding the Postings Metadata and Term
+ * Metadata sections.
+ *
+ *   • TermsDict (.tim) --> Header, PostingsHeader, NodeBlock^NumBlocks, FieldSummary, DirOffset, Footer
+ *   • NodeBlock --> (OuterNode | InnerNode)
+ *   • OuterNode --> EntryCount, SuffixLength, Byte^SuffixLength, StatsLength,
+ *     <TermStats>^EntryCount, MetaLength, <TermMetadata>^EntryCount
+ *   • InnerNode --> EntryCount, SuffixLength[,Sub?], Byte^SuffixLength, StatsLength,
+ *     <TermStats ?>^EntryCount, MetaLength, <TermMetadata ?>^EntryCount
+ *   • TermStats --> DocFreq, TotalTermFreq
+ *   • FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength, Byte^RootCodeLength,
+ *     SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm>^NumFields
+ *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
+ *   • DirOffset --> {@link DataOutput#writeLong Uint64}
+ *   • MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]
+ *   • EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
+ *     FieldNumber,RootCodeLength,DocCount,LongsSize --> {@link DataOutput#writeVInt VInt}
+ *   • TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --> {@link DataOutput#writeVLong VLong}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ * Notes:
+ *
+ *   • Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information for
+ *     the BlockTree implementation.
+ *   • DirOffset is a pointer to the FieldSummary section.
+ *   • DocFreq is the count of documents which contain the term.
+ *   • TotalTermFreq is the total number of occurrences of the term. This is encoded as the
+ *     difference between the total number of occurrences and the DocFreq.
+ *   • FieldNumber is the field's number from {@link FieldInfos}. (.fnm)
+ *   • NumTerms is the number of unique terms for the field.
+ *   • RootCode points to the root block for the field.
+ *   • SumDocFreq is the total number of postings, the number of term-document pairs across the
+ *     entire field.
+ *   • DocCount is the number of documents that have at least one posting for this field.
+ *   • LongsSize records how many long values the postings writer/reader record per term (e.g., to
+ *     hold freq/prox/doc file offsets).
+ *   • MinTerm, MaxTerm are the lowest and highest term in this field.
+ *   • PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
+ *     these contain arbitrary per-file data (such as parameters or versioning information) and
+ *     per-term data (such as pointers to inverted files).
+ *   • For inner nodes of the tree, every entry will steal one bit to mark whether it points to
+ *     child nodes (sub-block). If so, the corresponding TermStats and TermMetaData are omitted.
+ *
+ * Term Index
+ *
+ * The .tip file contains an index into the term dictionary, so that it can be accessed randomly.
+ * The index is also used to determine when a given term cannot exist on disk (in the .tim file),
+ * saving a disk seek.
+ *
+ *   • TermsIndex (.tip) --> Header, FSTIndex^NumFields <IndexStartFP>^NumFields, DirOffset, Footer
+ *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
+ *   • DirOffset --> {@link DataOutput#writeLong Uint64}
+ *   • IndexStartFP --> {@link DataOutput#writeVLong VLong}
+ *   • FSTIndex --> {@link FST FST<byte[]>}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ * Notes:
+ *
+ *   • The .tip file contains a separate FST for each field. The FST maps a term prefix to the
+ *     on-disk block that holds all terms starting with that prefix. Each field's IndexStartFP
+ *     points to its FST.
+ *   • DirOffset is a pointer to the start of the IndexStartFPs for all fields.
+ *   • It's possible that an on-disk block would contain too many terms (more than the allowed
+ *     maximum (default: 48)). When this happens, the block is sub-divided into new blocks (called
+ *     "floor blocks"), and then the output in the FST for the block's prefix encodes the leading
+ *     byte of each sub-block, and its file pointer.
+ * + * @see Lucene40BlockTreeTermsReader + * @lucene.experimental + */ +public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer { + + /** + * Suggested default value for the {@code minItemsInBlock} parameter to {@link + * #Lucene40BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. + */ + public static final int DEFAULT_MIN_BLOCK_SIZE = 25; + + /** + * Suggested default value for the {@code maxItemsInBlock} parameter to {@link + * #Lucene40BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. + */ + public static final int DEFAULT_MAX_BLOCK_SIZE = 48; + + // public static boolean DEBUG = false; + // public static boolean DEBUG2 = false; + + // private final static boolean SAVE_DOT_FILES = false; + + private final IndexOutput metaOut; + private final IndexOutput termsOut; + private final IndexOutput indexOut; + final int maxDoc; + final int minItemsInBlock; + final int maxItemsInBlock; + + final PostingsWriterBase postingsWriter; + final FieldInfos fieldInfos; + + private final List fields = new ArrayList<>(); + + /** + * Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between + * minItemsPerBlock and maxItemsPerBlock, though in some cases the blocks may be smaller than the + * min. + */ + public Lucene40BlockTreeTermsWriter( + SegmentWriteState state, + PostingsWriterBase postingsWriter, + int minItemsInBlock, + int maxItemsInBlock + ) throws IOException { + validateSettings(minItemsInBlock, maxItemsInBlock); + + this.minItemsInBlock = minItemsInBlock; + this.maxItemsInBlock = maxItemsInBlock; + + this.maxDoc = state.segmentInfo.maxDoc(); + this.fieldInfos = state.fieldInfos; + this.postingsWriter = postingsWriter; + + final String termsName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + Lucene40BlockTreeTermsReader.TERMS_EXTENSION + ); + termsOut = EndiannessReverserUtil.createOutput(state.directory, termsName, state.context); + boolean success = false; + IndexOutput metaOut = null, indexOut = null; + try { + CodecUtil.writeIndexHeader( + termsOut, + Lucene40BlockTreeTermsReader.TERMS_CODEC_NAME, + Lucene40BlockTreeTermsReader.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + + final String indexName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + Lucene40BlockTreeTermsReader.TERMS_INDEX_EXTENSION + ); + indexOut = EndiannessReverserUtil.createOutput(state.directory, indexName, state.context); + CodecUtil.writeIndexHeader( + indexOut, + Lucene40BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, + Lucene40BlockTreeTermsReader.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + // segment = state.segmentInfo.name; + + final String metaName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + Lucene40BlockTreeTermsReader.TERMS_META_EXTENSION + ); + metaOut = EndiannessReverserUtil.createOutput(state.directory, metaName, state.context); + CodecUtil.writeIndexHeader( + metaOut, + Lucene40BlockTreeTermsReader.TERMS_META_CODEC_NAME, + Lucene40BlockTreeTermsReader.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + + postingsWriter.init(metaOut, state); // have consumer write its format/header + + this.metaOut = metaOut; + this.indexOut = indexOut; + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut); + } + } + } + + /** Throws {@code IllegalArgumentException} if any of 
these settings is invalid. */ + public static void validateSettings(int minItemsInBlock, int maxItemsInBlock) { + if (minItemsInBlock <= 1) { + throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock); + } + if (minItemsInBlock > maxItemsInBlock) { + throw new IllegalArgumentException( + "maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock + ); + } + if (2 * (minItemsInBlock - 1) > maxItemsInBlock) { + throw new IllegalArgumentException( + "maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=" + + maxItemsInBlock + + " minItemsInBlock=" + + minItemsInBlock + ); + } + } + + @Override + public void write(Fields fields, NormsProducer norms) throws IOException { + // if (DEBUG) System.out.println("\nBTTW.write seg=" + segment); + + String lastField = null; + for (String field : fields) { + assert lastField == null || lastField.compareTo(field) < 0; + lastField = field; + + // if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field); + Terms terms = fields.terms(field); + if (terms == null) { + continue; + } + + TermsEnum termsEnum = terms.iterator(); + TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field)); + while (true) { + BytesRef term = termsEnum.next(); + // if (DEBUG) System.out.println("BTTW: next term " + term); + + if (term == null) { + break; + } + + // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + + // brToString(term)); + termsWriter.write(term, termsEnum, norms); + } + + termsWriter.finish(); + + // if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field); + } + } + + static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) { + assert fp < (1L << 62); + return (fp << 2) | (hasTerms ? Lucene40BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) | (isFloor + ? 
Lucene40BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR + : 0); + } + + private static class PendingEntry { + public final boolean isTerm; + + protected PendingEntry(boolean isTerm) { + this.isTerm = isTerm; + } + } + + private static final class PendingTerm extends PendingEntry { + public final byte[] termBytes; + // stats + metadata + public final BlockTermState state; + + PendingTerm(BytesRef term, BlockTermState state) { + super(true); + this.termBytes = new byte[term.length]; + System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length); + this.state = state; + } + + @Override + public String toString() { + return "TERM: " + brToString(termBytes); + } + } + + // for debugging + @SuppressWarnings("unused") + static String brToString(BytesRef b) { + if (b == null) { + return "(null)"; + } else { + try { + return b.utf8ToString() + " " + b; + } catch (Throwable t) { + // If BytesRef isn't actually UTF8, or it's eg a + // prefix of UTF8 that ends mid-unicode-char, we + // fallback to hex: + return b.toString(); + } + } + } + + // for debugging + @SuppressWarnings("unused") + static String brToString(byte[] b) { + return brToString(new BytesRef(b)); + } + + private static final class PendingBlock extends PendingEntry { + public final BytesRef prefix; + public final long fp; + public FST index; + public List> subIndices; + public final boolean hasTerms; + public final boolean isFloor; + public final int floorLeadByte; + + PendingBlock(BytesRef prefix, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List> subIndices) { + super(false); + this.prefix = prefix; + this.fp = fp; + this.hasTerms = hasTerms; + this.isFloor = isFloor; + this.floorLeadByte = floorLeadByte; + this.subIndices = subIndices; + } + + @Override + public String toString() { + return "BLOCK: prefix=" + brToString(prefix); + } + + public void compileIndex(List blocks, ByteBuffersDataOutput scratchBytes, IntsRefBuilder scratchIntsRef) + throws IOException { + + assert (isFloor && blocks.size() > 1) || (isFloor == false && blocks.size() == 1) : "isFloor=" + isFloor + " blocks=" + blocks; + assert this == blocks.get(0); + + assert scratchBytes.size() == 0; + + // TODO: try writing the leading vLong in MSB order + // (opposite of what Lucene does today), for better + // outputs sharing in the FST + scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor)); + if (isFloor) { + scratchBytes.writeVInt(blocks.size() - 1); + for (int i = 1; i < blocks.size(); i++) { + PendingBlock sub = blocks.get(i); + assert sub.floorLeadByte != -1; + // if (DEBUG) { + // System.out.println(" write floorLeadByte=" + + // Integer.toHexString(sub.floorLeadByte&0xff)); + // } + scratchBytes.writeByte((byte) sub.floorLeadByte); + assert sub.fp > fp; + scratchBytes.writeVLong((sub.fp - fp) << 1 | (sub.hasTerms ? 
1 : 0)); + } + } + + final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); + final FSTCompiler fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).shouldShareNonSingletonNodes( + false + ).build(); + // if (DEBUG) { + // System.out.println(" compile index for prefix=" + prefix); + // } + // indexBuilder.DEBUG = false; + final byte[] bytes = scratchBytes.toArrayCopy(); + assert bytes.length > 0; + fstCompiler.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length)); + scratchBytes.reset(); + + // Copy over index for all sub-blocks + for (PendingBlock block : blocks) { + if (block.subIndices != null) { + for (FST subIndex : block.subIndices) { + append(fstCompiler, subIndex, scratchIntsRef); + } + block.subIndices = null; + } + } + + index = fstCompiler.compile(); + + assert subIndices == null; + + /* + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot")); + Util.toDot(index, w, false, false); + System.out.println("SAVED to out.dot"); + w.close(); + */ + } + + // TODO: maybe we could add bulk-add method to + // Builder? Takes FST and unions it w/ current + // FST. + private void append(FSTCompiler fstCompiler, FST subIndex, IntsRefBuilder scratchIntsRef) throws IOException { + final BytesRefFSTEnum subIndexEnum = new BytesRefFSTEnum<>(subIndex); + BytesRefFSTEnum.InputOutput indexEnt; + while ((indexEnt = subIndexEnum.next()) != null) { + // if (DEBUG) { + // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + // + indexEnt.output); + // } + fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output); + } + } + } + + private final ByteBuffersDataOutput scratchBytes = ByteBuffersDataOutput.newResettableInstance(); + private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); + + static final BytesRef EMPTY_BYTES_REF = new BytesRef(); + + private static class StatsWriter { + + private final DataOutput out; + private final boolean hasFreqs; + private int singletonCount; + + StatsWriter(DataOutput out, boolean hasFreqs) { + this.out = out; + this.hasFreqs = hasFreqs; + } + + void add(int df, long ttf) throws IOException { + // Singletons (DF==1, TTF==1) are run-length encoded + if (df == 1 && (hasFreqs == false || ttf == 1)) { + singletonCount++; + } else { + finish(); + out.writeVInt(df << 1); + if (hasFreqs) { + out.writeVLong(ttf - df); + } + } + } + + void finish() throws IOException { + if (singletonCount > 0) { + out.writeVInt(((singletonCount - 1) << 1) | 1); + singletonCount = 0; + } + } + } + + class TermsWriter { + private final FieldInfo fieldInfo; + private long numTerms; + final FixedBitSet docsSeen; + long sumTotalTermFreq; + long sumDocFreq; + + // Records index into pending where the current prefix at that + // length "started"; for example, if current term starts with 't', + // startsByPrefix[0] is the index into pending for the first + // term/sub-block starting with 't'. We use this to figure out when + // to write a new block: + private final BytesRefBuilder lastTerm = new BytesRefBuilder(); + private int[] prefixStarts = new int[8]; + + // Pending stack of terms and blocks. 
As terms arrive (in sorted order) + // we append to this stack, and once the top of the stack has enough + // terms starting with a common prefix, we write a new block with + // those terms and replace those terms in the stack with a new block: + private final List pending = new ArrayList<>(); + + // Reused in writeBlocks: + private final List newBlocks = new ArrayList<>(); + + private PendingTerm firstPendingTerm; + private PendingTerm lastPendingTerm; + + /** Writes the top count entries in pending, using prevTerm to compute the prefix. */ + void writeBlocks(int prefixLength, int count) throws IOException { + + assert count > 0; + + // if (DEBUG2) { + // BytesRef br = new BytesRef(lastTerm.bytes()); + // br.length = prefixLength; + // System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" + // + count); + // } + + // Root block better write all remaining pending entries: + assert prefixLength > 0 || count == pending.size(); + + int lastSuffixLeadLabel = -1; + + // True if we saw at least one term in this block (we record if a block + // only points to sub-blocks in the terms index so we can avoid seeking + // to it when we are looking for a term): + boolean hasTerms = false; + boolean hasSubBlocks = false; + + int start = pending.size() - count; + int end = pending.size(); + int nextBlockStart = start; + int nextFloorLeadLabel = -1; + + for (int i = start; i < end; i++) { + + PendingEntry ent = pending.get(i); + + int suffixLeadLabel; + + if (ent.isTerm) { + PendingTerm term = (PendingTerm) ent; + if (term.termBytes.length == prefixLength) { + // Suffix is 0, i.e. prefix 'foo' and term is + // 'foo' so the term has empty string suffix + // in this block + assert lastSuffixLeadLabel == -1 : "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel; + suffixLeadLabel = -1; + } else { + suffixLeadLabel = term.termBytes[prefixLength] & 0xff; + } + } else { + PendingBlock block = (PendingBlock) ent; + assert block.prefix.length > prefixLength; + suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff; + } + // if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" + + // suffixLeadLabel); + + if (suffixLeadLabel != lastSuffixLeadLabel) { + int itemsInBlock = i - nextBlockStart; + if (itemsInBlock >= minItemsInBlock && end - nextBlockStart > maxItemsInBlock) { + // The count is too large for one block, so we must break it into "floor" blocks, where + // we record + // the leading label of the suffix of the first term in each floor block, so at search + // time we can + // jump to the right floor block. We just use a naive greedy segmenter here: make a new + // floor + // block as soon as we have at least minItemsInBlock. 
This is not always best: it often + // produces + // a too-small block as the final block: + boolean isFloor = itemsInBlock < count; + newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks)); + + hasTerms = false; + hasSubBlocks = false; + nextFloorLeadLabel = suffixLeadLabel; + nextBlockStart = i; + } + + lastSuffixLeadLabel = suffixLeadLabel; + } + + if (ent.isTerm) { + hasTerms = true; + } else { + hasSubBlocks = true; + } + } + + // Write last block, if any: + if (nextBlockStart < end) { + int itemsInBlock = end - nextBlockStart; + boolean isFloor = itemsInBlock < count; + newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks)); + } + + assert newBlocks.isEmpty() == false; + + PendingBlock firstBlock = newBlocks.get(0); + + assert firstBlock.isFloor || newBlocks.size() == 1; + + firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef); + + // Remove slice from the top of the pending stack, that we just wrote: + pending.subList(pending.size() - count, pending.size()).clear(); + + // Append new block + pending.add(firstBlock); + + newBlocks.clear(); + } + + private boolean allEqual(byte[] b, int startOffset, int endOffset, byte value) { + Objects.checkFromToIndex(startOffset, endOffset, b.length); + for (int i = startOffset; i < endOffset; ++i) { + if (b[i] != value) { + return false; + } + } + return true; + } + + /** + * Writes the specified slice (start is inclusive, end is exclusive) from pending stack as a new + * block. If isFloor is true, there were too many (more than maxItemsInBlock) entries sharing + * the same prefix, and so we broke it into multiple floor blocks where we record the starting + * label of the suffix of each floor block. + */ + private PendingBlock writeBlock( + int prefixLength, + boolean isFloor, + int floorLeadLabel, + int start, + int end, + boolean hasTerms, + boolean hasSubBlocks + ) throws IOException { + + assert end > start; + + long startFP = termsOut.getFilePointer(); + + boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1; + + final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0)); + System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength); + prefix.length = prefixLength; + + // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + + // brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == + // pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + + // " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks); + + // Write block header: + int numEntries = end - start; + int code = numEntries << 1; + if (end == pending.size()) { + // Last block: + code |= 1; + } + termsOut.writeVInt(code); + + // 1st pass: pack term suffix bytes into byte[] blob + // TODO: cutover to bulk int codec... simple64? 
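+ // The block body is assembled from separate scratch buffers that are flushed to
+ // termsOut in order: packed suffix bytes (possibly compressed), suffix lengths,
+ // term stats, and term metadata, each preceded by a length header.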
+ + // We optimize the leaf block case (block has only terms), writing a more + // compact format in this case: + boolean isLeafBlock = hasSubBlocks == false; + + // System.out.println(" isLeaf=" + isLeafBlock); + + final List> subIndices; + + boolean absolute = true; + + if (isLeafBlock) { + // Block contains only ordinary terms: + subIndices = null; + StatsWriter statsWriter = new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS); + for (int i = start; i < end; i++) { + PendingEntry ent = pending.get(i); + assert ent.isTerm : "i=" + i; + + PendingTerm term = (PendingTerm) ent; + + assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix; + BlockTermState state = term.state; + final int suffix = term.termBytes.length - prefixLength; + // if (DEBUG2) { + // BytesRef suffixBytes = new BytesRef(suffix); + // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); + // suffixBytes.length = suffix; + // System.out.println(" write term suffix=" + brToString(suffixBytes)); + // } + + // For leaf block we write suffix straight + suffixLengthsWriter.writeVInt(suffix); + suffixWriter.append(term.termBytes, prefixLength, suffix); + assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel; + + // Write term stats, to separate byte[] blob: + statsWriter.add(state.docFreq, state.totalTermFreq); + + // Write term meta data + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); + absolute = false; + } + statsWriter.finish(); + } else { + // Block has at least one prefix term or a sub block: + subIndices = new ArrayList<>(); + StatsWriter statsWriter = new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS); + for (int i = start; i < end; i++) { + PendingEntry ent = pending.get(i); + if (ent.isTerm) { + PendingTerm term = (PendingTerm) ent; + + assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix; + BlockTermState state = term.state; + final int suffix = term.termBytes.length - prefixLength; + // if (DEBUG2) { + // BytesRef suffixBytes = new BytesRef(suffix); + // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); + // suffixBytes.length = suffix; + // System.out.println(" write term suffix=" + brToString(suffixBytes)); + // } + + // For non-leaf block we borrow 1 bit to record + // if entry is term or sub-block, and 1 bit to record if + // it's a prefix term. Terms cannot be larger than ~32 KB + // so we won't run out of bits: + + suffixLengthsWriter.writeVInt(suffix << 1); + suffixWriter.append(term.termBytes, prefixLength, suffix); + + // Write term stats, to separate byte[] blob: + statsWriter.add(state.docFreq, state.totalTermFreq); + + // TODO: now that terms dict "sees" these longs, + // we can explore better column-stride encodings + // to encode all long[0]s for this block at + // once, all long[1]s, etc., e.g. using + // Simple64. Alternatively, we could interleave + // stats + meta ... 
no reason to have them + // separate anymore: + + // Write term meta data + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); + absolute = false; + } else { + PendingBlock block = (PendingBlock) ent; + assert StringHelper.startsWith(block.prefix, prefix); + final int suffix = block.prefix.length - prefixLength; + assert StringHelper.startsWith(block.prefix, prefix); + + assert suffix > 0; + + // For non-leaf block we borrow 1 bit to record + // if entry is term or sub-block:f + suffixLengthsWriter.writeVInt((suffix << 1) | 1); + suffixWriter.append(block.prefix.bytes, prefixLength, suffix); + + // if (DEBUG2) { + // BytesRef suffixBytes = new BytesRef(suffix); + // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); + // suffixBytes.length = suffix; + // System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " + // subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); + // } + + assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel + : "floorLeadLabel=" + floorLeadLabel + " suffixLead=" + (block.prefix.bytes[prefixLength] & 0xff); + assert block.fp < startFP; + + suffixLengthsWriter.writeVLong(startFP - block.fp); + subIndices.add(block.index); + } + } + statsWriter.finish(); + + assert subIndices.size() != 0; + } + + // Write suffixes byte[] blob to terms dict output, either uncompressed, compressed with LZ4 + // or with LowercaseAsciiCompression. + CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; + // If there are 2 suffix bytes or less per term, then we don't bother compressing as suffix + // are unlikely what + // makes the terms dictionary large, and it also tends to be frequently the case for dense IDs + // like + // auto-increment IDs, so not compressing in that case helps not hurt ID lookups by too much. + // We also only start compressing when the prefix length is greater than 2 since blocks whose + // prefix length is + // 1 or 2 always all get visited when running a fuzzy query whose max number of edits is 2. + if (suffixWriter.length() > 2L * numEntries && prefixLength > 2) { + // LZ4 inserts references whenever it sees duplicate strings of 4 chars or more, so only try + // it out if the + // average suffix length is greater than 6. 
+ if (suffixWriter.length() > 6L * numEntries) { + LZ4.compress(suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable); + if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) { + // LZ4 saved more than 25%, go for it + compressionAlg = CompressionAlgorithm.LZ4; + } + } + if (compressionAlg == CompressionAlgorithm.NO_COMPRESSION) { + spareWriter.reset(); + if (spareBytes.length < suffixWriter.length()) { + spareBytes = new byte[ArrayUtil.oversize(suffixWriter.length(), 1)]; + } + if (LowercaseAsciiCompression.compress(suffixWriter.bytes(), suffixWriter.length(), spareBytes, spareWriter)) { + compressionAlg = CompressionAlgorithm.LOWERCASE_ASCII; + } + } + } + long token = ((long) suffixWriter.length()) << 3; + if (isLeafBlock) { + token |= 0x04; + } + token |= compressionAlg.code; + termsOut.writeVLong(token); + if (compressionAlg == CompressionAlgorithm.NO_COMPRESSION) { + termsOut.writeBytes(suffixWriter.bytes(), suffixWriter.length()); + } else { + spareWriter.copyTo(termsOut); + } + suffixWriter.setLength(0); + spareWriter.reset(); + + // Write suffix lengths + final int numSuffixBytes = Math.toIntExact(suffixLengthsWriter.size()); + spareBytes = ArrayUtil.grow(spareBytes, numSuffixBytes); + suffixLengthsWriter.copyTo(new ByteArrayDataOutput(spareBytes)); + suffixLengthsWriter.reset(); + if (allEqual(spareBytes, 1, numSuffixBytes, spareBytes[0])) { + // Structured fields like IDs often have most values of the same length + termsOut.writeVInt((numSuffixBytes << 1) | 1); + termsOut.writeByte(spareBytes[0]); + } else { + termsOut.writeVInt(numSuffixBytes << 1); + termsOut.writeBytes(spareBytes, numSuffixBytes); + } + + // Stats + final int numStatsBytes = Math.toIntExact(statsWriter.size()); + termsOut.writeVInt(numStatsBytes); + statsWriter.copyTo(termsOut); + statsWriter.reset(); + + // Write term meta data byte[] blob + termsOut.writeVInt((int) metaWriter.size()); + metaWriter.copyTo(termsOut); + metaWriter.reset(); + + // if (DEBUG) { + // System.out.println(" fpEnd=" + out.getFilePointer()); + // } + + if (hasFloorLeadLabel) { + // We already allocated to length+1 above: + prefix.bytes[prefix.length++] = (byte) floorLeadLabel; + } + + return new PendingBlock(prefix, startFP, hasTerms, isFloor, floorLeadLabel, subIndices); + } + + TermsWriter(FieldInfo fieldInfo) { + this.fieldInfo = fieldInfo; + assert fieldInfo.getIndexOptions() != IndexOptions.NONE; + docsSeen = new FixedBitSet(maxDoc); + postingsWriter.setField(fieldInfo); + } + + /** Writes one term's worth of postings. */ + public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException { + + BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms); + if (state != null) { + + assert state.docFreq != 0; + assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq + : "postingsWriter=" + postingsWriter; + pushTerm(text); + + PendingTerm term = new PendingTerm(text, state); + pending.add(term); + // if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" + + // pending.size()); + + sumDocFreq += state.docFreq; + sumTotalTermFreq += state.totalTermFreq; + numTerms++; + if (firstPendingTerm == null) { + firstPendingTerm = term; + } + lastPendingTerm = term; + } + } + + /** Pushes the new term to the top of the stack, and writes new blocks. 
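+ * Whenever the shared prefix with the previous term shrinks, any abandoned prefix that has
+ * accumulated at least minItemsInBlock pending entries is flushed as a block by writeBlocks.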
*/ + private void pushTerm(BytesRef text) throws IOException { + // Find common prefix between last term and current term: + int prefixLength = Arrays.mismatch(lastTerm.bytes(), 0, lastTerm.length(), text.bytes, text.offset, text.offset + text.length); + if (prefixLength == -1) { // Only happens for the first term, if it is empty + assert lastTerm.length() == 0; + prefixLength = 0; + } + + // if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length); + + // Close the "abandoned" suffix now: + for (int i = lastTerm.length() - 1; i >= prefixLength; i--) { + + // How many items on top of the stack share the current suffix + // we are closing: + int prefixTopSize = pending.size() - prefixStarts[i]; + if (prefixTopSize >= minItemsInBlock) { + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " + // minItemsInBlock=" + minItemsInBlock); + writeBlocks(i + 1, prefixTopSize); + prefixStarts[i] -= prefixTopSize - 1; + } + } + + if (prefixStarts.length < text.length) { + prefixStarts = ArrayUtil.grow(prefixStarts, text.length); + } + + // Init new tail: + for (int i = prefixLength; i < text.length; i++) { + prefixStarts[i] = pending.size(); + } + + lastTerm.copyBytes(text); + } + + // Finishes all terms in this field + public void finish() throws IOException { + if (numTerms > 0) { + // if (DEBUG) System.out.println("BTTW: finish prefixStarts=" + + // Arrays.toString(prefixStarts)); + + // Add empty term to force closing of all final blocks: + pushTerm(new BytesRef()); + + // TODO: if pending.size() is already 1 with a non-zero prefix length + // we can save writing a "degenerate" root block, but we have to + // fix all the places that assume the root block's prefix is the empty string: + pushTerm(new BytesRef()); + writeBlocks(0, pending.size()); + + // We better have one final "root" block: + assert pending.size() == 1 && pending.get(0).isTerm == false : "pending.size()=" + pending.size() + " pending=" + pending; + final PendingBlock root = (PendingBlock) pending.get(0); + assert root.prefix.length == 0; + final BytesRef rootCode = root.index.getEmptyOutput(); + assert rootCode != null; + + ByteBuffersDataOutput metaOut = new ByteBuffersDataOutput(); + fields.add(metaOut); + + metaOut.writeVInt(fieldInfo.number); + metaOut.writeVLong(numTerms); + metaOut.writeVInt(rootCode.length); + metaOut.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length); + assert fieldInfo.getIndexOptions() != IndexOptions.NONE; + if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + metaOut.writeVLong(sumTotalTermFreq); + } + metaOut.writeVLong(sumDocFreq); + metaOut.writeVInt(docsSeen.cardinality()); + writeBytesRef(metaOut, new BytesRef(firstPendingTerm.termBytes)); + writeBytesRef(metaOut, new BytesRef(lastPendingTerm.termBytes)); + metaOut.writeVLong(indexOut.getFilePointer()); + // Write FST to index + root.index.save(metaOut, indexOut); + // System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name); + + /* + if (DEBUG) { + final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; + Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); + Util.toDot(root.index, w, false, false); + System.out.println("SAVED to " + dotFileName); + w.close(); + } + */ + + } else { + assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1; + assert sumDocFreq == 0; + assert docsSeen.cardinality() == 0; + } + } + + private final ByteBuffersDataOutput 
suffixLengthsWriter = ByteBuffersDataOutput.newResettableInstance(); + private final BytesRefBuilder suffixWriter = new BytesRefBuilder(); + private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance(); + private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance(); + private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance(); + private byte[] spareBytes = BytesRef.EMPTY_BYTES; + private final LZ4.HighCompressionHashTable compressionHashTable = new LZ4.HighCompressionHashTable(); + } + + private boolean closed; + + @Override + public void close() throws IOException { + if (closed) { + return; + } + closed = true; + + boolean success = false; + try { + metaOut.writeVInt(fields.size()); + for (ByteBuffersDataOutput fieldMeta : fields) { + fieldMeta.copyTo(metaOut); + } + CodecUtil.writeFooter(indexOut); + metaOut.writeLong(indexOut.getFilePointer()); + CodecUtil.writeFooter(termsOut); + metaOut.writeLong(termsOut.getFilePointer()); + CodecUtil.writeFooter(metaOut); + success = true; + } finally { + if (success) { + IOUtils.close(metaOut, termsOut, indexOut, postingsWriter); + } else { + IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut, postingsWriter); + } + } + } + + private static void writeBytesRef(DataOutput out, BytesRef bytes) throws IOException { + out.writeVInt(bytes.length); + out.writeBytes(bytes.bytes, bytes.offset, bytes.length); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat2Tests.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat2Tests.java new file mode 100644 index 0000000000000..a5422a03c1f22 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat2Tests.java @@ -0,0 +1,149 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; + +/** Tests special cases of BlockPostingsFormat */ +public class BlockPostingsFormat2Tests extends LuceneTestCase { + Directory dir; + RandomIndexWriter iw; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newFSDirectory(createTempDir("testDFBlockSize")); + IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); + iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat())); + iw = new RandomIndexWriter(random(), dir, iwc); + iw.setDoRandomForceMerge(false); // we will ourselves + } + + @Override + public void tearDown() throws Exception { + iw.close(); + TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge + IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); + iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat())); + iwc.setOpenMode(OpenMode.APPEND); + IndexWriter iw = new IndexWriter(dir, iwc); + iw.forceMerge(1); + iw.close(); + dir.close(); // just force a checkindex for now + super.tearDown(); + } + + private Document newDocument() { + Document doc = new Document(); + for (IndexOptions option : IndexOptions.values()) { + if (option == IndexOptions.NONE) { + continue; + } + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now) + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + ft.setStoreTermVectorPositions(true); + ft.setStoreTermVectorPayloads(true); + ft.setIndexOptions(option); + doc.add(new Field(option.toString(), "", ft)); + } + return doc; + } + + /** tests terms with df = blocksize */ + public void testDFBlockSize() throws Exception { + Document doc = newDocument(); + for (int i = 0; i < Lucene50PostingsFormat.BLOCK_SIZE; i++) { + for (IndexableField f : doc.getFields()) { + ((Field) f).setStringValue(f.name() + " " + f.name() + "_2"); + } + iw.addDocument(doc); + } + } + + /** tests terms with df % blocksize = 0 */ + public void testDFBlockSizeMultiple() throws Exception { + Document doc = newDocument(); + for (int i = 0; i < Lucene50PostingsFormat.BLOCK_SIZE * 16; i++) { + for (IndexableField f : doc.getFields()) { + ((Field) f).setStringValue(f.name() + " " + f.name() + "_2"); + } + iw.addDocument(doc); + } + } + + /** tests terms with ttf = blocksize */ + public void testTTFBlockSize() throws Exception { + Document doc = newDocument(); + for (int i = 0; i < Lucene50PostingsFormat.BLOCK_SIZE / 2; i++) { + for (IndexableField f : doc.getFields()) { + ((Field) f).setStringValue(f.name() + " " + f.name() + " " + f.name() + "_2 " + f.name() + "_2"); + } + iw.addDocument(doc); + } + } + + /** tests terms with ttf % 
blocksize = 0 */ + public void testTTFBlockSizeMultiple() throws Exception { + Document doc = newDocument(); + for (int i = 0; i < Lucene50PostingsFormat.BLOCK_SIZE / 2; i++) { + for (IndexableField f : doc.getFields()) { + String proto = (f.name() + + " " + + f.name() + + " " + + f.name() + + " " + + f.name() + + " " + + f.name() + + "_2 " + + f.name() + + "_2 " + + f.name() + + "_2 " + + f.name() + + "_2"); + StringBuilder val = new StringBuilder(); + for (int j = 0; j < 16; j++) { + val.append(proto); + val.append(" "); + } + ((Field) f).setStringValue(val.toString()); + } + iw.addDocument(doc); + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat3Tests.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat3Tests.java new file mode 100644 index 0000000000000..805fd67b03893 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormat3Tests.java @@ -0,0 +1,477 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockFixedLengthPayloadFilter; +import org.apache.lucene.tests.analysis.MockTokenizer; +import org.apache.lucene.tests.analysis.MockVariableLengthPayloadFilter; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.English; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.automaton.RegExp; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Random; + +/** Tests partial enumeration (only pulling a subset of the indexed data) */ +public class BlockPostingsFormat3Tests extends LuceneTestCase { + private final int MAXDOC = TEST_NIGHTLY ? Lucene50PostingsFormat.BLOCK_SIZE * 20 : Lucene50PostingsFormat.BLOCK_SIZE * 3; + + // creates 8 fields with different options and does "duels" of fields against each other + public void test() throws Exception { + Directory dir = newDirectory(); + Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(); + if (fieldName.contains("payloadsFixed")) { + TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1); + return new TokenStreamComponents(tokenizer, filter); + } else if (fieldName.contains("payloadsVariable")) { + TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer); + return new TokenStreamComponents(tokenizer, filter); + } else { + return new TokenStreamComponents(tokenizer); + } + } + }; + IndexWriterConfig iwc = newIndexWriterConfig(analyzer); + iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat())); + // TODO we could actually add more fields implemented with different PFs + // or, just put this test into the usual rotation? 
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); + // turn this on for a cross-check + docsOnlyType.setStoreTermVectors(true); + docsOnlyType.setIndexOptions(IndexOptions.DOCS); + + FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); + // turn this on for a cross-check + docsAndFreqsType.setStoreTermVectors(true); + docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + + FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); + // turn these on for a cross-check + positionsType.setStoreTermVectors(true); + positionsType.setStoreTermVectorPositions(true); + positionsType.setStoreTermVectorOffsets(true); + positionsType.setStoreTermVectorPayloads(true); + FieldType offsetsType = new FieldType(positionsType); + offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Field field1 = new Field("field1docs", "", docsOnlyType); + Field field2 = new Field("field2freqs", "", docsAndFreqsType); + Field field3 = new Field("field3positions", "", positionsType); + Field field4 = new Field("field4offsets", "", offsetsType); + Field field5 = new Field("field5payloadsFixed", "", positionsType); + Field field6 = new Field("field6payloadsVariable", "", positionsType); + Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); + Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); + doc.add(field1); + doc.add(field2); + doc.add(field3); + doc.add(field4); + doc.add(field5); + doc.add(field6); + doc.add(field7); + doc.add(field8); + for (int i = 0; i < MAXDOC; i++) { + String stringValue = Integer.toString(i) + + " verycommon " + + English.intToEnglish(i).replace('-', ' ') + + " " + + TestUtil.randomSimpleString(random()); + field1.setStringValue(stringValue); + field2.setStringValue(stringValue); + field3.setStringValue(stringValue); + field4.setStringValue(stringValue); + field5.setStringValue(stringValue); + field6.setStringValue(stringValue); + field7.setStringValue(stringValue); + field8.setStringValue(stringValue); + iw.addDocument(doc); + } + iw.close(); + verify(dir); + TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge + iwc = newIndexWriterConfig(analyzer); + iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat())); + iwc.setOpenMode(OpenMode.APPEND); + IndexWriter iw2 = new IndexWriter(dir, iwc); + iw2.forceMerge(1); + iw2.close(); + verify(dir); + dir.close(); + } + + private void verify(Directory dir) throws Exception { + DirectoryReader ir = DirectoryReader.open(dir); + for (LeafReaderContext leaf : ir.leaves()) { + LeafReader leafReader = leaf.reader(); + assertTerms(leafReader.terms("field1docs"), leafReader.terms("field2freqs"), true); + assertTerms(leafReader.terms("field3positions"), leafReader.terms("field4offsets"), true); + assertTerms(leafReader.terms("field4offsets"), leafReader.terms("field5payloadsFixed"), true); + assertTerms(leafReader.terms("field5payloadsFixed"), leafReader.terms("field6payloadsVariable"), true); + assertTerms(leafReader.terms("field6payloadsVariable"), leafReader.terms("field7payloadsFixedOffsets"), true); + assertTerms(leafReader.terms("field7payloadsFixedOffsets"), leafReader.terms("field8payloadsVariableOffsets"), true); + } + ir.close(); + } + + // following code is almost an exact dup of code from TestDuelingCodecs: sorry! 
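+    // The helpers below walk the two fields' TermsEnums in lockstep: the same terms in the same order,
+    // the same docFreq/totalTermFreq where both fields index freqs, and the same doc IDs, positions and
+    // advance() behaviour, so any inconsistency between two fields that index the same content with
+    // different options fails an assertion rather than going unnoticed.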
+ + public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception { + if (leftTerms == null || rightTerms == null) { + assertNull(leftTerms); + assertNull(rightTerms); + return; + } + assertTermsStatistics(leftTerms, rightTerms); + + // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be + // different + + boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs(); + boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions(); + TermsEnum leftTermsEnum = leftTerms.iterator(); + TermsEnum rightTermsEnum = rightTerms.iterator(); + assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions); + + assertTermsSeeking(leftTerms, rightTerms); + + if (deep) { + int numIntersections = atLeast(3); + for (int i = 0; i < numIntersections; i++) { + String re = AutomatonTestUtil.randomRegexp(random()); + CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton()); + if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + // TODO: test start term too + TermsEnum leftIntersection = leftTerms.intersect(automaton, null); + TermsEnum rightIntersection = rightTerms.intersect(automaton, null); + assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions); + } + } + } + } + + private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception { + TermsEnum leftEnum = null; + TermsEnum rightEnum = null; + + // just an upper bound + int numTests = atLeast(20); + Random random = random(); + + // collect this number of terms from the left side + HashSet tests = new HashSet<>(); + int numPasses = 0; + while (numPasses < 10 && tests.size() < numTests) { + leftEnum = leftTerms.iterator(); + BytesRef term = null; + while ((term = leftEnum.next()) != null) { + int code = random.nextInt(10); + if (code == 0) { + // the term + tests.add(BytesRef.deepCopyOf(term)); + } else if (code == 1) { + // truncated subsequence of term + term = BytesRef.deepCopyOf(term); + if (term.length > 0) { + // truncate it + term.length = random.nextInt(term.length); + } + } else if (code == 2) { + // term, but ensure a non-zero offset + byte[] newbytes = new byte[term.length + 5]; + System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length); + tests.add(new BytesRef(newbytes, 5, term.length)); + } + } + numPasses++; + } + + ArrayList shuffledTests = new ArrayList<>(tests); + Collections.shuffle(shuffledTests, random); + + for (BytesRef b : shuffledTests) { + leftEnum = leftTerms.iterator(); + rightEnum = rightTerms.iterator(); + + assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b)); + assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b)); + + SeekStatus leftStatus; + SeekStatus rightStatus; + + leftStatus = leftEnum.seekCeil(b); + rightStatus = rightEnum.seekCeil(b); + assertEquals(leftStatus, rightStatus); + if (leftStatus != SeekStatus.END) { + assertEquals(leftEnum.term(), rightEnum.term()); + } + + leftStatus = leftEnum.seekCeil(b); + rightStatus = rightEnum.seekCeil(b); + assertEquals(leftStatus, rightStatus); + if (leftStatus != SeekStatus.END) { + assertEquals(leftEnum.term(), rightEnum.term()); + } + } + } + + /** checks collection-level statistics on Terms */ + public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception { + assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount()); + assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq()); + if 
(leftTerms.hasFreqs() && rightTerms.hasFreqs()) { + assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq()); + } + if (leftTerms.size() != -1 && rightTerms.size() != -1) { + assertEquals(leftTerms.size(), rightTerms.size()); + } + } + + /** + * checks the terms enum sequentially if deep is false, it does a 'shallow' test that doesnt go + * down to the docsenums + */ + public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasFreqs, boolean hasPositions) + throws Exception { + BytesRef term; + PostingsEnum leftPositions = null; + PostingsEnum rightPositions = null; + PostingsEnum leftDocs = null; + PostingsEnum rightDocs = null; + + while ((term = leftTermsEnum.next()) != null) { + assertEquals(term, rightTermsEnum.next()); + assertTermStats(leftTermsEnum, rightTermsEnum, hasFreqs); + if (deep) { + if (hasPositions) { + // with payloads + off + assertDocsAndPositionsEnum( + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL) + ); + + assertPositionsSkipping( + leftTermsEnum.docFreq(), + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL) + ); + // with payloads only + assertDocsAndPositionsEnum( + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS) + ); + + assertPositionsSkipping( + leftTermsEnum.docFreq(), + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS) + ); + + // with offsets only + assertDocsAndPositionsEnum( + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS) + ); + + assertPositionsSkipping( + leftTermsEnum.docFreq(), + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS) + ); + + // with positions only + assertDocsAndPositionsEnum( + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS) + ); + + assertPositionsSkipping( + leftTermsEnum.docFreq(), + leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS), + rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS) + ); + } + + // with freqs: + assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs), rightDocs = rightTermsEnum.postings(rightDocs)); + + // w/o freqs: + assertDocsEnum( + leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE), + rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE) + ); + + // with freqs: + assertDocsSkipping( + leftTermsEnum.docFreq(), + leftDocs = leftTermsEnum.postings(leftDocs), + rightDocs = rightTermsEnum.postings(rightDocs) + ); + + // w/o freqs: + assertDocsSkipping( + leftTermsEnum.docFreq(), + leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE), + rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE) + ); + } + } + assertNull(rightTermsEnum.next()); + } + + /** checks term-level statistics */ + public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean bothHaveFreqs) throws Exception 
{ + assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq()); + if (bothHaveFreqs) { + assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq()); + } + } + + /** checks docs + freqs + positions + payloads, sequentially */ + public void assertDocsAndPositionsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception { + assertNotNull(leftDocs); + assertNotNull(rightDocs); + assertEquals(-1, leftDocs.docID()); + assertEquals(-1, rightDocs.docID()); + int docid; + while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(docid, rightDocs.nextDoc()); + int freq = leftDocs.freq(); + assertEquals(freq, rightDocs.freq()); + for (int i = 0; i < freq; i++) { + assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition()); + // we don't assert offsets/payloads, they are allowed to be different + } + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc()); + } + + /** checks docs + freqs, sequentially */ + public void assertDocsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception { + if (leftDocs == null) { + assertNull(rightDocs); + return; + } + assertEquals(-1, leftDocs.docID()); + assertEquals(-1, rightDocs.docID()); + int docid; + while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(docid, rightDocs.nextDoc()); + // we don't assert freqs, they are allowed to be different + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc()); + } + + /** checks advancing docs */ + public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception { + if (leftDocs == null) { + assertNull(rightDocs); + return; + } + int docid = -1; + int averageGap = MAXDOC / (1 + docFreq); + int skipInterval = 16; + + while (true) { + if (random().nextBoolean()) { + // nextDoc() + docid = leftDocs.nextDoc(); + assertEquals(docid, rightDocs.nextDoc()); + } else { + // advance() + int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap)); + docid = leftDocs.advance(skip); + assertEquals(docid, rightDocs.advance(skip)); + } + + if (docid == DocIdSetIterator.NO_MORE_DOCS) { + return; + } + // we don't assert freqs, they are allowed to be different + } + } + + /** checks advancing docs + positions */ + public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception { + if (leftDocs == null || rightDocs == null) { + assertNull(leftDocs); + assertNull(rightDocs); + return; + } + + int docid = -1; + int averageGap = MAXDOC / (1 + docFreq); + int skipInterval = 16; + + while (true) { + if (random().nextBoolean()) { + // nextDoc() + docid = leftDocs.nextDoc(); + assertEquals(docid, rightDocs.nextDoc()); + } else { + // advance() + int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap)); + docid = leftDocs.advance(skip); + assertEquals(docid, rightDocs.advance(skip)); + } + + if (docid == DocIdSetIterator.NO_MORE_DOCS) { + return; + } + int freq = leftDocs.freq(); + assertEquals(freq, rightDocs.freq()); + for (int i = 0; i < freq; i++) { + assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition()); + // we don't compare the payloads, it's allowed that one is empty etc + } + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormatTests.java 
b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormatTests.java new file mode 100644 index 0000000000000..bc04dceea30cd --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BlockPostingsFormatTests.java @@ -0,0 +1,138 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompetitiveImpactAccumulator; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.Impact; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.index.BasePostingsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree.FieldReader; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree.Stats; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.Lucene50ScoreSkipReader.MutableImpactList; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** Tests BlockPostingsFormat */ +public class BlockPostingsFormatTests extends BasePostingsFormatTestCase { + private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat()); + + @Override + protected Codec getCodec() { + return codec; + } + + /** Make sure the final sub-block(s) are not skipped. 
*/ + public void testFinalBlock() throws Exception { + Directory d = newDirectory(); + IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random()))); + for (int i = 0; i < 25; i++) { + Document doc = new Document(); + doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO)); + doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO)); + w.addDocument(doc); + } + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + assertEquals(1, r.leaves().size()); + FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field"); + // We should see exactly two blocks: one root block (prefix empty string) and one block for z* + // terms (prefix z): + Stats stats = field.getStats(); + assertEquals(0, stats.floorBlockCount); + assertEquals(2, stats.nonFloorBlockCount); + r.close(); + w.close(); + d.close(); + } + + public void testImpactSerialization() throws IOException { + // omit norms and omit freqs + doTestImpactSerialization(Collections.singletonList(new Impact(1, 1L))); + + // omit freqs + doTestImpactSerialization(Collections.singletonList(new Impact(1, 42L))); + // omit freqs with very large norms + doTestImpactSerialization(Collections.singletonList(new Impact(1, -100L))); + + // omit norms + doTestImpactSerialization(Collections.singletonList(new Impact(30, 1L))); + // omit norms with large freq + doTestImpactSerialization(Collections.singletonList(new Impact(500, 1L))); + + // freqs and norms, basic + doTestImpactSerialization( + Arrays.asList( + new Impact(1, 7L), + new Impact(3, 9L), + new Impact(7, 10L), + new Impact(15, 11L), + new Impact(20, 13L), + new Impact(28, 14L) + ) + ); + + // freqs and norms, high values + doTestImpactSerialization( + Arrays.asList( + new Impact(2, 2L), + new Impact(10, 10L), + new Impact(12, 50L), + new Impact(50, -100L), + new Impact(1000, -80L), + new Impact(1005, -3L) + ) + ); + } + + private void doTestImpactSerialization(List impacts) throws IOException { + CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator(); + for (Impact impact : impacts) { + acc.add(impact.freq, impact.norm); + } + try (Directory dir = newDirectory()) { + try (IndexOutput out = EndiannessReverserUtil.createOutput(dir, "foo", IOContext.DEFAULT)) { + Lucene50SkipWriter.writeImpacts(acc, out); + } + try (IndexInput in = EndiannessReverserUtil.openInput(dir, "foo", IOContext.DEFAULT)) { + byte[] b = new byte[Math.toIntExact(in.length())]; + in.readBytes(b, 0, b.length); + List impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList()); + assertEquals(impacts, impacts2); + } + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java new file mode 100644 index 0000000000000..7e3a92acc4682 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java @@ -0,0 +1,513 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.CompetitiveImpactAccumulator; +import org.apache.lucene.codecs.PushPostingsWriterBase; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.packed.PackedInts; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.IntBlockTermState; + +import java.io.IOException; + +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.BLOCK_SIZE; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.DOC_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.DOC_EXTENSION; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.MAX_SKIP_LEVELS; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.PAY_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.PAY_EXTENSION; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.POS_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.POS_EXTENSION; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.TERMS_CODEC; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat.VERSION_CURRENT; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.ForUtil.MAX_DATA_SIZE; +import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE; + +/** + * Concrete class that writes docId(maybe frq,pos,offset,payloads) list + * with postings format. + * + * Postings list for each term will be stored separately. + * + * @see Lucene50SkipWriter for details about skipping setting and postings layout. 
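+ * In short: doc deltas (and freqs, where indexed) go to the DOC_EXTENSION stream, position deltas to the
+ * POS_EXTENSION stream for fields with positions, and payload bytes / offset deltas to the PAY_EXTENSION
+ * stream when payloads or offsets are present; each stream is written in packed blocks of BLOCK_SIZE
+ * entries via ForUtil, with any remainder vInt-encoded in finishTerm.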
+ * @lucene.experimental + */ +public final class Lucene50PostingsWriter extends PushPostingsWriterBase { + + IndexOutput docOut; + IndexOutput posOut; + IndexOutput payOut; + + static final IntBlockTermState emptyState = new IntBlockTermState(); + IntBlockTermState lastState; + + // Holds starting file pointers for current term: + private long docStartFP; + private long posStartFP; + private long payStartFP; + + final int[] docDeltaBuffer; + final int[] freqBuffer; + private int docBufferUpto; + + final int[] posDeltaBuffer; + final int[] payloadLengthBuffer; + final int[] offsetStartDeltaBuffer; + final int[] offsetLengthBuffer; + private int posBufferUpto; + + private byte[] payloadBytes; + private int payloadByteUpto; + + private int lastBlockDocID; + private long lastBlockPosFP; + private long lastBlockPayFP; + private int lastBlockPosBufferUpto; + private int lastBlockPayloadByteUpto; + + private int lastDocID; + private int lastPosition; + private int lastStartOffset; + private int docCount; + + final byte[] encoded; + + private final ForUtil forUtil; + private final Lucene50SkipWriter skipWriter; + + private boolean fieldHasNorms; + private NumericDocValues norms; + private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator = new CompetitiveImpactAccumulator(); + + /** Creates a postings writer */ + public Lucene50PostingsWriter(SegmentWriteState state) throws IOException { + final float acceptableOverheadRatio = PackedInts.COMPACT; + + String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION); + docOut = state.directory.createOutput(docFileName, state.context); + IndexOutput posOut = null; + IndexOutput payOut = null; + boolean success = false; + try { + CodecUtil.writeIndexHeader(docOut, DOC_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + forUtil = new ForUtil(acceptableOverheadRatio, docOut); + if (state.fieldInfos.hasProx()) { + posDeltaBuffer = new int[MAX_DATA_SIZE]; + String posFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, POS_EXTENSION); + posOut = state.directory.createOutput(posFileName, state.context); + CodecUtil.writeIndexHeader(posOut, POS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + + if (state.fieldInfos.hasPayloads()) { + payloadBytes = new byte[128]; + payloadLengthBuffer = new int[MAX_DATA_SIZE]; + } else { + payloadBytes = null; + payloadLengthBuffer = null; + } + + if (state.fieldInfos.hasOffsets()) { + offsetStartDeltaBuffer = new int[MAX_DATA_SIZE]; + offsetLengthBuffer = new int[MAX_DATA_SIZE]; + } else { + offsetStartDeltaBuffer = null; + offsetLengthBuffer = null; + } + + if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { + String payFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, PAY_EXTENSION); + payOut = state.directory.createOutput(payFileName, state.context); + CodecUtil.writeIndexHeader(payOut, PAY_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + } + } else { + posDeltaBuffer = null; + payloadLengthBuffer = null; + offsetStartDeltaBuffer = null; + offsetLengthBuffer = null; + payloadBytes = null; + } + this.payOut = payOut; + this.posOut = posOut; + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(docOut, posOut, payOut); + } + } + + docDeltaBuffer = new int[MAX_DATA_SIZE]; + freqBuffer = new int[MAX_DATA_SIZE]; + + // TODO: should we try skipping every 2/4 
blocks...? + skipWriter = new Lucene50SkipWriter(MAX_SKIP_LEVELS, BLOCK_SIZE, state.segmentInfo.maxDoc(), docOut, posOut, payOut); + + encoded = new byte[MAX_ENCODED_SIZE]; + } + + @Override + public IntBlockTermState newTermState() { + return new IntBlockTermState(); + } + + @Override + public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException { + CodecUtil.writeIndexHeader(termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + termsOut.writeVInt(BLOCK_SIZE); + } + + @Override + public void setField(FieldInfo fieldInfo) { + super.setField(fieldInfo); + skipWriter.setField(writePositions, writeOffsets, writePayloads); + lastState = emptyState; + fieldHasNorms = fieldInfo.hasNorms(); + } + + @Override + public void startTerm(NumericDocValues norms) { + docStartFP = docOut.getFilePointer(); + if (writePositions) { + posStartFP = posOut.getFilePointer(); + if (writePayloads || writeOffsets) { + payStartFP = payOut.getFilePointer(); + } + } + lastDocID = 0; + lastBlockDocID = -1; + skipWriter.resetSkip(); + this.norms = norms; + competitiveFreqNormAccumulator.clear(); + } + + @Override + public void startDoc(int docID, int termDocFreq) throws IOException { + // Have collected a block of docs, and get a new doc. + // Should write skip data as well as postings list for + // current block. + if (lastBlockDocID != -1 && docBufferUpto == 0) { + skipWriter.bufferSkip( + lastBlockDocID, + competitiveFreqNormAccumulator, + docCount, + lastBlockPosFP, + lastBlockPayFP, + lastBlockPosBufferUpto, + lastBlockPayloadByteUpto + ); + competitiveFreqNormAccumulator.clear(); + } + + final int docDelta = docID - lastDocID; + + if (docID < 0 || (docCount > 0 && docDelta <= 0)) { + throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )", docOut); + } + + docDeltaBuffer[docBufferUpto] = docDelta; + if (writeFreqs) { + freqBuffer[docBufferUpto] = termDocFreq; + } + + docBufferUpto++; + docCount++; + + if (docBufferUpto == BLOCK_SIZE) { + forUtil.writeBlock(docDeltaBuffer, encoded, docOut); + if (writeFreqs) { + forUtil.writeBlock(freqBuffer, encoded, docOut); + } + // NOTE: don't set docBufferUpto back to 0 here; + // finishDoc will do so (because it needs to see that + // the block was filled so it can save skip data) + } + + lastDocID = docID; + lastPosition = 0; + lastStartOffset = 0; + + long norm; + if (fieldHasNorms) { + boolean found = norms.advanceExact(docID); + if (found == false) { + // This can happen if indexing hits a problem after adding a doc to the + // postings but before buffering the norm. Such documents are written + // deleted and will go away on the first merge. + norm = 1L; + } else { + norm = norms.longValue(); + assert norm != 0 : docID; + } + } else { + norm = 1L; + } + + competitiveFreqNormAccumulator.add(writeFreqs ? 
termDocFreq : 1, norm); + } + + @Override + public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException { + if (position > IndexWriter.MAX_POSITION) { + throw new CorruptIndexException( + "position=" + position + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + ")", + docOut + ); + } + if (position < 0) { + throw new CorruptIndexException("position=" + position + " is < 0", docOut); + } + posDeltaBuffer[posBufferUpto] = position - lastPosition; + if (writePayloads) { + if (payload == null || payload.length == 0) { + // no payload + payloadLengthBuffer[posBufferUpto] = 0; + } else { + payloadLengthBuffer[posBufferUpto] = payload.length; + if (payloadByteUpto + payload.length > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payload.length); + } + System.arraycopy(payload.bytes, payload.offset, payloadBytes, payloadByteUpto, payload.length); + payloadByteUpto += payload.length; + } + } + + if (writeOffsets) { + assert startOffset >= lastStartOffset; + assert endOffset >= startOffset; + offsetStartDeltaBuffer[posBufferUpto] = startOffset - lastStartOffset; + offsetLengthBuffer[posBufferUpto] = endOffset - startOffset; + lastStartOffset = startOffset; + } + + posBufferUpto++; + lastPosition = position; + if (posBufferUpto == BLOCK_SIZE) { + forUtil.writeBlock(posDeltaBuffer, encoded, posOut); + + if (writePayloads) { + forUtil.writeBlock(payloadLengthBuffer, encoded, payOut); + payOut.writeVInt(payloadByteUpto); + payOut.writeBytes(payloadBytes, 0, payloadByteUpto); + payloadByteUpto = 0; + } + if (writeOffsets) { + forUtil.writeBlock(offsetStartDeltaBuffer, encoded, payOut); + forUtil.writeBlock(offsetLengthBuffer, encoded, payOut); + } + posBufferUpto = 0; + } + } + + @Override + public void finishDoc() throws IOException { + // Since we don't know df for current term, we had to buffer + // those skip data for each block, and when a new doc comes, + // write them to skip file. + if (docBufferUpto == BLOCK_SIZE) { + lastBlockDocID = lastDocID; + if (posOut != null) { + if (payOut != null) { + lastBlockPayFP = payOut.getFilePointer(); + } + lastBlockPosFP = posOut.getFilePointer(); + lastBlockPosBufferUpto = posBufferUpto; + lastBlockPayloadByteUpto = payloadByteUpto; + } + docBufferUpto = 0; + } + } + + /** Called when we are done adding docs to this term */ + @Override + public void finishTerm(BlockTermState _state) throws IOException { + IntBlockTermState state = (IntBlockTermState) _state; + assert state.docFreq > 0; + + // TODO: wasteful we are counting this (counting # docs + // for this term) in two places? + assert state.docFreq == docCount : state.docFreq + " vs " + docCount; + + // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it. 
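+        // Instead, the single doc ID is pulsed into the term metadata as singletonDocID below, and its freq is
+        // implicitly the totalTermFreq. For terms with more than one doc, whatever remains in the buffer after
+        // the last full block is vInt-encoded: when freqs are written, a freq of 1 is folded into the low bit of
+        // the delta, e.g. docDelta=5, freq=1 -> writeVInt(11); docDelta=5, freq=3 -> writeVInt(10), writeVInt(3).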
+ final int singletonDocID; + if (state.docFreq == 1) { + // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq + singletonDocID = docDeltaBuffer[0]; + } else { + singletonDocID = -1; + // vInt encode the remaining doc deltas and freqs: + for (int i = 0; i < docBufferUpto; i++) { + final int docDelta = docDeltaBuffer[i]; + final int freq = freqBuffer[i]; + if (writeFreqs == false) { + docOut.writeVInt(docDelta); + } else if (freqBuffer[i] == 1) { + docOut.writeVInt((docDelta << 1) | 1); + } else { + docOut.writeVInt(docDelta << 1); + docOut.writeVInt(freq); + } + } + } + + final long lastPosBlockOffset; + + if (writePositions) { + // totalTermFreq is just total number of positions(or payloads, or offsets) + // associated with current term. + assert state.totalTermFreq != -1; + if (state.totalTermFreq > BLOCK_SIZE) { + // record file offset for last pos in last block + lastPosBlockOffset = posOut.getFilePointer() - posStartFP; + } else { + lastPosBlockOffset = -1; + } + if (posBufferUpto > 0) { + // TODO: should we send offsets/payloads to + // .pay...? seems wasteful (have to store extra + // vLong for low (< BLOCK_SIZE) DF terms = vast vast + // majority) + + // vInt encode the remaining positions/payloads/offsets: + int lastPayloadLength = -1; // force first payload length to be written + int lastOffsetLength = -1; // force first offset length to be written + int payloadBytesReadUpto = 0; + for (int i = 0; i < posBufferUpto; i++) { + final int posDelta = posDeltaBuffer[i]; + if (writePayloads) { + final int payloadLength = payloadLengthBuffer[i]; + if (payloadLength != lastPayloadLength) { + lastPayloadLength = payloadLength; + posOut.writeVInt((posDelta << 1) | 1); + posOut.writeVInt(payloadLength); + } else { + posOut.writeVInt(posDelta << 1); + } + + if (payloadLength != 0) { + posOut.writeBytes(payloadBytes, payloadBytesReadUpto, payloadLength); + payloadBytesReadUpto += payloadLength; + } + } else { + posOut.writeVInt(posDelta); + } + + if (writeOffsets) { + int delta = offsetStartDeltaBuffer[i]; + int length = offsetLengthBuffer[i]; + if (length == lastOffsetLength) { + posOut.writeVInt(delta << 1); + } else { + posOut.writeVInt(delta << 1 | 1); + posOut.writeVInt(length); + lastOffsetLength = length; + } + } + } + + if (writePayloads) { + assert payloadBytesReadUpto == payloadByteUpto; + payloadByteUpto = 0; + } + } + } else { + lastPosBlockOffset = -1; + } + + long skipOffset; + if (docCount > BLOCK_SIZE) { + skipOffset = skipWriter.writeSkip(docOut) - docStartFP; + } else { + skipOffset = -1; + } + + state.docStartFP = docStartFP; + state.posStartFP = posStartFP; + state.payStartFP = payStartFP; + state.singletonDocID = singletonDocID; + state.skipOffset = skipOffset; + state.lastPosBlockOffset = lastPosBlockOffset; + docBufferUpto = 0; + posBufferUpto = 0; + lastDocID = 0; + docCount = 0; + } + + @Override + public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { + IntBlockTermState state = (IntBlockTermState) _state; + if (absolute) { + lastState = emptyState; + } + out.writeVLong(state.docStartFP - lastState.docStartFP); + if (writePositions) { + out.writeVLong(state.posStartFP - lastState.posStartFP); + if (writePayloads || writeOffsets) { + out.writeVLong(state.payStartFP - lastState.payStartFP); + } + } + if (state.singletonDocID != -1) { + out.writeVInt(state.singletonDocID); + } + if (writePositions) { + if (state.lastPosBlockOffset != -1) { + 
out.writeVLong(state.lastPosBlockOffset); + } + } + if (state.skipOffset != -1) { + out.writeVLong(state.skipOffset); + } + lastState = state; + } + + @Override + public void close() throws IOException { + // TODO: add a finish() at least to PushBase? DV too...? + boolean success = false; + try { + if (docOut != null) { + CodecUtil.writeFooter(docOut); + } + if (posOut != null) { + CodecUtil.writeFooter(posOut); + } + if (payOut != null) { + CodecUtil.writeFooter(payOut); + } + success = true; + } finally { + if (success) { + IOUtils.close(docOut, posOut, payOut); + } else { + IOUtils.closeWhileHandlingException(docOut, posOut, payOut); + } + docOut = posOut = payOut = null; + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50RWPostingsFormat.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50RWPostingsFormat.java new file mode 100644 index 0000000000000..11ed11e46d6b4 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50RWPostingsFormat.java @@ -0,0 +1,56 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene40.blocktree.Lucene40BlockTreeTermsWriter; + +import java.io.IOException; + +public class Lucene50RWPostingsFormat extends BWCLucene50PostingsFormat { + + public Lucene50RWPostingsFormat() { + super("Lucene50RW"); + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state); + boolean success = false; + try { + FieldsConsumer ret = new Lucene40BlockTreeTermsWriter( + state, + postingsWriter, + Lucene40BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, + Lucene40BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE + ); + success = true; + return ret; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(postingsWriter); + } + } + } + +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipWriter.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipWriter.java new file mode 100644 index 0000000000000..9555f266e0611 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50SkipWriter.java @@ -0,0 +1,233 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene50; + +import org.apache.lucene.codecs.CompetitiveImpactAccumulator; +import org.apache.lucene.codecs.MultiLevelSkipListWriter; +import org.apache.lucene.index.Impact; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +/** + * Write skip lists with multiple levels, and support skip within block ints. + * + *

Assume that docFreq = 28, skipInterval = blockSize = 12 + * + *

+ *  |       block#0       | |      block#1        | |vInts|
+ *  d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
+ *                          ^                       ^       (level 0 skip point)
+ * 
+ * + * Note that skipWriter will ignore the first document in block#0, since it is useless as a skip point. + * Also, we'll never skip into the vInts block; we only record skip data at its start point (if it exists). + *
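+ * Worked example for the diagram above: the 28 docs fill two complete 12-doc blocks (block#0 and block#1)
+ * and leave 4 docs for the vInts tail, so there are floor(28 / 12) = 2 level-0 skip points (the two carets):
+ * one at the start of block#1 and one at the start of the vInts tail, which is itself never skipped into;
+ * per the list below, those entries record docID[11] and docID[23] respectively.
+ *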

For each skip point, we will record: 1. docID in former position, i.e. for position 12, record + * docID[11], etc. 2. its related file points(position, payload), 3. related numbers or + * uptos(position, payload). 4. start offset. + */ +final class Lucene50SkipWriter extends MultiLevelSkipListWriter { + private int[] lastSkipDoc; + private long[] lastSkipDocPointer; + private long[] lastSkipPosPointer; + private long[] lastSkipPayPointer; + private int[] lastPayloadByteUpto; + + private final IndexOutput docOut; + private final IndexOutput posOut; + private final IndexOutput payOut; + + private int curDoc; + private long curDocPointer; + private long curPosPointer; + private long curPayPointer; + private int curPosBufferUpto; + private int curPayloadByteUpto; + private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms; + private boolean fieldHasPositions; + private boolean fieldHasOffsets; + private boolean fieldHasPayloads; + + Lucene50SkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { + super(blockSize, 8, maxSkipLevels, docCount); + this.docOut = docOut; + this.posOut = posOut; + this.payOut = payOut; + + lastSkipDoc = new int[maxSkipLevels]; + lastSkipDocPointer = new long[maxSkipLevels]; + if (posOut != null) { + lastSkipPosPointer = new long[maxSkipLevels]; + if (payOut != null) { + lastSkipPayPointer = new long[maxSkipLevels]; + } + lastPayloadByteUpto = new int[maxSkipLevels]; + } + curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels]; + for (int i = 0; i < maxSkipLevels; ++i) { + curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator(); + } + } + + public void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) { + this.fieldHasPositions = fieldHasPositions; + this.fieldHasOffsets = fieldHasOffsets; + this.fieldHasPayloads = fieldHasPayloads; + } + + // tricky: we only skip data for blocks (terms with more than 128 docs), but re-init'ing the + // skipper + // is pretty slow for rare terms in large segments as we have to fill O(log #docs in segment) of + // junk. + // this is the vast majority of terms (worst case: ID field or similar). so in resetSkip() we + // save + // away the previous pointers, and lazy-init only if we need to buffer skip data for the term. + private boolean initialized; + long lastDocFP; + long lastPosFP; + long lastPayFP; + + @Override + public void resetSkip() { + lastDocFP = docOut.getFilePointer(); + if (fieldHasPositions) { + lastPosFP = posOut.getFilePointer(); + if (fieldHasOffsets || fieldHasPayloads) { + lastPayFP = payOut.getFilePointer(); + } + } + if (initialized) { + for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) { + acc.clear(); + } + } + initialized = false; + } + + private void initSkip() { + if (initialized == false) { + super.resetSkip(); + Arrays.fill(lastSkipDoc, 0); + Arrays.fill(lastSkipDocPointer, lastDocFP); + if (fieldHasPositions) { + Arrays.fill(lastSkipPosPointer, lastPosFP); + if (fieldHasPayloads) { + Arrays.fill(lastPayloadByteUpto, 0); + } + if (fieldHasOffsets || fieldHasPayloads) { + Arrays.fill(lastSkipPayPointer, lastPayFP); + } + } + // sets of competitive freq,norm pairs should be empty at this point + assert Arrays.stream(curCompetitiveFreqNorms) + .map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs) + .mapToInt(Collection::size) + .sum() == 0; + initialized = true; + } + } + + /** Sets the values for the current skip data. 
*/ + public void bufferSkip( + int doc, + CompetitiveImpactAccumulator competitiveFreqNorms, + int numDocs, + long posFP, + long payFP, + int posBufferUpto, + int payloadByteUpto + ) throws IOException { + initSkip(); + this.curDoc = doc; + this.curDocPointer = docOut.getFilePointer(); + this.curPosPointer = posFP; + this.curPayPointer = payFP; + this.curPosBufferUpto = posBufferUpto; + this.curPayloadByteUpto = payloadByteUpto; + this.curCompetitiveFreqNorms[0].addAll(competitiveFreqNorms); + bufferSkip(numDocs); + } + + private final ByteBuffersDataOutput freqNormOut = ByteBuffersDataOutput.newResettableInstance(); + + @Override + protected void writeSkipData(int level, DataOutput skipBuffer) throws IOException { + + int delta = curDoc - lastSkipDoc[level]; + + skipBuffer.writeVInt(delta); + lastSkipDoc[level] = curDoc; + + skipBuffer.writeVLong(curDocPointer - lastSkipDocPointer[level]); + lastSkipDocPointer[level] = curDocPointer; + + if (fieldHasPositions) { + + skipBuffer.writeVLong(curPosPointer - lastSkipPosPointer[level]); + lastSkipPosPointer[level] = curPosPointer; + skipBuffer.writeVInt(curPosBufferUpto); + + if (fieldHasPayloads) { + skipBuffer.writeVInt(curPayloadByteUpto); + } + + if (fieldHasOffsets || fieldHasPayloads) { + skipBuffer.writeVLong(curPayPointer - lastSkipPayPointer[level]); + lastSkipPayPointer[level] = curPayPointer; + } + } + + CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level]; + assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0; + if (level + 1 < numberOfSkipLevels) { + curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms); + } + writeImpacts(competitiveFreqNorms, freqNormOut); + skipBuffer.writeVInt(Math.toIntExact(freqNormOut.size())); + freqNormOut.copyTo(skipBuffer); + freqNormOut.reset(); + competitiveFreqNorms.clear(); + } + + static void writeImpacts(CompetitiveImpactAccumulator acc, DataOutput out) throws IOException { + Collection impacts = acc.getCompetitiveFreqNormPairs(); + Impact previous = new Impact(0, 0); + for (Impact impact : impacts) { + assert impact.freq > previous.freq; + assert Long.compareUnsigned(impact.norm, previous.norm) > 0; + int freqDelta = impact.freq - previous.freq - 1; + long normDelta = impact.norm - previous.norm - 1; + if (normDelta == 0) { + // most of time, norm only increases by 1, so we can fold everything in a single byte + out.writeVInt(freqDelta << 1); + } else { + out.writeVInt((freqDelta << 1) | 1); + out.writeZLong(normDelta); + } + previous = impact; + } + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/x-pack/plugin/old-lucene-versions/src/test/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat new file mode 100644 index 0000000000000..b2c1a7ca06a52 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.Lucene50RWPostingsFormat diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java index 8bb8cb6b08fe4..d63ea5301fcdb 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java @@ -16,6 +16,8 @@ import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusRequest; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Request; @@ -453,6 +455,7 @@ private void assertDocs( XContentBuilder mappingBuilder = JsonXContent.contentBuilder(); mappingBuilder.startObject().startObject("properties"); mappingBuilder.startObject("val").field("type", "long").endObject(); + mappingBuilder.startObject("test").field("type", "text").endObject(); mappingBuilder.endObject().endObject(); assertTrue( client.indices().putMapping(new PutMappingRequest(index).source(mappingBuilder), RequestOptions.DEFAULT).isAcknowledged() @@ -474,6 +477,22 @@ private void assertDocs( Arrays.stream(searchResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toList()) ); + // look up by id (only 6.0+ as we would otherwise need ability to specify _type in GET API) + if (oldVersion.onOrAfter(Version.fromString("6.0.0"))) { + GetResponse getResponse = client.get(new GetRequest(index, id), RequestOptions.DEFAULT); + assertTrue(getResponse.isExists()); + assertEquals(sourceForDoc(getIdAsNumeric(id)), getResponse.getSourceAsString()); + } + + // look up postings + searchResponse = client.search( + new SearchRequest(index).source(SearchSourceBuilder.searchSource().query(QueryBuilders.matchQuery("test", "test" + num))), + randomRequestOptions + ); + logger.info(searchResponse); + // check match + assertEquals(List.of(id), Arrays.stream(searchResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toList())); + if (oldVersion.before(Version.fromString("6.0.0"))) { // search on _type and check that results contain _type information String randomType = getType(oldVersion, randomFrom(expectedIds)); From 56d0e7b28ea3893a3093390c42b040f7c2d05248 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Thu, 24 Mar 2022 10:48:36 +0100 Subject: [PATCH 02/19] javadoc --- .../lucene/bwc/codecs/lucene40/blocktree/FieldReader.java | 2 -- .../lucene40/blocktree/Lucene40BlockTreeTermsReader.java | 1 - .../xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java | 2 -- .../lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java | 4 ---- 
.../lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java | 2 -- .../xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java | 1 - .../lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java | 2 -- .../xpack/lucene/bwc/codecs/lucene70/fst/FST.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java | 2 -- .../xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java | 1 - .../xpack/lucene/bwc/codecs/lucene70/fst/Util.java | 2 -- .../lucene40/blocktree/Lucene40BlockTreeTermsWriter.java | 1 - .../lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java | 1 - 18 files changed, 27 deletions(-) diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java index 3d24e82edd18b..71b90bc71bc4a 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java @@ -35,8 +35,6 @@ /** * BlockTree's implementation of {@link Terms}. - * - * @lucene.internal */ public final class FieldReader extends Terms { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java index 807b821d8d145..3237da73cf830 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java @@ -64,7 +64,6 @@ * *
<p>
See {@code BlockTreeTermsWriter}. * - * @lucene.experimental */ public final class Lucene40BlockTreeTermsReader extends FieldsProducer { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java index 90ee6d1115a57..6ae18c70f3ca9 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java @@ -31,8 +31,6 @@ /** * BlockTree statistics for a single field returned by {@link FieldReader#getStats()}. - * - * @lucene.internal */ public class Stats { /** Byte size of the index. */ diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java index fd04a28ce23fb..4ff6199a52577 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/BWCLucene50PostingsFormat.java @@ -330,8 +330,6 @@ * current position. * * - * - * @lucene.experimental */ public class BWCLucene50PostingsFormat extends PostingsFormat { @@ -407,8 +405,6 @@ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException /** * Holds all state required for {@link Lucene50PostingsReader} to produce a {@link * org.apache.lucene.index.PostingsEnum} without re-seeking the terms dict. - * - * @lucene.internal */ public static final class IntBlockTermState extends BlockTermState { /** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */ diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java index 206f5e1ae943b..a3c91c7d3ec44 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsReader.java @@ -54,8 +54,6 @@ /** * Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format. - * - * @lucene.experimental */ public final class Lucene50PostingsReader extends PostingsReaderBase { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java index 50e5cde04ead3..3f58b1bb417ae 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java @@ -24,7 +24,6 @@ /** * Legacy methods for manipulating strings. 
* - * @lucene.internal * @deprecated This is only used for backwards compatibility codecs (they * don't work with the Java9-based replacement methods). */ diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java index 728191932763c..56ba113a1abbb 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BitTableUtil.java @@ -24,7 +24,6 @@ /** * Static helper methods for {@link FST.Arc.BitTable}. * - * @lucene.experimental */ class BitTableUtil { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java index 7a58a350fcab1..23db4618bffc0 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/ByteSequenceOutputs.java @@ -30,7 +30,6 @@ /** * An FST {@link Outputs} implementation where each output is a sequence of bytes. * - * @lucene.experimental */ public final class ByteSequenceOutputs extends Outputs { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java index 955327af17ba0..609e419232043 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/BytesRefFSTEnum.java @@ -26,8 +26,6 @@ /** * Enumerates all input (BytesRef) + output pairs in an FST. - * - * @lucene.experimental */ public final class BytesRefFSTEnum extends FSTEnum { private final BytesRef current = new BytesRef(10); diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java index 9fb73edb5a118..e5e684e08cd87 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FST.java @@ -60,7 +60,6 @@ * *
<p>
See the {@link org.apache.lucene.util.fst package documentation} for some simple examples. * - * @lucene.experimental */ public final class FST implements Accountable { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java index 7ee6eaa5f7ba4..6fcd4b82b7174 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTCompiler.java @@ -44,7 +44,6 @@ *
<p>
FSTs larger than 2.1GB are now possible (as of Lucene 4.2). FSTs containing more than 2.1B * nodes are also now possible, however they cannot be packed. * - * @lucene.experimental */ public class FSTCompiler { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java index 789c216df6f95..3da2100cf79b7 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/FSTEnum.java @@ -27,8 +27,6 @@ /** * Can next() and advance() through the terms in an FST - * - * @lucene.experimental */ abstract class FSTEnum { protected final FST fst; diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java index f0246cbf5c862..310098bbc255a 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OffHeapFSTStore.java @@ -30,7 +30,6 @@ * Provides off heap storage of finite state machine (FST), using underlying index input instead of * byte store on heap * - * @lucene.experimental */ public final class OffHeapFSTStore implements FSTStore { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java index 646e56f095d9a..436a1ac7f1d40 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/OnHeapFSTStore.java @@ -28,7 +28,6 @@ /** * Provides storage of finite state machine (FST), using byte array or byte store allocated on heap. * - * @lucene.experimental */ public final class OnHeapFSTStore implements FSTStore { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java index a7c5ed8933fed..cb273182a20ea 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Outputs.java @@ -32,7 +32,6 @@ *
<p>
Note that any operation that returns NO_OUTPUT must return the same singleton object from * {@link #getNoOutput}. * - * @lucene.experimental */ public abstract class Outputs { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java index ce2ac82d478b6..2711e9c3f5110 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/fst/Util.java @@ -39,7 +39,6 @@ /** * Static helper methods. * - * @lucene.experimental */ public final class Util { private Util() {} @@ -98,7 +97,6 @@ public static T get(FST fst, BytesRef input) throws IOException { /** * Represents a path in TopNSearcher. * - * @lucene.experimental */ public static class FSTPath { /** Holds the last arc appended to this path */ diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java index e6435dae4c12b..eaf35139bd146 100644 --- a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java @@ -198,7 +198,6 @@ order, meaning if you just next() the file pointer will * * * @see Lucene40BlockTreeTermsReader - * @lucene.experimental */ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer { diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java index 7e3a92acc4682..7bae5453196f9 100644 --- a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene50/Lucene50PostingsWriter.java @@ -59,7 +59,6 @@ * Postings list for each term will be stored separately. * * @see Lucene50SkipWriter for details about skipping setting and postings layout. 
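A minimal, self-contained sketch of the (freq, norm) delta scheme used by the writeImpacts method at the top of this patch, assuming plain Java lists stand in for the real writeVInt/writeZLong stream and that impacts arrive strictly increasing in both values, as the assertions there require:

import java.util.ArrayList;
import java.util.List;

// Sketch of the impact delta encoding: pairs must be strictly increasing in freq and norm.
// Assumption: a List<Long> stands in for the DataOutput the real writer targets.
public class ImpactDeltaSketch {

    static List<Long> encode(List<int[]> impacts) {
        List<Long> out = new ArrayList<>();
        int prevFreq = 0;
        long prevNorm = 0;
        for (int[] impact : impacts) {
            int freqDelta = impact[0] - prevFreq - 1;
            long normDelta = impact[1] - prevNorm - 1;
            if (normDelta == 0) {
                out.add((long) (freqDelta << 1));        // low bit 0: norm advanced by exactly 1
            } else {
                out.add((long) ((freqDelta << 1) | 1));  // low bit 1: an explicit norm delta follows
                out.add(normDelta);
            }
            prevFreq = impact[0];
            prevNorm = impact[1];
        }
        return out;
    }

    public static void main(String[] args) {
        List<int[]> impacts = List.of(new int[] { 1, 1 }, new int[] { 3, 2 }, new int[] { 7, 9 });
        System.out.println(encode(impacts)); // prints [0, 2, 7, 6]
    }
}

The low bit of the leading value plays the role of the flag written above: 0 means the norm advanced by exactly one and nothing else is stored, 1 means an explicit norm delta follows.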
- * @lucene.experimental */ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { From bbaa535eae3148b9fb742a9904ff2737def64abc Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Mon, 28 Mar 2022 09:40:55 +0200 Subject: [PATCH 03/19] review comments o --- .../core/internal/io/IOUtils.java | 41 ---------------- .../LegacyAdaptingPerFieldPostingsFormat.java | 47 +++++++++++++------ .../Lucene40BlockTreeTermsReader.java | 8 +++- .../bwc/codecs/lucene60/Lucene60Codec.java | 5 -- .../bwc/codecs/lucene62/Lucene62Codec.java | 5 -- .../bwc/codecs/lucene70/BWCLucene70Codec.java | 2 - .../oldrepos/OldRepositoryAccessIT.java | 2 +- 7 files changed, 40 insertions(+), 70 deletions(-) diff --git a/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java b/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java index 183ff4111b693..5699180285746 100644 --- a/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java +++ b/libs/core/src/main/java/org/elasticsearch/core/internal/io/IOUtils.java @@ -317,45 +317,4 @@ public static void fsync(final Path fileToSync, final boolean isDir, final boole } } - /** - * This utility method takes a previously caught (non-null) {@code Throwable} and rethrows either - * the original argument if it was a subclass of the {@code IOException} or an {@code - * RuntimeException} with the cause set to the argument. - * - *
<p>
This method never returns any value, even though it declares a return value - * of type {@link Error}. The return value declaration is very useful to let the compiler know - * that the code path following the invocation of this method is unreachable. So in most cases the - * invocation of this method will be guarded by an {@code if} and used together with a {@code - * throw} statement, as in: - * - *
<pre>{@code
-     * if (t != null) throw IOUtils.rethrowAlways(t)
-     * }</pre>
- * - * @param th The throwable to rethrow, must not be null. - * @return This method always results in an exception, it never returns any value. See method - * documentation for details and usage example. - * @throws IOException if the argument was an instance of IOException - * @throws RuntimeException with the {@link RuntimeException#getCause()} set to the argument, if - * it was not an instance of IOException. - */ - public static Error rethrowAlways(Throwable th) throws IOException, RuntimeException { - if (th == null) { - throw new AssertionError("rethrow argument must not be null."); - } - - if (th instanceof IOException) { - throw (IOException) th; - } - - if (th instanceof RuntimeException) { - throw (RuntimeException) th; - } - - if (th instanceof Error) { - throw (Error) th; - } - - throw new RuntimeException(th); - } } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java index 8aefcd875834c..4ee7456ae6993 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java @@ -1,10 +1,22 @@ /* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. */ - package org.elasticsearch.xpack.lucene.bwc.codecs; import org.apache.lucene.codecs.FieldsConsumer; @@ -32,6 +44,19 @@ import java.util.Map; import java.util.TreeMap; +/** + * Modified version of {@link PerFieldPostingsFormat} that allows swapping in + * {@link org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat} instead of + * {@link org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat} when reading from older + * codecs. The former has full support for older Lucene versions (going back to Lucene 5) while the + * latter only supports Lucene 7 and above (as it was shipped with backwards-codecs of Lucene 9 that + * only has support for N-2). + * + * This class can probably be removed once we are on Lucene 10 and Lucene50PostingsFormat is no longer + * shipped as part of bwc jars. + * + * Swapping out formats can be done via the {@link #getPostingsFormat(String) method}. 
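A minimal sketch of that hook, mirroring the anonymous-subclass wiring the legacy codecs later in this patch use; the no-arg BWCLucene50PostingsFormat constructor and the exception branch are assumptions rather than the exact shipped code:

import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.xpack.lucene.bwc.codecs.LegacyAdaptingPerFieldPostingsFormat;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat;

// Illustrative wiring only: maps the on-disk per-field format name "Lucene50" to the
// BWC reader described in the javadoc above. The exception message is an assumption.
class LegacyPostingsWiring {
    static final PostingsFormat POSTINGS_FORMAT = new LegacyAdaptingPerFieldPostingsFormat() {
        @Override
        protected PostingsFormat getPostingsFormat(String formatName) {
            if (formatName.equals("Lucene50")) {
                return new BWCLucene50PostingsFormat();
            }
            throw new IllegalArgumentException("unsupported legacy postings format [" + formatName + "]");
        }
    };
}

The Lucene60Codec and Lucene62Codec hunks below use this same pattern for their per-field postings format.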
+ */ public abstract class LegacyAdaptingPerFieldPostingsFormat extends PostingsFormat { /** Name of this {@link PostingsFormat}. */ public static final String PER_FIELD_NAME = "PerField40"; @@ -65,12 +90,12 @@ private class FieldsWriter extends FieldsConsumer { @Override public void write(Fields fields, NormsProducer norms) throws IOException { - throw new UnsupportedOperationException(); + throw new IllegalStateException("This codec should only be used for reading, not writing"); } @Override public void merge(MergeState mergeState, NormsProducer norms) throws IOException { - throw new UnsupportedOperationException(); + throw new IllegalStateException("This codec should only be used for reading, not writing"); } @Override @@ -189,12 +214,4 @@ public final FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOExc public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { return new FieldsReader(state, this); } - - /** - * Returns the postings format that should be used for writing new segments of field. - * - *
<p>
The field to format mapping is written to the index, so this method is only invoked when - * writing, not when reading. - */ - public abstract PostingsFormat getPostingsFormatForField(String field); } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java index 3237da73cf830..44690566c6acc 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java @@ -32,6 +32,7 @@ import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.ByteSequenceOutputs; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.fst.Outputs; @@ -280,7 +281,7 @@ public Lucene40BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentRe if (metaIn != null) { CodecUtil.checkFooter(metaIn, priorE); } else if (priorE != null) { - IOUtils.rethrowAlways(priorE); + rethrowAlways(priorE); } } } @@ -327,6 +328,11 @@ private static void seekDir(IndexInput input) throws IOException { input.seek(offset); } + @SuppressForbidden(reason = "Lucene class") + private static Error rethrowAlways(Throwable th) throws IOException, RuntimeException { + return org.apache.lucene.util.IOUtils.rethrowAlways(th); + } + // for debugging // private static String toHex(int v) { // return "0x" + Integer.toHexString(v); diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java index 55fe5c3b98f64..d507d49907433 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java @@ -59,11 +59,6 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } }; private final PostingsFormat postingsFormat = new LegacyAdaptingPerFieldPostingsFormat() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - throw new IllegalStateException("This codec should only be used for reading, not writing"); - } - @Override protected PostingsFormat getPostingsFormat(String formatName) { if (formatName.equals("Lucene50")) { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java index e3317a1c00c8c..85084317977b3 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java @@ -58,11 +58,6 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } }; 
private final PostingsFormat postingsFormat = new LegacyAdaptingPerFieldPostingsFormat() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - throw new IllegalStateException("This codec should only be used for reading, not writing"); - } - @Override protected PostingsFormat getPostingsFormat(String formatName) { if (formatName.equals("Lucene50")) { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java index 90739206b5643..8e52baa9a73c5 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java @@ -3,8 +3,6 @@ * or more contributor license agreements. Licensed under the Elastic License * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. - * - * Modifications copyright (C) 2021 Elasticsearch B.V. */ package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70; diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java index d63ea5301fcdb..094fdca692081 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java @@ -491,7 +491,7 @@ private void assertDocs( ); logger.info(searchResponse); // check match - assertEquals(List.of(id), Arrays.stream(searchResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toList())); + ElasticsearchAssertions.assertSearchHits(searchResponse, id); if (oldVersion.before(Version.fromString("6.0.0"))) { // search on _type and check that results contain _type information From 0d40083f68ffd01448d3261e6235d90d23acfb23 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 27 Apr 2022 11:27:24 +0200 Subject: [PATCH 04/19] Verify / rewrite mappings using full analysis service --- .../metadata/IndexMetadataVerifier.java | 11 ++--------- .../java/org/elasticsearch/node/Node.java | 3 ++- .../snapshots/RestoreService.java | 19 +++++++++++++++---- .../snapshots/SnapshotResiliencyTests.java | 3 ++- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifier.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifier.java index 2d07a5abc6cca..f8f77409db89d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifier.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifier.java @@ -18,15 +18,12 @@ import org.elasticsearch.common.settings.IndexScopedSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; -import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.mapper.DocumentMapper; import 
org.elasticsearch.index.mapper.MapperRegistry; import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.mapper.Mapping; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.script.ScriptService; @@ -92,7 +89,7 @@ public IndexMetadata verifyIndexMetadata(IndexMetadata indexMetadata, Version mi // Next we have to run this otherwise if we try to create IndexSettings // with broken settings it would fail in checkMappingsCompatibility newMetadata = archiveBrokenIndexSettings(newMetadata); - createAndValidateMapping(newMetadata); + checkMappingsCompatibility(newMetadata); return newMetadata; } @@ -129,10 +126,8 @@ private static void checkSupportedVersion(IndexMetadata indexMetadata, Version m * Note that we don't expect users to encounter mapping incompatibilities, since our index compatibility * policy guarantees we can read mappings from previous compatible index versions. A failure here would * indicate a compatibility bug (which are unfortunately not that uncommon). - * @return the mapping */ - @Nullable - public Mapping createAndValidateMapping(IndexMetadata indexMetadata) { + private void checkMappingsCompatibility(IndexMetadata indexMetadata) { try { // We cannot instantiate real analysis server or similarity service at this point because the node @@ -199,8 +194,6 @@ public Set> entrySet() { scriptService ); mapperService.merge(indexMetadata, MapperService.MergeReason.MAPPING_RECOVERY); - DocumentMapper documentMapper = mapperService.documentMapper(); - return documentMapper == null ? null : documentMapper.mapping(); } } catch (Exception ex) { // Wrap the inner exception so we have the index name in the exception message diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 6e7c52523131b..92df7e81caf82 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -793,7 +793,8 @@ protected Node( clusterModule.getMetadataDeleteIndexService(), indexMetadataVerifier, shardLimitValidator, - systemIndices + systemIndices, + indicesService ); final DiskThresholdMonitor diskThresholdMonitor = new DiskThresholdMonitor( settings, diff --git a/server/src/main/java/org/elasticsearch/snapshots/RestoreService.java b/server/src/main/java/org/elasticsearch/snapshots/RestoreService.java index 1568cfd82a3ce..e0490b9d6dfb8 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/RestoreService.java @@ -61,10 +61,12 @@ import org.elasticsearch.core.Tuple; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.Mapping; import org.elasticsearch.index.shard.IndexLongFieldRange; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.ShardLimitValidator; import org.elasticsearch.indices.SystemDataStreamDescriptor; import org.elasticsearch.indices.SystemIndices; @@ -176,6 +178,8 @@ public class RestoreService implements ClusterStateApplier { private final SystemIndices systemIndices; + private final IndicesService indicesService; + private volatile boolean refreshRepositoryUuidOnRestore; public RestoreService( @@ -186,7 +190,8 @@ public 
RestoreService( MetadataDeleteIndexService metadataDeleteIndexService, IndexMetadataVerifier indexMetadataVerifier, ShardLimitValidator shardLimitValidator, - SystemIndices systemIndices + SystemIndices systemIndices, + IndicesService indicesService ) { this.clusterService = clusterService; this.repositoriesService = repositoriesService; @@ -200,6 +205,7 @@ public RestoreService( this.clusterSettings = clusterService.getClusterSettings(); this.shardLimitValidator = shardLimitValidator; this.systemIndices = systemIndices; + this.indicesService = indicesService; this.refreshRepositoryUuidOnRestore = REFRESH_REPO_UUID_ON_RESTORE_SETTING.get(clusterService.getSettings()); clusterService.getClusterSettings() .addSettingsUpdateConsumer(REFRESH_REPO_UUID_ON_RESTORE_SETTING, this::setRefreshRepositoryUuidOnRestore); @@ -1286,7 +1292,7 @@ public ClusterState execute(ClusterState currentState) { ); if (snapshotIndexMetadata.getCompatibilityVersion().before(minIndexCompatibilityVersion)) { // adapt index metadata so that it can be understood by current version - snapshotIndexMetadata = convertLegacyIndex(snapshotIndexMetadata, currentState, indexMetadataVerifier); + snapshotIndexMetadata = convertLegacyIndex(snapshotIndexMetadata, currentState, indicesService); } try { snapshotIndexMetadata = indexMetadataVerifier.verifyIndexMetadata(snapshotIndexMetadata, minIndexCompatibilityVersion); @@ -1579,7 +1585,7 @@ public void clusterStateProcessed(ClusterState oldState, ClusterState newState) private static IndexMetadata convertLegacyIndex( IndexMetadata snapshotIndexMetadata, ClusterState clusterState, - IndexMetadataVerifier indexMetadataVerifier + IndicesService indicesService ) { if (snapshotIndexMetadata.getCreationVersion().before(Version.fromString("5.0.0"))) { throw new IllegalArgumentException("can't restore an index created before version 5.0.0"); @@ -1668,7 +1674,12 @@ private static IndexMetadata convertLegacyIndex( IndexMetadata convertedIndexMetadata = convertedIndexMetadataBuilder.build(); try { - Mapping mapping = indexMetadataVerifier.createAndValidateMapping(convertedIndexMetadata); + Mapping mapping; + try (MapperService mapperService = indicesService.createIndexMapperService(convertedIndexMetadata)) { + // create and validate in-memory mapping + mapperService.merge(convertedIndexMetadata, MapperService.MergeReason.MAPPING_RECOVERY); + mapping = mapperService.documentMapper().mapping(); + } if (mapping != null) { convertedIndexMetadataBuilder = IndexMetadata.builder(convertedIndexMetadata); // using the recomputed mapping allows stripping some fields that we no longer support (e.g. 
include_in_all) diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index d607492ac0d6e..07a2d6c42df7b 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -1918,7 +1918,8 @@ protected void assertSnapshotOrGenericThread() { new MetadataDeleteIndexService(settings, clusterService, allocationService), new IndexMetadataVerifier(settings, namedXContentRegistry, mapperRegistry, indexScopedSettings, ScriptCompiler.NONE), shardLimitValidator, - EmptySystemIndices.INSTANCE + EmptySystemIndices.INSTANCE, + indicesService ); actions.put( PutMappingAction.INSTANCE, From b7dd42143946cd2e54b6473a39e44fca1c50fcc8 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 27 Apr 2022 11:45:24 +0200 Subject: [PATCH 05/19] allow queries on text field type --- .../java/org/elasticsearch/index/mapper/TextFieldMapper.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index f0b8b6de41493..e60bb4bf64bc7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -453,7 +453,10 @@ public TextFieldMapper build(MapperBuilderContext context) { } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + private static final Version MINIMUM_COMPATIBILITY_VERSION = Version.fromString("5.0.0"); + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()), + MINIMUM_COMPATIBILITY_VERSION); private static class PhraseWrappedAnalyzer extends AnalyzerWrapper { From 7f590addfbed9c9f1a7e4e532db2ad4f97ea6b47 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 27 Apr 2022 12:06:27 +0200 Subject: [PATCH 06/19] make analyzer lenient and updateable --- .../extras/MatchOnlyTextFieldMapper.java | 3 +- .../extras/SearchAsYouTypeFieldMapper.java | 13 ++++--- .../AnnotatedTextFieldMapper.java | 13 ++++--- .../AnnotatedTextFieldTypeTests.java | 3 +- .../index/mapper/FieldMapper.java | 34 +++++++++++++++++-- .../index/mapper/TextFieldMapper.java | 9 +++-- .../index/mapper/TextParams.java | 19 ++++++++--- 7 files changed, 75 insertions(+), 19 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 7e5f300f78814..edc8c8241f95f 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -97,7 +97,8 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna this.analyzers = new TextParams.Analyzers( indexAnalyzers, m -> ((MatchOnlyTextFieldMapper) m).indexAnalyzer, - m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap + m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap, + indexCreatedVersion ); } diff --git 
a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java index 41b494367d91d..03bbc01a3a0b5 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.Version; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.IndexAnalyzers; @@ -92,7 +93,7 @@ public static class Defaults { public static final int MAX_SHINGLE_SIZE = 3; } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers())); + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); private static Builder builder(FieldMapper in) { return ((SearchAsYouTypeFieldMapper) in).builder; @@ -141,12 +142,16 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); - public Builder(String name, IndexAnalyzers indexAnalyzers) { + private final Version indexCreatedVersion; + + public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) { super(name); + this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( indexAnalyzers, m -> builder(m).analyzers.getIndexAnalyzer(), - m -> builder(m).analyzers.positionIncrementGap.getValue() + m -> builder(m).analyzers.positionIncrementGap.getValue(), + indexCreatedVersion ); } @@ -702,7 +707,7 @@ protected String contentType() { } public FieldMapper.Builder getMergeBuilder() { - return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this); + return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this); } public static String getShingleFieldName(String parentField, int shingleSize) { diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 04aaa10e90f84..43ade660ebe5d 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -21,6 +21,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -86,12 +87,16 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); - public Builder(String name, IndexAnalyzers indexAnalyzers) { + private final Version indexCreatedVersion; + + public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) 
{ super(name); + this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( indexAnalyzers, m -> builder(m).analyzers.getIndexAnalyzer(), - m -> builder(m).analyzers.positionIncrementGap.getValue() + m -> builder(m).analyzers.positionIncrementGap.getValue(), + indexCreatedVersion ); } @@ -145,7 +150,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) { } } - public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers())); + public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); /** * Parses markdown-like syntax into plain text and AnnotationTokens with offsets for @@ -519,6 +524,6 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this); + return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this); } } diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java index d9d28d34f88d5..0ead11b1e2ae9 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; @@ -28,7 +29,7 @@ public void testIntervals() throws IOException { } public void testFetchSourceValue() throws IOException { - MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", createDefaultIndexAnalyzers()).build( + MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", Version.CURRENT, createDefaultIndexAnalyzers()).build( MapperBuilderContext.ROOT ).fieldType(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index f1db921bd452f..39e757d5b9fad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -8,6 +8,9 @@ package org.elasticsearch.index.mapper; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; @@ -48,6 +51,8 @@ import java.util.function.Supplier; public abstract class FieldMapper extends Mapper implements Cloneable { + private static final Logger logger = LogManager.getLogger(FieldMapper.class); + public static final Setting IGNORE_MALFORMED_SETTING = Setting.boolSetting( "index.mapping.ignore_malformed", false, @@ -1042,23 +1047,48 @@ public static > Parameter restrictedEnumParam( * @param updateable whether the parameter can be changed by a mapping update * @param initializer a 
function that reads the parameter value from an existing mapper * @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping + * @param indexCreatedVersion the version on which this index was created */ public static Parameter analyzerParam( String name, boolean updateable, Function initializer, - Supplier defaultAnalyzer + Supplier defaultAnalyzer, + Version indexCreatedVersion ) { return new Parameter<>(name, updateable, defaultAnalyzer, (n, c, o) -> { String analyzerName = o.toString(); NamedAnalyzer a = c.getIndexAnalyzers().get(analyzerName); if (a == null) { - throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings"); + if (indexCreatedVersion.isLegacyIndexVersion()) { + logger.warn( + new ParameterizedMessage("Could not find analyzer [{}] of legacy index, falling back to default", analyzerName) + ); + a = defaultAnalyzer.get(); + } else { + throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings"); + } } return a; }, initializer, (b, n, v) -> b.field(n, v.name()), NamedAnalyzer::name); } + /** + * Defines a parameter that takes an analyzer name + * @param name the parameter name + * @param updateable whether the parameter can be changed by a mapping update + * @param initializer a function that reads the parameter value from an existing mapper + * @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping + */ + public static Parameter analyzerParam( + String name, + boolean updateable, + Function initializer, + Supplier defaultAnalyzer + ) { + return analyzerParam(name, updateable, initializer, defaultAnalyzer, Version.CURRENT); + } + /** * Declares a metadata parameter */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index e60bb4bf64bc7..5bdf7e6e1230c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -278,7 +278,8 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna this.analyzers = new TextParams.Analyzers( indexAnalyzers, m -> ((TextFieldMapper) m).indexAnalyzer, - m -> (((TextFieldMapper) m).positionIncrementGap) + m -> (((TextFieldMapper) m).positionIncrementGap), + indexCreatedVersion ); } @@ -455,8 +456,10 @@ public TextFieldMapper build(MapperBuilderContext context) { private static final Version MINIMUM_COMPATIBILITY_VERSION = Version.fromString("5.0.0"); - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()), - MINIMUM_COMPATIBILITY_VERSION); + public static final TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()), + MINIMUM_COMPATIBILITY_VERSION + ); private static class PhraseWrappedAnalyzer extends AnalyzerWrapper { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextParams.java b/server/src/main/java/org/elasticsearch/index/mapper/TextParams.java index 56410b778d197..b94387e0d59b4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextParams.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextParams.java @@ -10,6 +10,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.elasticsearch.Version; import 
org.elasticsearch.index.analysis.AnalysisMode; import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.index.analysis.IndexAnalyzers; @@ -38,9 +39,17 @@ public static final class Analyzers { public Analyzers( IndexAnalyzers indexAnalyzers, Function analyzerInitFunction, - Function positionGapInitFunction + Function positionGapInitFunction, + Version indexCreatedVersion ) { - this.indexAnalyzer = Parameter.analyzerParam("analyzer", false, analyzerInitFunction, indexAnalyzers::getDefaultIndexAnalyzer) + + this.indexAnalyzer = Parameter.analyzerParam( + "analyzer", + indexCreatedVersion.isLegacyIndexVersion(), + analyzerInitFunction, + indexAnalyzers::getDefaultIndexAnalyzer, + indexCreatedVersion + ) .setSerializerCheck( (id, ic, a) -> id || ic @@ -60,7 +69,8 @@ public Analyzers( } } return indexAnalyzer.get(); - } + }, + indexCreatedVersion ) .setSerializerCheck((id, ic, a) -> id || ic || Objects.equals(a, getSearchQuoteAnalyzer()) == false) .addValidator(a -> a.checkAllowedInMode(AnalysisMode.SEARCH_TIME)); @@ -76,7 +86,8 @@ public Analyzers( } } return searchAnalyzer.get(); - } + }, + indexCreatedVersion ).addValidator(a -> a.checkAllowedInMode(AnalysisMode.SEARCH_TIME)); this.positionIncrementGap = Parameter.intParam( "position_increment_gap", From c33e835a3fdb05107347169d2317e6dd29835ee3 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 27 Apr 2022 15:18:37 +0200 Subject: [PATCH 07/19] fix tests --- .../java/org/elasticsearch/index/mapper/ObjectMapperTests.java | 2 +- .../elasticsearch/index/mapper/TextFieldAnalyzerModeTests.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java index b0a2c5b8b87cb..6d864b69dbd9a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java @@ -340,7 +340,7 @@ public void testUnknownLegacyFields() throws Exception { public void testUnmappedLegacyFields() throws Exception { MapperService service = createMapperService(Version.fromString("5.0.0"), Settings.EMPTY, () -> false, mapping(b -> { b.startObject("name"); - b.field("type", "text"); + b.field("type", CompletionFieldMapper.CONTENT_TYPE); b.field("unknown_setting", 5); b.endObject(); })); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldAnalyzerModeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldAnalyzerModeTests.java index 5b2d7eea2153b..cdd3a6480983a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldAnalyzerModeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldAnalyzerModeTests.java @@ -69,6 +69,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { Map fieldNode = new HashMap<>(); fieldNode.put("analyzer", "my_analyzer"); MappingParserContext parserContext = mock(MappingParserContext.class); + when(parserContext.indexVersionCreated()).thenReturn(Version.CURRENT); // check AnalysisMode.ALL works Map analyzers = defaultAnalyzers(); @@ -103,6 +104,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { fieldNode.put("search_analyzer", "standard"); } MappingParserContext parserContext = mock(MappingParserContext.class); + when(parserContext.indexVersionCreated()).thenReturn(Version.CURRENT); // check AnalysisMode.ALL and AnalysisMode.SEARCH_TIME works Map analyzers = 
defaultAnalyzers(); @@ -143,6 +145,7 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { Map fieldNode = new HashMap<>(); fieldNode.put("analyzer", "my_analyzer"); MappingParserContext parserContext = mock(MappingParserContext.class); + when(parserContext.indexVersionCreated()).thenReturn(Version.CURRENT); // check that "analyzer" set to AnalysisMode.INDEX_TIME is blocked if there is no search analyzer AnalysisMode mode = AnalysisMode.INDEX_TIME; From ac09d0247bd00072da810f6aab9f94587fc6c07c Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 27 Apr 2022 15:47:38 +0200 Subject: [PATCH 08/19] fix test --- .../java/org/elasticsearch/index/mapper/MultiFieldTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java index 29c00cf6b0a5d..d8fbe18a4ac78 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java @@ -227,7 +227,7 @@ public void testUnmappedLegacyFieldsUnderKnownRootField() throws Exception { b.startObject("name"); b.field("type", "keyword"); b.startObject("fields"); - b.startObject("subfield").field("type", "text").endObject(); + b.startObject("subfield").field("type", CompletionFieldMapper.CONTENT_TYPE).endObject(); b.endObject(); b.endObject(); })); @@ -250,7 +250,7 @@ public void testFieldsUnderUnknownRootField() throws Exception { public void testFieldsUnderUnmappedRootField() throws Exception { MapperService service = createMapperService(Version.fromString("5.0.0"), Settings.EMPTY, () -> false, mapping(b -> { b.startObject("name"); - b.field("type", "text"); + b.field("type", CompletionFieldMapper.CONTENT_TYPE); b.startObject("fields"); b.startObject("subfield").field("type", "keyword").endObject(); b.endObject(); From e99a176fae5421b28ea2d881a9bf488fdeb60620 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 27 Apr 2022 17:21:01 +0200 Subject: [PATCH 09/19] test fixes --- .../java/org/elasticsearch/oldrepos/OldMappingsIT.java | 8 ++++---- .../resources/org/elasticsearch/oldrepos/filebeat.json | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java index a672925a0328c..334e48f904925 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java @@ -197,7 +197,7 @@ public void testSearchOnPlaceHolderField() throws IOException { .startObject() .startObject("query") .startObject("match") - .startObject("apache2.access.agent") + .startObject("completion") .field("query", "some-agent") .endObject() .endObject() @@ -207,7 +207,7 @@ public void testSearchOnPlaceHolderField() throws IOException { ResponseException re = expectThrows(ResponseException.class, () -> entityAsMap(client().performRequest(search))); assertThat( re.getMessage(), - containsString("Field [apache2.access.agent] of type [text] in legacy index does not support match queries") + containsString("Field [completion] of type [completion] in legacy index does not support match queries") ); } @@ -218,14 +218,14 @@ public void testAggregationOnPlaceholderField() throws 
IOException { .startObject("aggs") .startObject("agents") .startObject("terms") - .field("field", "apache2.access.agent") + .field("field", "completion") .endObject() .endObject() .endObject() .endObject(); search.setJsonEntity(Strings.toString(query)); ResponseException re = expectThrows(ResponseException.class, () -> entityAsMap(client().performRequest(search))); - assertThat(re.getMessage(), containsString("can't run aggregation or sorts on field type text of legacy index")); + assertThat(re.getMessage(), containsString("can't run aggregation or sorts on field type completion of legacy index")); } } diff --git a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json index 6fa22f1c36ef9..a5debfb988386 100644 --- a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json +++ b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json @@ -676,6 +676,9 @@ "type": { "ignore_above": 1024, "type": "keyword" + }, + "completion": { + "type": "completion" } } } From c0e508fea8bef59c9fb9d6de1647431cd626cf6e Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Thu, 28 Apr 2022 08:40:29 +0200 Subject: [PATCH 10/19] use constant scoring --- .../index/mapper/TextFieldMapper.java | 64 ++++++++++++++++++- .../elasticsearch/oldrepos/OldMappingsIT.java | 25 ++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 5bdf7e6e1230c..f08bfa37239c4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -50,6 +50,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.AutomatonQueries; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.IndexAnalyzers; @@ -330,7 +331,7 @@ protected List> getParameters() { ); } - private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) { + private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context, Version indexCreatedVersion) { NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); if (analyzers.positionIncrementGap.isConfigured()) { @@ -341,7 +342,12 @@ private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext c } } TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer); - TextFieldType ft = new TextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); + TextFieldType ft; + if (indexCreatedVersion.isLegacyIndexVersion()) { + ft = new ConstantScoreTextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); + } else { + ft = new TextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); + } ft.eagerGlobalOrdinals = eagerGlobalOrdinals.getValue(); if (fieldData.getValue()) { ft.setFielddata(true, 
freqFilter.getValue()); @@ -431,7 +437,7 @@ public Map indexAnalyzers(String name, SubFieldInfo phras @Override public TextFieldMapper build(MapperBuilderContext context) { FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); - TextFieldType tft = buildFieldType(fieldType, context); + TextFieldType tft = buildFieldType(fieldType, context, indexCreatedVersion); SubFieldInfo phraseFieldInfo = buildPhraseInfo(fieldType, tft); SubFieldInfo prefixFieldInfo = buildPrefixInfo(context, fieldType, tft); MultiFields multiFields = multiFieldsBuilder.build(this, context); @@ -903,6 +909,58 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S } + public static class ConstantScoreTextFieldType extends TextFieldType { + + public ConstantScoreTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { + super(name, indexed, stored, tsi, meta); + } + + @Override + public Query termQuery(Object value, SearchExecutionContext context) { + // Disable scoring + return new ConstantScoreQuery(super.termQuery(value, context)); + } + + @Override + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + SearchExecutionContext context + ) { + // Disable scoring + return new ConstantScoreQuery(super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context)); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext) + throws IOException { + // Disable scoring + return new ConstantScoreQuery(super.phraseQuery(stream, slop, enablePosIncrements, queryShardContext)); + } + + @Override + public Query multiPhraseQuery( + TokenStream stream, + int slop, + boolean enablePositionIncrements, + SearchExecutionContext queryShardContext + ) throws IOException { + // Disable scoring + return new ConstantScoreQuery(super.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext)); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext) + throws IOException { + // Disable scoring + return new ConstantScoreQuery(super.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext)); + } + + } + private final Version indexCreatedVersion; private final boolean index; private final boolean store; diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java index 334e48f904925..e8550d44d9af4 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java @@ -36,6 +36,7 @@ import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.hasSize; public class OldMappingsIT extends ESRestTestCase { @@ -98,6 +99,7 @@ public void setupIndex() throws IOException { .startObject("apache2") .startObject("access") .field("url", "myurl1") + .field("agent", "agent1") .endObject() .endObject() .endObject(); @@ -111,6 +113,7 @@ public void setupIndex() throws IOException { .startObject("apache2") .startObject("access") .field("url", "myurl2") + .field("agent", "agent2 agent2") 
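/*
 * A minimal standalone Lucene sketch, not part of this patch (class name and the in-memory
 * index are illustrative), of the behaviour that the ConstantScoreTextFieldType above relies
 * on: wrapping any query in ConstantScoreQuery discards the similarity-based score, so every
 * matching document scores exactly 1.0, which is what testConstantScoringOnTextField below
 * asserts for match queries on a legacy text field.
 */
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.ByteBuffersDirectory;

public class ConstantScoreSketch {
    public static void main(String[] args) throws Exception {
        try (ByteBuffersDirectory dir = new ByteBuffersDirectory()) {
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                Document doc = new Document();
                doc.add(new TextField("agent", "agent2 agent2", Field.Store.NO));
                writer.addDocument(doc);
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                // Plain term query: the score comes from BM25 statistics and is generally not 1.0.
                float scored = searcher.search(new TermQuery(new Term("agent", "agent2")), 1).scoreDocs[0].score;
                // Constant-score wrapper: the similarity is bypassed and every match scores 1.0.
                float constant = searcher.search(new ConstantScoreQuery(new TermQuery(new Term("agent", "agent2"))), 1).scoreDocs[0].score;
                System.out.println("bm25=" + scored + " constant=" + constant);
            }
        }
    }
}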
.endObject() .endObject() .endObject(); @@ -228,4 +231,26 @@ public void testAggregationOnPlaceholderField() throws IOException { assertThat(re.getMessage(), containsString("can't run aggregation or sorts on field type completion of legacy index")); } + public void testConstantScoringOnTextField() throws IOException { + Request search = new Request("POST", "/" + "filebeat" + "/_search"); + XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) + .startObject() + .startObject("query") + .startObject("match") + .startObject("apache2.access.agent") + .field("query", "agent2") + .endObject() + .endObject() + .endObject() + .endObject(); + search.setJsonEntity(Strings.toString(query)); + Map response = entityAsMap(client().performRequest(search)); + List hits = (List) (XContentMapValues.extractValue("hits.hits", response)); + assertThat(hits, hasSize(1)); + @SuppressWarnings("unchecked") + Map hit = (Map) hits.get(0); + assertThat(hit, hasKey("_score")); + assertEquals(1.0d, (double) hit.get("_score"), 0.01d); + } + } From d28cea8b7a11a0eec1f3e6d732e78cd9df50e9f2 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Mon, 2 May 2022 10:54:52 +0200 Subject: [PATCH 11/19] revert change --- libs/core/src/main/java/org/elasticsearch/core/IOUtils.java | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/core/src/main/java/org/elasticsearch/core/IOUtils.java b/libs/core/src/main/java/org/elasticsearch/core/IOUtils.java index afd12b8b00015..0398418e503bc 100644 --- a/libs/core/src/main/java/org/elasticsearch/core/IOUtils.java +++ b/libs/core/src/main/java/org/elasticsearch/core/IOUtils.java @@ -314,5 +314,4 @@ public static void fsync(final Path fileToSync, final boolean isDir, final boole } } } - } From 0415143a4bcc51fea8e81b076b1e69009d1992f3 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Tue, 10 May 2022 08:56:28 +0200 Subject: [PATCH 12/19] tests --- .../index/mapper/TextFieldMapper.java | 20 ++ .../ConstantScoreTextFieldTypeTests.java | 270 ++++++++++++++++++ .../index/mapper/TextFieldMapperTests.java | 18 ++ 3 files changed, 308 insertions(+) create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index f08bfa37239c4..ed6e4b178b6fa 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -915,6 +915,26 @@ public ConstantScoreTextFieldType(String name, boolean indexed, boolean stored, super(name, indexed, stored, tsi, meta); } + public ConstantScoreTextFieldType(String name) { + this( + name, + true, + false, + new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + Collections.emptyMap() + ); + } + + public ConstantScoreTextFieldType(String name, boolean indexed, boolean stored, Map meta) { + this( + name, + indexed, + stored, + new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + meta + ); + } + @Override public Query termQuery(Object value, SearchExecutionContext context) { // Disable scoring diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java new file mode 100644 index 0000000000000..fa2947d9d0b3b --- /dev/null +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java @@ -0,0 +1,270 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.search.AutomatonQueries; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.mapper.TextFieldMapper.ConstantScoreTextFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE; +import static org.hamcrest.Matchers.equalTo; + +public class ConstantScoreTextFieldTypeTests extends FieldTypeTestCase { + + private static ConstantScoreTextFieldType createFieldType() { + return new ConstantScoreTextFieldType("field"); + } + + public void testIsAggregatableDependsOnFieldData() { + ConstantScoreTextFieldType ft = createFieldType(); + assertFalse(ft.isAggregatable()); + ft.setFielddata(true); + assertTrue(ft.isAggregatable()); + } + + public void testTermQuery() { + MappedFieldType ft = createFieldType(); + assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null)); + assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); + + MappedFieldType unsearchable = new ConstantScoreTextFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); + assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + } + + public void testTermsQuery() { + MappedFieldType ft = createFieldType(); + List terms = new ArrayList<>(); + terms.add(new BytesRef("foo")); + terms.add(new BytesRef("bar")); + assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); + + MappedFieldType unsearchable = new ConstantScoreTextFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + 
IllegalArgumentException.class, + () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) + ); + assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + } + + public void testRangeQuery() { + MappedFieldType ft = createFieldType(); + assertEquals( + new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), + ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT) + ); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals( + "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.", + ee.getMessage() + ); + } + + public void testRegexpQuery() { + MappedFieldType ft = createFieldType(); + assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT)); + + MappedFieldType unsearchable = new ConstantScoreTextFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT) + ); + assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + public void testFuzzyQuery() { + MappedFieldType ft = createFieldType(); + assertEquals( + new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)), + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT) + ); + + MappedFieldType unsearchable = new ConstantScoreTextFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT) + ); + assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.fuzzyQuery( + "foo", + Fuzziness.AUTO, + randomInt(10) + 1, + randomInt(10) + 1, + randomBoolean(), + MOCK_CONTEXT_DISALLOW_EXPENSIVE + ) + ); + assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + public void testIndexPrefixes() { + ConstantScoreTextFieldType ft = createFieldType(); + ft.setIndexPrefixes(2, 10); + + Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockContext()); + assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q); + + q = ft.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, false, MOCK_CONTEXT); + assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q); + + q = ft.prefixQuery("Internationalisatio", CONSTANT_SCORE_REWRITE, true, MOCK_CONTEXT); + assertEquals(AutomatonQueries.caseInsensitivePrefixQuery(new Term("field", "Internationalisatio")), q); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.prefixQuery("internationalisatio", null, false, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + 
); + assertEquals( + "[prefix] queries cannot be executed when 'search.allow_expensive_queries' is set to false. " + + "For optimised prefix queries on text fields please enable [index_prefixes].", + ee.getMessage() + ); + + q = ft.prefixQuery("g", CONSTANT_SCORE_REWRITE, false, randomMockContext()); + Automaton automaton = Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar())); + + Query expected = new ConstantScoreQuery( + new BooleanQuery.Builder().add(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field", "g")), BooleanClause.Occur.SHOULD) + .build() + ); + + assertThat(q, equalTo(expected)); + } + + public void testFetchSourceValue() throws IOException { + ConstantScoreTextFieldType fieldType = createFieldType(); + + assertEquals(List.of("value"), fetchSourceValue(fieldType, "value")); + assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L)); + assertEquals(List.of("true"), fetchSourceValue(fieldType, true)); + } + + public void testWildcardQuery() { + ConstantScoreTextFieldType ft = createFieldType(); + + // case sensitive + AutomatonQuery actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, false, MOCK_CONTEXT); + AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*Butterflies*"))); + assertEquals(expected, actual); + assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere")); + + // case insensitive + actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, true, MOCK_CONTEXT); + expected = AutomatonQueries.caseInsensitiveWildcardQuery(new Term("field", new BytesRef("*Butterflies*"))); + assertEquals(expected, actual); + assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere")); + assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere")); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + /** + * we use this e.g. 
in query string query parser to normalize terms on text fields + */ + public void testNormalizedWildcardQuery() { + ConstantScoreTextFieldType ft = createFieldType(); + + AutomatonQuery actual = (AutomatonQuery) ft.normalizedWildcardQuery("*Butterflies*", null, MOCK_CONTEXT); + AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*butterflies*"))); + assertEquals(expected, actual); + assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere")); + assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere")); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + public void testTermIntervals() throws IOException { + MappedFieldType ft = createFieldType(); + IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertEquals(Intervals.term(new BytesRef("foo")), termIntervals); + } + + public void testPrefixIntervals() throws IOException { + MappedFieldType ft = createFieldType(); + IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertEquals(Intervals.prefix(new BytesRef("foo")), prefixIntervals); + } + + public void testWildcardIntervals() throws IOException { + MappedFieldType ft = createFieldType(); + IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals); + } + + public void testFuzzyIntervals() throws IOException { + MappedFieldType ft = createFieldType(); + IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT); + FuzzyQuery fq = new FuzzyQuery(new Term("field", "foo"), 1, 2, 128, true); + IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), "foo"); + assertEquals(expectedIntervals, fuzzyIntervals); + } + + public void testPrefixIntervalsWithIndexedPrefixes() { + ConstantScoreTextFieldType ft = createFieldType(); + ft.setIndexPrefixes(1, 4); + IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertEquals(Intervals.fixField("field._index_prefix", Intervals.term(new BytesRef("foo"))), prefixIntervals); + } + + public void testWildcardIntervalsWithIndexedPrefixes() { + ConstantScoreTextFieldType ft = createFieldType(); + ft.setIndexPrefixes(1, 4); + IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 7684c8e695b6a..e7f83efa2e49f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -43,6 +43,7 @@ import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; import org.apache.lucene.tests.analysis.Token; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.index.IndexSettings; @@ -1087,4 +1088,21 @@ protected Object 
generateRandomInputValue(MappedFieldType ft) { protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { assumeFalse("We don't have a way to assert things here", true); } + + public void testUnknownAnalyzerOnLegacyIndex() throws IOException { + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "text").field("analyzer", "does_not_exist")); + + expectThrows(MapperParsingException.class, () -> createMapperService(startingMapping)); + + MapperService mapperService = createMapperService(Version.fromString("5.0.0"), startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); + + merge(mapperService, startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); + + // check that analyzer can be swapped out on legacy index + XContentBuilder differentAnalyzer = fieldMapping(b -> b.field("type", "text").field("analyzer", "keyword")); + merge(mapperService, differentAnalyzer); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); + } } From 7ef03bd054cc17e520cc69071792bce79c5dd157 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Tue, 10 May 2022 09:41:15 +0200 Subject: [PATCH 13/19] tests --- .../xpack/lucene/bwc/codecs/BWCCodec.java | 64 +++++++++++++++++++ .../bwc/codecs/lucene60/Lucene60Codec.java | 2 +- .../bwc/codecs/lucene62/Lucene62Codec.java | 2 +- .../elasticsearch/oldrepos/OldMappingsIT.java | 19 +++--- .../org/elasticsearch/oldrepos/custom.json | 23 +++++++ 5 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java index be5be0bc6a965..6a85c3533c17a 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java @@ -10,20 +10,29 @@ import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Terms; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec; import java.io.IOException; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; /** @@ -136,4 +145,59 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) { return segmentInfo1; 
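/*
 * Rough standalone sketch, not taken from this patch (class and field names are made up),
 * of the Lucene contract that the EmptyPostingsFormat added just below leans on: when a
 * field has no postings in a segment, LeafReader#terms returns null and term queries on
 * that field simply match nothing instead of failing.
 */
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.ByteBuffersDirectory;

public class NoPostingsSketch {
    public static void main(String[] args) throws Exception {
        try (ByteBuffersDirectory dir = new ByteBuffersDirectory()) {
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                Document doc = new Document();
                // "body" is only stored, never indexed, so the segment holds no postings for it.
                doc.add(new StoredField("body", "some text"));
                writer.addDocument(doc);
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                Terms terms = reader.leaves().get(0).reader().terms("body");
                System.out.println("terms for 'body': " + terms); // null: no inverted index for the field
                long hits = new IndexSearcher(reader).search(new TermQuery(new Term("body", "some")), 1).totalHits.value;
                System.out.println("term query hits: " + hits);   // 0
            }
        }
    }
}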
} + /** + * In-memory postings format that shows no postings available. + */ + public static class EmptyPostingsFormat extends PostingsFormat { + + public EmptyPostingsFormat() { + super("EmptyPostingsFormat"); + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) { + return new FieldsConsumer() { + @Override + public void write(Fields fields, NormsProducer norms) { + throw new UnsupportedOperationException(); + } + + @Override + public void close() { + + } + }; + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) { + return new FieldsProducer() { + @Override + public void close() { + + } + + @Override + public void checkIntegrity() { + + } + + @Override + public Iterator iterator() { + return null; + } + + @Override + public Terms terms(String field) { + return null; + } + + @Override + public int size() { + return 0; + } + }; + } + } + } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java index d507d49907433..39bf8d5a431a5 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java @@ -64,7 +64,7 @@ protected PostingsFormat getPostingsFormat(String formatName) { if (formatName.equals("Lucene50")) { return new BWCLucene50PostingsFormat(); } else { - return super.getPostingsFormat(formatName); + return new EmptyPostingsFormat(); } } }; diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java index 85084317977b3..a2f3225d66e70 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java @@ -63,7 +63,7 @@ protected PostingsFormat getPostingsFormat(String formatName) { if (formatName.equals("Lucene50")) { return new BWCLucene50PostingsFormat(); } else { - return super.getPostingsFormat(formatName); + return new EmptyPostingsFormat(); } } }; diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java index e5db2380f177b..f82b640681950 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java @@ -79,9 +79,9 @@ public void setupIndex() throws IOException { String snapshotName = "snap"; List indices; if (oldVersion.before(Version.fromString("6.0.0"))) { - indices = Arrays.asList("filebeat", "winlogbeat"); + indices = Arrays.asList("filebeat", "winlogbeat", "custom"); } else { - indices = Arrays.asList("filebeat"); + indices = Arrays.asList("filebeat", "custom"); } int oldEsPort = Integer.parseInt(System.getProperty("tests.es.port")); @@ -91,8 +91,9 @@ public void setupIndex() throws IOException { if (oldVersion.before(Version.fromString("6.0.0"))) { 
assertOK(oldEs.performRequest(createIndex("winlogbeat", "winlogbeat.json"))); } + assertOK(oldEs.performRequest(createIndex("custom", "custom.json"))); - Request doc1 = new Request("PUT", "/" + "filebeat" + "/" + "doc" + "/" + "1"); + Request doc1 = new Request("PUT", "/" + "custom" + "/" + "doc" + "/" + "1"); doc1.addParameter("refresh", "true"); XContentBuilder bodyDoc1 = XContentFactory.jsonBuilder() .startObject() @@ -106,7 +107,7 @@ public void setupIndex() throws IOException { doc1.setJsonEntity(Strings.toString(bodyDoc1)); assertOK(oldEs.performRequest(doc1)); - Request doc2 = new Request("PUT", "/" + "filebeat" + "/" + "doc" + "/" + "2"); + Request doc2 = new Request("PUT", "/" + "custom" + "/" + "doc" + "/" + "2"); doc2.addParameter("refresh", "true"); XContentBuilder bodyDoc2 = XContentFactory.jsonBuilder() .startObject() @@ -178,7 +179,7 @@ public void testMappingOk() throws IOException { } public void testSearchKeyword() throws IOException { - Request search = new Request("POST", "/" + "filebeat" + "/_search"); + Request search = new Request("POST", "/" + "custom" + "/_search"); XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) .startObject() .startObject("query") @@ -196,7 +197,7 @@ public void testSearchKeyword() throws IOException { } public void testSearchOnPlaceHolderField() throws IOException { - Request search = new Request("POST", "/" + "filebeat" + "/_search"); + Request search = new Request("POST", "/" + "custom" + "/_search"); XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) .startObject() .startObject("query") @@ -216,7 +217,7 @@ public void testSearchOnPlaceHolderField() throws IOException { } public void testAggregationOnPlaceholderField() throws IOException { - Request search = new Request("POST", "/" + "filebeat" + "/_search"); + Request search = new Request("POST", "/" + "custom" + "/_search"); XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) .startObject() .startObject("aggs") @@ -233,7 +234,7 @@ public void testAggregationOnPlaceholderField() throws IOException { } public void testConstantScoringOnTextField() throws IOException { - Request search = new Request("POST", "/" + "filebeat" + "/_search"); + Request search = new Request("POST", "/" + "custom" + "/_search"); XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) .startObject() .startObject("query") @@ -255,7 +256,7 @@ public void testConstantScoringOnTextField() throws IOException { } public void testSearchFieldsOnPlaceholderField() throws IOException { - Request search = new Request("POST", "/" + "filebeat" + "/_search"); + Request search = new Request("POST", "/" + "custom" + "/_search"); XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) .startObject() .startObject("query") diff --git a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json new file mode 100644 index 0000000000000..c9c4b34179223 --- /dev/null +++ b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json @@ -0,0 +1,23 @@ +"_default_": { + "properties": { + "apache2": { + "properties": { + "access": { + "properties": { + "agent": { + "norms": false, + "type": "text" + }, + "url": { + "ignore_above": 1024, + "type": "keyword" + } + } + } + } + }, + "completion": { + "type": "completion" + } + } +} From 
1da6dd1fe5896329ad6155937b324ec3a4b6b68d Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Tue, 10 May 2022 09:47:18 +0200 Subject: [PATCH 14/19] =?UTF-8?q?fix=C3=B8=20o=20x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../elasticsearch/oldrepos/OldRepositoryAccessIT.java | 9 --------- .../resources/org/elasticsearch/oldrepos/filebeat.json | 3 --- 2 files changed, 12 deletions(-) diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java index a6f4e59e56668..a5b0472f2f845 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java @@ -11,8 +11,6 @@ import org.elasticsearch.Version; import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest; -import org.elasticsearch.action.get.GetRequest; -import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Request; @@ -441,13 +439,6 @@ private void assertDocs( Arrays.stream(searchResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toList()) ); - // look up by id (only 6.0+ as we would otherwise need ability to specify _type in GET API) - if (oldVersion.onOrAfter(Version.fromString("6.0.0"))) { - GetResponse getResponse = client.get(new GetRequest(index, id), RequestOptions.DEFAULT); - assertTrue(getResponse.isExists()); - assertEquals(sourceForDoc(getIdAsNumeric(id)), getResponse.getSourceAsString()); - } - // look up postings searchResponse = client.search( new SearchRequest(index).source(SearchSourceBuilder.searchSource().query(QueryBuilders.matchQuery("test", "test" + num))), diff --git a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json index a5debfb988386..6fa22f1c36ef9 100644 --- a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json +++ b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/filebeat.json @@ -676,9 +676,6 @@ "type": { "ignore_above": 1024, "type": "keyword" - }, - "completion": { - "type": "completion" } } } From 948f2a95f2c0a6e2bc9cbc68dfc9b24bd81683b7 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Tue, 10 May 2022 10:00:07 +0200 Subject: [PATCH 15/19] remove --- .../org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java index 6a85c3533c17a..036cd042f8ec0 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java @@ -138,7 +138,7 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) { codec, segmentInfo.getDiagnostics(), segmentInfo.getId(), - 
segmentInfo.getAttributes(), // adapt attributes so that per-field format codecs are overriden + segmentInfo.getAttributes(), segmentInfo.getIndexSort() ); segmentInfo1.setFiles(segmentInfo.files()); From 56d391c364c486ee935aaad75d351fe241d0b040 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Tue, 10 May 2022 14:40:50 +0200 Subject: [PATCH 16/19] no fielddata --- .../index/mapper/TextFieldMapper.java | 9 ++++--- .../ConstantScoreTextFieldTypeTests.java | 7 ------ .../index/mapper/TextFieldMapperTests.java | 25 +++++++++++++++++++ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index ed6e4b178b6fa..df86e5ae05568 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -345,12 +345,13 @@ private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext c TextFieldType ft; if (indexCreatedVersion.isLegacyIndexVersion()) { ft = new ConstantScoreTextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); + // ignore fieldData and eagerGlobalOrdinals } else { ft = new TextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); - } - ft.eagerGlobalOrdinals = eagerGlobalOrdinals.getValue(); - if (fieldData.getValue()) { - ft.setFielddata(true, freqFilter.getValue()); + ft.eagerGlobalOrdinals = eagerGlobalOrdinals.getValue(); + if (fieldData.getValue()) { + ft.setFielddata(true, freqFilter.getValue()); + } } return ft; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java index fa2947d9d0b3b..e63e5e816483f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java @@ -48,13 +48,6 @@ private static ConstantScoreTextFieldType createFieldType() { return new ConstantScoreTextFieldType("field"); } - public void testIsAggregatableDependsOnFieldData() { - ConstantScoreTextFieldType ft = createFieldType(); - assertFalse(ft.isAggregatable()); - ft.setFielddata(true); - assertTrue(ft.isAggregatable()); - } - public void testTermQuery() { MappedFieldType ft = createFieldType(); assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null)); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 00567082ceac3..0edef028c8123 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1105,4 +1105,29 @@ public void testUnknownAnalyzerOnLegacyIndex() throws IOException { merge(mapperService, differentAnalyzer); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); } + + public void testIgnoreFieldDataOnLegacyIndex() throws IOException { + XContentBuilder mapping = fieldMapping(b -> b.field("type", "text").field("fielddata", true)); + MapperService mapperService = createMapperService(mapping); + assertTrue(((TextFieldMapper) 
mapperService.documentMapper().mappers().getMapper("field")).fieldType().fielddata()); + + mapperService = createMapperService(Version.fromString("5.0.0"), mapping); + assertFalse(((TextFieldMapper) mapperService.documentMapper().mappers().getMapper("field")).fieldType().fielddata()); + + MapperService finalMapperService = mapperService; + expectThrows( + IllegalArgumentException.class, + () -> ((TextFieldMapper) finalMapperService.documentMapper().mappers().getMapper("field")).fieldType() + .fielddataBuilder("test", null) + ); + } + + public void testIgnoreEagerGlobalOrdinalsOnLegacyIndex() throws IOException { + XContentBuilder mapping = fieldMapping(b -> b.field("type", "text").field("eager_global_ordinals", true)); + MapperService mapperService = createMapperService(mapping); + assertTrue(((TextFieldMapper) mapperService.documentMapper().mappers().getMapper("field")).fieldType().eagerGlobalOrdinals()); + + mapperService = createMapperService(Version.fromString("5.0.0"), mapping); + assertFalse(((TextFieldMapper) mapperService.documentMapper().mappers().getMapper("field")).fieldType().eagerGlobalOrdinals()); + } } From 604d70c05b677de421b18d6ab4849efa24b02607 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Tue, 10 May 2022 15:24:27 +0200 Subject: [PATCH 17/19] no spans --- .../index/mapper/TextFieldMapper.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index a1d33f63ee20d..e91a8b848ce3b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -345,7 +345,7 @@ private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext c TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer); TextFieldType ft; if (indexCreatedVersion.isLegacyIndexVersion()) { - ft = new ConstantScoreTextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); + ft = new LegacyTextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); // ignore fieldData and eagerGlobalOrdinals } else { ft = new TextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue()); @@ -983,6 +983,19 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, } + static class LegacyTextFieldType extends ConstantScoreTextFieldType { + + LegacyTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { + super(name, indexed, stored, tsi, meta); + } + + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, SearchExecutionContext context) { + throw new IllegalArgumentException("Cannot use span prefix queries on text field " + name() + " of a legacy index"); + } + + } + private final Version indexCreatedVersion; private final boolean index; private final boolean store; From 817b5a3d347f21f82d4a3550682ab01c69f5bd16 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 11 May 2022 11:31:03 +0200 Subject: [PATCH 18/19] disable norms properly --- .../index/mapper/TextFieldMapper.java | 9 ++++++++- .../elasticsearch/oldrepos/OldMappingsIT.java | 16 ++++++++++++++++ .../org/elasticsearch/oldrepos/custom.json | 1 - 3 files changed, 24 insertions(+), 2 
deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index e91a8b848ce3b..5911d3abe796c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -438,7 +438,14 @@ public Map indexAnalyzers(String name, SubFieldInfo phras @Override public TextFieldMapper build(MapperBuilderContext context) { - FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); + FieldType fieldType = TextParams.buildFieldType( + index, + store, + indexOptions, + // legacy indices do not have access to norms + indexCreatedVersion.isLegacyIndexVersion() ? () -> false : norms, + termVectors + ); TextFieldType tft = buildFieldType(fieldType, context, indexCreatedVersion); SubFieldInfo phraseFieldInfo = buildPhraseInfo(fieldType, tft); SubFieldInfo prefixFieldInfo = buildPrefixInfo(context, fieldType, tft); diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java index f82b640681950..6f49856cdcb25 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java @@ -255,6 +255,22 @@ public void testConstantScoringOnTextField() throws IOException { assertEquals(1.0d, (double) hit.get("_score"), 0.01d); } + public void testFieldsExistQueryOnTextField() throws IOException { + Request search = new Request("POST", "/" + "custom" + "/_search"); + XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) + .startObject() + .startObject("query") + .startObject("exists") + .field("field", "apache2.access.agent") + .endObject() + .endObject() + .endObject(); + search.setJsonEntity(Strings.toString(query)); + Map response = entityAsMap(client().performRequest(search)); + List hits = (List) (XContentMapValues.extractValue("hits.hits", response)); + assertThat(hits, hasSize(2)); + } + public void testSearchFieldsOnPlaceholderField() throws IOException { Request search = new Request("POST", "/" + "custom" + "/_search"); XContentBuilder query = XContentBuilder.builder(XContentType.JSON.xContent()) diff --git a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json index c9c4b34179223..ae52ccbcce330 100644 --- a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json +++ b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json @@ -5,7 +5,6 @@ "access": { "properties": { "agent": { - "norms": false, "type": "text" }, "url": { From 1f96dfc911bcbde329331f6284f57c3254d5cd81 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 11 May 2022 12:51:53 +0200 Subject: [PATCH 19/19] fix existsQuery on text fields --- .../index/mapper/TextFieldMapper.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 5911d3abe796c..2b0874ba84aba 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -46,6 +46,7 @@ import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.AutomatonQueries; @@ -79,6 +80,8 @@ import java.util.function.IntPredicate; import java.util.function.Supplier; +import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; + /** A {@link FieldMapper} for full-text fields. */ public class TextFieldMapper extends FieldMapper { @@ -992,8 +995,12 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, static class LegacyTextFieldType extends ConstantScoreTextFieldType { + private final MappedFieldType existQueryFieldType; + LegacyTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { super(name, indexed, stored, tsi, meta); + // norms are not available, neither are doc-values, so fall back to _source to run exists query + existQueryFieldType = KeywordScriptFieldType.sourceOnly(name()).asMappedFieldTypes().findFirst().get(); } @Override @@ -1001,6 +1008,16 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew throw new IllegalArgumentException("Cannot use span prefix queries on text field " + name() + " of a legacy index"); } + @Override + public Query existsQuery(SearchExecutionContext context) { + if (context.allowExpensiveQueries() == false) { + throw new ElasticsearchException( + "runtime-computed exists query cannot be executed while [" + ALLOW_EXPENSIVE_QUERIES.getKey() + "] is set to [false]." + ); + } + return existQueryFieldType.existsQuery(context); + } + } private final Version indexCreatedVersion;
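/*
 * Standalone sketch, not part of the patch (class and field names are illustrative), of why
 * the existsQuery override above has to fall back to a _source-based runtime field: legacy
 * text fields are indexed without norms (see the "disable norms properly" commit) and carry
 * no doc values, so there is nothing on disk for a norms- or doc-values-driven exists query
 * to read.
 */
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.store.ByteBuffersDirectory;

public class LegacyTextExistsSketch {
    public static void main(String[] args) throws Exception {
        FieldType noNorms = new FieldType();
        noNorms.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        noNorms.setTokenized(true);
        noNorms.setOmitNorms(true); // what the legacy-index branch effectively does for norms
        noNorms.freeze();

        try (ByteBuffersDirectory dir = new ByteBuffersDirectory()) {
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                Document doc = new Document();
                doc.add(new Field("agent", "agent1", noNorms));
                writer.addDocument(doc);
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                LeafReader leaf = reader.leaves().get(0).reader();
                // Both per-document structures an exists query would normally consult are absent.
                System.out.println("norms: " + leaf.getNormValues("agent"));              // null
                System.out.println("doc values: " + leaf.getSortedSetDocValues("agent")); // null
            }
        }
    }
}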