|
| 1 | +/* |
| 2 | + * @notice |
| 3 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 4 | + * contributor license agreements. See the NOTICE file distributed with |
| 5 | + * this work for additional information regarding copyright ownership. |
| 6 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 7 | + * (the "License"); you may not use this file except in compliance with |
| 8 | + * the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + * |
| 18 | + * Modifications copyright (C) 2021 Elasticsearch B.V. |
| 19 | + */ |
| 20 | +package org.elasticsearch.xpack.lucene.bwc.codecs; |
| 21 | + |
| 22 | +import org.apache.lucene.codecs.FieldsConsumer; |
| 23 | +import org.apache.lucene.codecs.FieldsProducer; |
| 24 | +import org.apache.lucene.codecs.NormsProducer; |
| 25 | +import org.apache.lucene.codecs.PostingsFormat; |
| 26 | +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; |
| 27 | +import org.apache.lucene.index.FieldInfo; |
| 28 | +import org.apache.lucene.index.Fields; |
| 29 | +import org.apache.lucene.index.IndexOptions; |
| 30 | +import org.apache.lucene.index.MergeState; |
| 31 | +import org.apache.lucene.index.SegmentReadState; |
| 32 | +import org.apache.lucene.index.SegmentWriteState; |
| 33 | +import org.apache.lucene.index.Terms; |
| 34 | +import org.elasticsearch.core.IOUtils; |
| 35 | + |
| 36 | +import java.io.Closeable; |
| 37 | +import java.io.IOException; |
| 38 | +import java.util.ArrayList; |
| 39 | +import java.util.Collections; |
| 40 | +import java.util.HashMap; |
| 41 | +import java.util.IdentityHashMap; |
| 42 | +import java.util.Iterator; |
| 43 | +import java.util.List; |
| 44 | +import java.util.Map; |
| 45 | +import java.util.TreeMap; |
| 46 | + |
| 47 | +/** |
| 48 | + * Modified version of {@link PerFieldPostingsFormat} that allows swapping in |
| 49 | + * {@link org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat} instead of |
| 50 | + * {@link org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat} when reading from older |
| 51 | + * codecs. The former has full support for older Lucene versions (going back to Lucene 5) while the |
| 52 | + * latter only supports Lucene 7 and above (as it was shipped with backwards-codecs of Lucene 9 that |
| 53 | + * only has support for N-2). |
| 54 | + * |
| 55 | + * This class can probably be removed once we are on Lucene 10 and Lucene50PostingsFormat is no longer |
| 56 | + * shipped as part of bwc jars. |
| 57 | + * |
| 58 | + * Swapping out formats can be done via the {@link #getPostingsFormat(String) method}. |
| 59 | + */ |
| 60 | +public abstract class LegacyAdaptingPerFieldPostingsFormat extends PostingsFormat { |
| 61 | + /** Name of this {@link PostingsFormat}. */ |
| 62 | + public static final String PER_FIELD_NAME = "PerField40"; |
| 63 | + |
| 64 | + /** {@link FieldInfo} attribute name used to store the format name for each field. */ |
| 65 | + public static final String PER_FIELD_FORMAT_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".format"; |
| 66 | + |
| 67 | + /** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */ |
| 68 | + public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix"; |
| 69 | + |
| 70 | + /** Sole constructor. */ |
| 71 | + protected LegacyAdaptingPerFieldPostingsFormat() { |
| 72 | + super(PER_FIELD_NAME); |
| 73 | + } |
| 74 | + |
| 75 | + static String getSuffix(String formatName, String suffix) { |
| 76 | + return formatName + "_" + suffix; |
| 77 | + } |
| 78 | + |
| 79 | + protected PostingsFormat getPostingsFormat(String formatName) { |
| 80 | + throw new IllegalArgumentException(formatName); |
| 81 | + } |
| 82 | + |
| 83 | + private class FieldsWriter extends FieldsConsumer { |
| 84 | + final SegmentWriteState writeState; |
| 85 | + final List<Closeable> toClose = new ArrayList<Closeable>(); |
| 86 | + |
| 87 | + FieldsWriter(SegmentWriteState writeState) { |
| 88 | + this.writeState = writeState; |
| 89 | + } |
| 90 | + |
| 91 | + @Override |
| 92 | + public void write(Fields fields, NormsProducer norms) throws IOException { |
| 93 | + throw new IllegalStateException("This codec should only be used for reading, not writing"); |
| 94 | + } |
| 95 | + |
| 96 | + @Override |
| 97 | + public void merge(MergeState mergeState, NormsProducer norms) throws IOException { |
| 98 | + throw new IllegalStateException("This codec should only be used for reading, not writing"); |
| 99 | + } |
| 100 | + |
| 101 | + @Override |
| 102 | + public void close() throws IOException { |
| 103 | + IOUtils.close(toClose); |
| 104 | + } |
| 105 | + } |
| 106 | + |
| 107 | + private static class FieldsReader extends FieldsProducer { |
| 108 | + |
| 109 | + private final Map<String, FieldsProducer> fields = new TreeMap<>(); |
| 110 | + private final Map<String, FieldsProducer> formats = new HashMap<>(); |
| 111 | + private final String segment; |
| 112 | + |
| 113 | + // clone for merge |
| 114 | + FieldsReader(FieldsReader other) { |
| 115 | + Map<FieldsProducer, FieldsProducer> oldToNew = new IdentityHashMap<>(); |
| 116 | + // First clone all formats |
| 117 | + for (Map.Entry<String, FieldsProducer> ent : other.formats.entrySet()) { |
| 118 | + FieldsProducer values = ent.getValue().getMergeInstance(); |
| 119 | + formats.put(ent.getKey(), values); |
| 120 | + oldToNew.put(ent.getValue(), values); |
| 121 | + } |
| 122 | + |
| 123 | + // Then rebuild fields: |
| 124 | + for (Map.Entry<String, FieldsProducer> ent : other.fields.entrySet()) { |
| 125 | + FieldsProducer producer = oldToNew.get(ent.getValue()); |
| 126 | + assert producer != null; |
| 127 | + fields.put(ent.getKey(), producer); |
| 128 | + } |
| 129 | + |
| 130 | + segment = other.segment; |
| 131 | + } |
| 132 | + |
| 133 | + FieldsReader(final SegmentReadState readState, LegacyAdaptingPerFieldPostingsFormat legacyAdaptingPerFieldPostingsFormat) |
| 134 | + throws IOException { |
| 135 | + |
| 136 | + // Read _X.per and init each format: |
| 137 | + boolean success = false; |
| 138 | + try { |
| 139 | + // Read field name -> format name |
| 140 | + for (FieldInfo fi : readState.fieldInfos) { |
| 141 | + if (fi.getIndexOptions() != IndexOptions.NONE) { |
| 142 | + final String fieldName = fi.name; |
| 143 | + final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY); |
| 144 | + if (formatName != null) { |
| 145 | + // null formatName means the field is in fieldInfos, but has no postings! |
| 146 | + final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY); |
| 147 | + if (suffix == null) { |
| 148 | + throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName); |
| 149 | + } |
| 150 | + PostingsFormat format = legacyAdaptingPerFieldPostingsFormat.getPostingsFormat(formatName); |
| 151 | + String segmentSuffix = getSuffix(formatName, suffix); |
| 152 | + if (formats.containsKey(segmentSuffix) == false) { |
| 153 | + formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); |
| 154 | + } |
| 155 | + fields.put(fieldName, formats.get(segmentSuffix)); |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | + success = true; |
| 160 | + } finally { |
| 161 | + if (success == false) { |
| 162 | + IOUtils.closeWhileHandlingException(formats.values()); |
| 163 | + } |
| 164 | + } |
| 165 | + |
| 166 | + this.segment = readState.segmentInfo.name; |
| 167 | + } |
| 168 | + |
| 169 | + @Override |
| 170 | + public Iterator<String> iterator() { |
| 171 | + return Collections.unmodifiableSet(fields.keySet()).iterator(); |
| 172 | + } |
| 173 | + |
| 174 | + @Override |
| 175 | + public Terms terms(String field) throws IOException { |
| 176 | + FieldsProducer fieldsProducer = fields.get(field); |
| 177 | + return fieldsProducer == null ? null : fieldsProducer.terms(field); |
| 178 | + } |
| 179 | + |
| 180 | + @Override |
| 181 | + public int size() { |
| 182 | + return fields.size(); |
| 183 | + } |
| 184 | + |
| 185 | + @Override |
| 186 | + public void close() throws IOException { |
| 187 | + IOUtils.close(formats.values()); |
| 188 | + } |
| 189 | + |
| 190 | + @Override |
| 191 | + public void checkIntegrity() throws IOException { |
| 192 | + for (FieldsProducer producer : formats.values()) { |
| 193 | + producer.checkIntegrity(); |
| 194 | + } |
| 195 | + } |
| 196 | + |
| 197 | + @Override |
| 198 | + public FieldsProducer getMergeInstance() { |
| 199 | + return new FieldsReader(this); |
| 200 | + } |
| 201 | + |
| 202 | + @Override |
| 203 | + public String toString() { |
| 204 | + return "PerFieldPostings(segment=" + segment + " formats=" + formats.size() + ")"; |
| 205 | + } |
| 206 | + } |
| 207 | + |
| 208 | + @Override |
| 209 | + public final FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| 210 | + return new FieldsWriter(state); |
| 211 | + } |
| 212 | + |
| 213 | + @Override |
| 214 | + public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| 215 | + return new FieldsReader(state, this); |
| 216 | + } |
| 217 | +} |
0 commit comments