From 74ed7d48342a772843a033eb3fa41caa7cd7cb46 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 26 Jun 2025 13:59:18 +0100 Subject: [PATCH 1/5] Fix ES818BinaryQuantizedVectorsReader to not use directIO during merge This commit fixes the BBQ reader to **not** use directIO when merging the original float vectors. --- .../ES818BinaryQuantizedVectorsReader.java | 21 +++++++++- .../vectors/es818/MergeReaderWrapper.java | 5 +++ ...S818BinaryQuantizedVectorsFormatTests.java | 40 +++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index dd3e59be26460..6de775c4773b5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -65,7 +65,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class); - private final Map fields = new HashMap<>(); + private final Map fields; private final IndexInput quantizedVectorData; private final FlatVectorsReader rawVectorsReader; private final ES818BinaryFlatVectorsScorer vectorScorer; @@ -77,6 +77,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme ES818BinaryFlatVectorsScorer vectorsScorer ) throws IOException { super(vectorsScorer); + this.fields = new HashMap<>(); this.vectorScorer = vectorsScorer; this.rawVectorsReader = rawVectorsReader; int versionMeta = -1; @@ -120,6 +121,24 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme } } + private ES818BinaryQuantizedVectorsReader(ES818BinaryQuantizedVectorsReader clone, 
FlatVectorsReader rawVectorsReader) { + super(clone.vectorScorer); + this.rawVectorsReader = rawVectorsReader; + this.vectorScorer = clone.vectorScorer; + this.quantizedVectorData = clone.quantizedVectorData; + this.fields = clone.fields; + } + + // For testing + FlatVectorsReader getRawVectorsReader() { + return rawVectorsReader; + } + + @Override + public FlatVectorsReader getMergeInstance() { + return new ES818BinaryQuantizedVectorsReader(this, rawVectorsReader.getMergeInstance()); + } + private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java index 4d9d7e03848c8..e74b0aad12723 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java @@ -36,6 +36,11 @@ protected MergeReaderWrapper(FlatVectorsReader mainReader, FlatVectorsReader mer this.mergeReader = mergeReader; } + // For testing + FlatVectorsReader getMainReader() { + return mainReader; + } + @Override public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException { return mainReader.getRandomVectorScorer(field, target); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 9a20b56d80ba4..7f52c9ac34cdd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -35,6 +36,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SoftDeletesRetentionMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.VectorSimilarityFunction; @@ -83,6 +85,7 @@ import static java.lang.String.format; import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -309,6 +312,43 @@ public void testSimpleOffHeapSizeImpl(Directory dir, IndexWriterConfig config, b } } + public void testMergeInstance() throws IOException { + checkDirectIOSupported(); + float[] vector = randomVector(10); + VectorSimilarityFunction similarityFunction = randomSimilarity(); + KnnFloatVectorField knnField = new KnnFloatVectorField("field", vector, similarityFunction); + try (Directory dir = newFSDirectory()) { + try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setUseCompoundFile(false))) { + for (int i = 0; i < 10; i++) { + Document doc = new Document(); + knnField.setVectorValue(randomVector(10)); + doc.add(knnField); + w.addDocument(doc); + } + w.commit(); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + SegmentReader r = 
(SegmentReader) getOnlyLeafReader(reader); + assertThat(unwrapRawVectorReader("field", r.getVectorReader()), instanceOf(DirectIOLucene99FlatVectorsReader.class)); + assertThat(unwrapRawVectorReader("field", r.getVectorReader().getMergeInstance()), instanceOf(Lucene99FlatVectorsReader.class)); + } + } + } + } + + private static KnnVectorsReader unwrapRawVectorReader(String fieldName, KnnVectorsReader knnReader) { + if (knnReader instanceof PerFieldKnnVectorsFormat.FieldsReader perField) { + return unwrapRawVectorReader(fieldName, perField.getFieldReader(fieldName)); + } else if (knnReader instanceof ES818BinaryQuantizedVectorsReader bbqReader) { + return unwrapRawVectorReader(fieldName, bbqReader.getRawVectorsReader()); + } else if (knnReader instanceof MergeReaderWrapper mergeReaderWrapper) { + return unwrapRawVectorReader(fieldName, mergeReaderWrapper.getMainReader()); + } else { + return knnReader; + } + } + static Directory newMMapDirectory() throws IOException { Directory dir = new MMapDirectory(createTempDir("ES818BinaryQuantizedVectorsFormatTests")); if (random().nextBoolean()) { From d62146f635fc0dc61025a28b51f87dc9d7daf2b4 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 26 Jun 2025 14:02:19 +0100 Subject: [PATCH 2/5] Update docs/changelog/130114.yaml --- docs/changelog/130114.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/130114.yaml diff --git a/docs/changelog/130114.yaml b/docs/changelog/130114.yaml new file mode 100644 index 0000000000000..8e5284adcf9b9 --- /dev/null +++ b/docs/changelog/130114.yaml @@ -0,0 +1,5 @@ +pr: 130114 +summary: Fix ES818BinaryQuantizedVectorsReader to not use directIO during merge +area: Vector Search +type: bug +issues: [] From d73a021eb8fe867ea00fbcfc49cb624fcc352a21 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 26 Jun 2025 13:12:33 +0000 Subject: [PATCH 3/5] [CI] Auto commit changes from spotless --- .../es818/ES818BinaryQuantizedVectorsFormatTests.java | 5 ++++- 1 
file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 7f52c9ac34cdd..cc956907b7f4b 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -331,7 +331,10 @@ public void testMergeInstance() throws IOException { try (IndexReader reader = DirectoryReader.open(w)) { SegmentReader r = (SegmentReader) getOnlyLeafReader(reader); assertThat(unwrapRawVectorReader("field", r.getVectorReader()), instanceOf(DirectIOLucene99FlatVectorsReader.class)); - assertThat(unwrapRawVectorReader("field", r.getVectorReader().getMergeInstance()), instanceOf(Lucene99FlatVectorsReader.class)); + assertThat( + unwrapRawVectorReader("field", r.getVectorReader().getMergeInstance()), + instanceOf(Lucene99FlatVectorsReader.class) + ); } } } From 806a40763faad93bb1da6ba4ae603d7ae8ae0fcb Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 26 Jun 2025 15:05:01 +0100 Subject: [PATCH 4/5] Delete docs/changelog/130114.yaml --- docs/changelog/130114.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/130114.yaml diff --git a/docs/changelog/130114.yaml b/docs/changelog/130114.yaml deleted file mode 100644 index 8e5284adcf9b9..0000000000000 --- a/docs/changelog/130114.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 130114 -summary: Fix ES818BinaryQuantizedVectorsReader to not use directIO during merge -area: Vector Search -type: bug -issues: [] From da98ed3045e9b67b96f5205f39eadbcf3d7fc8e4 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 26 Jun 2025 21:36:33 +0100 Subject: [PATCH 5/5] Fix testMergeInstance: index a single document and drop forceMerge --- .../es818/ES818BinaryQuantizedVectorsFormatTests.java | 11 ++++------- 1
file changed, 4 insertions(+), 7 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 7f52c9ac34cdd..5fb822b5106dd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -319,14 +319,11 @@ public void testMergeInstance() throws IOException { KnnFloatVectorField knnField = new KnnFloatVectorField("field", vector, similarityFunction); try (Directory dir = newFSDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setUseCompoundFile(false))) { - for (int i = 0; i < 10; i++) { - Document doc = new Document(); - knnField.setVectorValue(randomVector(10)); - doc.add(knnField); - w.addDocument(doc); - } + Document doc = new Document(); + knnField.setVectorValue(randomVector(10)); + doc.add(knnField); + w.addDocument(doc); w.commit(); - w.forceMerge(1); try (IndexReader reader = DirectoryReader.open(w)) { SegmentReader r = (SegmentReader) getOnlyLeafReader(reader);