Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/VecSim/algorithms/brute_force/brute_force.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ class BruteForceIndex : public VecSimIndexAbstract<DataType, DistType> {
idToLabelMapping.shrink_to_fit();
resizeLabelLookup(idToLabelMapping.size());
}

// Test-only accessor: reports the capacity() of the idToLabelMapping container.
size_t indexMetaDataCapacity() const override {
    const size_t mapping_capacity = idToLabelMapping.capacity();
    return mapping_capacity;
}
#endif

protected:
Expand Down
2 changes: 2 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
resizeLabelLookup(idToMetaData.size());
}
}

// Test-only accessor: reports the capacity() of the idToMetaData container.
size_t indexMetaDataCapacity() const override {
    const size_t meta_data_capacity = idToMetaData.capacity();
    return meta_data_capacity;
}
#endif

protected:
Expand Down
4 changes: 4 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_tiered.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {

#ifdef BUILD_TESTS
void getDataByLabel(labelType label, std::vector<std::vector<DataType>> &vectors_output) const;
// Test-only accessor: the tiered index reports the sum of the metadata
// capacities of its backend and frontend indexes.
// NOTE(review): no locks are taken here, whereas the tiered SVS variant holds
// its index guards in shared mode — confirm callers only use this when idle.
size_t indexMetaDataCapacity() const override {
    const size_t backend_cap = this->backendIndex->indexMetaDataCapacity();
    const size_t frontend_cap = this->frontendIndex->indexMetaDataCapacity();
    return backend_cap + frontend_cap;
}
#endif
};

Expand Down
1 change: 1 addition & 0 deletions src/VecSim/algorithms/svs/svs.h
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl

public:
void fitMemory() override {}
// For SVS the metadata capacity is reported as the value of indexCapacity().
size_t indexMetaDataCapacity() const override {
    const size_t capacity = this->indexCapacity();
    return capacity;
}
std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const override {

// For compressed/quantized indices, this function is not meaningful
Expand Down
6 changes: 6 additions & 0 deletions src/VecSim/algorithms/svs/svs_tiered.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,12 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
backend_index_t *GetBackendIndex() { return this->backendIndex; }
void submitSingleJob(AsyncJob *job) { Base::submitSingleJob(job); }
void submitJobs(vecsim_stl::vector<AsyncJob *> &jobs) { Base::submitJobs(jobs); }
// Test-only accessor: sums the metadata capacities of the frontend and backend
// indexes while holding both index guards in shared mode.
size_t indexMetaDataCapacity() const override {
    // Acquire the flat-index guard first, then the main-index guard.
    std::shared_lock<std::shared_mutex> flat_guard_lock(this->flatIndexGuard);
    std::shared_lock<std::shared_mutex> main_guard_lock(this->mainIndexGuard);

    const size_t frontend_cap = this->frontendIndex->indexMetaDataCapacity();
    const size_t backend_cap = this->backendIndex->indexMetaDataCapacity();
    return frontend_cap + backend_cap;
}
#endif

private:
Expand Down
10 changes: 10 additions & 0 deletions src/VecSim/vec_sim_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,5 +217,15 @@ struct VecSimIndexInterface : public VecsimBaseObject {
}
#ifdef BUILD_TESTS
virtual void fitMemory() = 0;
/**
 * @brief Get the capacity of the index's metadata containers.
 *
 * @return The capacity of the metadata containers, in number of elements.
 * The value returned from this function may differ from the value returned by
 * indexCapacity(). For example, the HNSW implementation returns the capacity of
 * its idToMetaData container, while indexCapacity() reflects the capacity of the
 * vectors container. Tiered indexes return the sum of the frontend and backend
 * indexes' metadata capacities.
 */
virtual size_t indexMetaDataCapacity() const = 0;
#endif
};
18 changes: 13 additions & 5 deletions tests/benchmark/bm_vecsim_basics.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,9 @@ void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {
// Calculate vectors needed to reach next block boundary
size_t vecs_to_blocksize =
BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size);
size_t initial_index_cap = index->indexMetaDataCapacity();
assert(initial_index_cap == N_VECTORS + vecs_to_blocksize);

assert(vecs_to_blocksize < BM_VecSimGeneral::block_size);
labelType initial_label_count = index->indexLabelCount();
labelType curr_label = initial_label_count;
Expand All @@ -342,24 +345,29 @@ void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {

// Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles
labelType label_to_update = curr_label - 1;
size_t index_cap = index->indexCapacity();
size_t index_cap = index->indexMetaDataCapacity();
std::cout << "index_cap after adding vectors " << index_cap << std::endl;
assert(index_cap == initial_index_cap + BM_VecSimGeneral::block_size);

for (auto _ : st) {
// Remove the vector directly from hnsw
size_t ret = VecSimIndex_DeleteVector(
GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ? INDEX_HNSW : st.range(0)),
label_to_update);
assert(ret == 1);
assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size);
// Capacity should shrink by one block after deletion

// Capacity should not change
size_t curr_cap = index->indexMetaDataCapacity();
assert(curr_cap == index_cap);
ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(),
label_to_update);
assert(ret == 1);
BM_VecSimGeneral::mock_thread_pool->thread_pool_wait();
assert(VecSimIndex_IndexSize(
GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ? INDEX_HNSW : st.range(0))) ==
N_VECTORS + added_vec_count);
// Capacity should grow back to original size after addition
assert(index->indexCapacity() == index_cap);
// Capacity should not change
assert(index->indexMetaDataCapacity() == index_cap);
}
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);

Expand Down
2 changes: 2 additions & 0 deletions tests/unit/test_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) {

ASSERT_EQ(bfIndex->indexCapacity(), expected_map_containers_size);
ASSERT_EQ(bfIndex->idToLabelMapping.capacity(), expected_map_containers_size);
ASSERT_EQ(bfIndex->indexMetaDataCapacity(), expected_map_containers_size);
ASSERT_EQ(bfIndex->idToLabelMapping.size(), expected_map_containers_size);
ASSERT_GE(bfIndex->labelToIdLookup.bucket_count(), expected_map_containers_size);
};
Expand Down Expand Up @@ -536,6 +537,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
ASSERT_EQ(hnswIndex->vectors->size(), expected_size);

ASSERT_EQ(hnswIndex->idToMetaData.capacity(), expected_map_containers_size);
ASSERT_EQ(hnswIndex->indexMetaDataCapacity(), expected_map_containers_size);
ASSERT_EQ(hnswIndex->idToMetaData.size(), expected_map_containers_size);
ASSERT_GE(hnswIndex->labelLookup.bucket_count(), expected_map_containers_size);
// Also validate that there are no unidirectional connections (these add memory to the
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/test_hnsw_tiered.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4364,6 +4364,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
ASSERT_EQ(hnsw_index->indexSize(), 1);
ASSERT_EQ(hnsw_index->indexCapacity(), blockSize);
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize);
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
hnsw_index->indexMetaDataCapacity() +
tiered_index->frontendIndex->indexMetaDataCapacity());

// add up to block size
for (size_t i = 1; i < blockSize; i++) {
Expand All @@ -4374,6 +4379,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
ASSERT_EQ(hnsw_index->indexSize(), blockSize);
ASSERT_EQ(hnsw_index->indexCapacity(), blockSize);
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize);
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
hnsw_index->indexMetaDataCapacity() +
tiered_index->frontendIndex->indexMetaDataCapacity());

// add one more vector to trigger another resize
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, blockSize);
Expand All @@ -4383,6 +4393,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1);
ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize);
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
hnsw_index->indexMetaDataCapacity() +
tiered_index->frontendIndex->indexMetaDataCapacity());

// delete a vector to shrink data blocks
ASSERT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1) << "Failed to delete vector 0";
Expand All @@ -4394,6 +4409,8 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
ASSERT_EQ(hnsw_index->indexSize(), blockSize);
ASSERT_EQ(hnsw_index->indexCapacity(), blockSize);
// meta data capacity should not shrink
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);

// add this vector again and verify lock was acquired to resize
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 0);
Expand All @@ -4402,6 +4419,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1);
ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize);
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
hnsw_index->indexMetaDataCapacity() +
tiered_index->frontendIndex->indexMetaDataCapacity());

// add up to block size (count = 2 blockSize), the lock shouldn't be acquired because no resize
// is required
Expand All @@ -4412,4 +4434,9 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
ASSERT_EQ(hnsw_index->indexSize(), 2 * blockSize);
ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize);
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
hnsw_index->indexMetaDataCapacity() +
tiered_index->frontendIndex->indexMetaDataCapacity());
}
2 changes: 2 additions & 0 deletions tests/unit/test_svs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,7 @@ TYPED_TEST(SVSTest, resizeIndex) {
}
// The size (+extra) and the capacity should be equal.
ASSERT_EQ(index->indexCapacity(), VecSimIndex_IndexSize(index) + extra_cap);
ASSERT_EQ(index->indexMetaDataCapacity(), index->indexCapacity());
// The capacity shouldn't be changed.
ASSERT_EQ(index->indexCapacity(), n + extra_cap);

Expand Down Expand Up @@ -878,6 +879,7 @@ TYPED_TEST(SVSTest, svs_empty_index) {

// The expected capacity should be 0 for empty index.
ASSERT_EQ(index->indexCapacity(), 0);
ASSERT_EQ(index->indexMetaDataCapacity(), index->indexCapacity());

// Try to remove it again.
VecSimIndex_DeleteVector(index, 1);
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/test_svs_tiered.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ TYPED_TEST(SVSTieredIndexTest, addVector) {
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 0);
ASSERT_EQ(tiered_index->GetFlatIndex()->indexCapacity(), DEFAULT_BLOCK_SIZE);
ASSERT_EQ(tiered_index->indexCapacity(), DEFAULT_BLOCK_SIZE);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(), tiered_index->indexCapacity());
ASSERT_EQ(tiered_index->GetFlatIndex()->getDistanceFrom_Unsafe(vec_label, vector), 0);
ASSERT_EQ(mock_thread_pool.jobQ.size(), mock_thread_pool.thread_pool_size);

Expand Down Expand Up @@ -624,6 +625,7 @@ TYPED_TEST(SVSTieredIndexTest, insertJob) {
? DEFAULT_BLOCK_SIZE
: tiered_index->GetBackendIndex()->indexCapacity();
ASSERT_EQ(tiered_index->indexCapacity(), expected_capacity);
ASSERT_EQ(tiered_index->indexMetaDataCapacity(), tiered_index->indexCapacity());
ASSERT_EQ(tiered_index->GetFlatIndex()->indexCapacity(), 0);
ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(vec_label, vector), 0);
}
Expand Down
Loading