Skip to content

Commit 82867e0

Browse files
committed
MDEV-35897 vector index search allocates too much memory for large ef_search
Never estimate that a graph search will visit more nodes than there are in the graph. In fact, let's reduce the graph size by 30% (the code divides it by 1.3): this increases the false positive rate of the bloom filter by 2% when visiting the whole graph, but it doesn't affect recall noticeably. We need to read the shared graph size under a lock, so let's store it in the otherwise unused, thread-local TABLE::used_stat_records member.
1 parent 395db6f commit 82867e0

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

sql/vector_mhnsw.cc

+8-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2024, MariaDB plc
2+
Copyright (c) 2024, 2025, MariaDB plc
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -753,6 +753,8 @@ MHNSW_Share *MHNSW_Share::get_from_share(TABLE_SHARE *share, TABLE *table)
753753
}
754754
if (ctx)
755755
ctx->refcnt++;
756+
if (table) // hijack TABLE::used_stat_records
757+
table->hlindex->used_stat_records= ctx->node_cache.size();
756758
share->unlock_share();
757759
return ctx;
758760
}
@@ -1144,8 +1146,9 @@ static int search_layer(MHNSW_Share *ctx, TABLE *graph, const FVector *target,
11441146

11451147
// WARNING! heuristic here
11461148
const double est_heuristic= 8 * std::sqrt(ctx->max_neighbors(layer));
1147-
const uint est_size= static_cast<uint>(est_heuristic * std::pow(ef, ctx->ef_power));
1148-
VisitedSet visited(root, target, est_size);
1149+
double est_size= est_heuristic * std::pow(ef, ctx->ef_power);
1150+
set_if_smaller(est_size, graph->used_stat_records/1.3);
1151+
VisitedSet visited(root, target, static_cast<uint>(est_size));
11491152

11501153
candidates.init(max_ef, false, Visited::cmp);
11511154
best.init(ef, true, Visited::cmp);
@@ -1213,9 +1216,9 @@ static int search_layer(MHNSW_Share *ctx, TABLE *graph, const FVector *target,
12131216
}
12141217
}
12151218
set_if_bigger(ctx->diameter, max_distance); // not atomic, but it's ok
1216-
if (ef > 1 && visited.count*2 > est_size)
1219+
if (ef > 1 && visited.count > est_size)
12171220
{
1218-
double ef_power= std::log(visited.count*2/est_heuristic) / std::log(ef);
1221+
double ef_power= std::log(visited.count/est_heuristic) / std::log(ef);
12191222
set_if_bigger(ctx->ef_power, ef_power); // not atomic, but it's ok
12201223
}
12211224

0 commit comments

Comments
 (0)