From 4d1bde8be0208146a0ed80f57f040fc8ad9edbd1 Mon Sep 17 00:00:00 2001
From: David Hu
Date: Tue, 19 May 2026 15:11:06 +0800
Subject: [PATCH] fix: increase ANN search K from 50 to 200 for 72K+ person pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With 72K+ stranger persons (~364K face nodes in the HNSW index), K=50
per prototype query was too small: high-similarity candidates (e.g. 77%)
were being missed because 50+ other stranger face nodes ranked above
them for prominent persons like 胡波 (3505 faces).

Increase annSearchK 50→200 and annHNSWEfSearch 100→200 (must be >= K
per HNSW guidelines) so each of the ~210 per-patrol queries covers a
wider neighborhood, ensuring borderline pairs above the merge-suggestion
threshold (55%) are not silently dropped.

Co-Authored-By: Claude Sonnet 4.6
---
 backend/internal/service/person_merge_suggestion_ann.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/internal/service/person_merge_suggestion_ann.go b/backend/internal/service/person_merge_suggestion_ann.go
index 8a74d94..9e14c75 100644
--- a/backend/internal/service/person_merge_suggestion_ann.go
+++ b/backend/internal/service/person_merge_suggestion_ann.go
@@ -8,9 +8,9 @@ import (
 )
 
 const (
-	annSearchK      = 50  // neighbors per prototype query
+	annSearchK      = 200 // neighbors per prototype query; 72K+ person pool needs wider net than 50
 	annHNSWM        = 16  // max neighbors per node; 16 gives better recall at 60K+ scale vs 8
-	annHNSWEfSearch = 100 // search beam width; high value ensures recall near threshold boundary
+	annHNSWEfSearch = 200 // search beam width; must be >= annSearchK
 )
 
 // annIndex is a cached HNSW nearest-neighbor index over all person prototype embeddings.