Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ const LINEAR_SEARCH_THRESHOLD: u64 = 0;
pub struct QueryBuilder<'a, D: Distance> {
/// Reader this query is built from; `Reader::nns` stores `self` here.
reader: &'a Reader<'a, D>,
/// Optional bitmap of item ids; when set, only ids it contains are pushed into the results.
candidates: Option<&'a RoaringBitmap>,
/// Optional predicate called with `(item id, distance)`; items for which it returns `false`
/// are skipped when the results are collected.
filter: Option<Box<dyn Fn(u32, f32) -> bool + 'a>>,
/// Number of items the caller asked to receive; also used as the result vector's capacity.
count: usize,
/// Initialised to `DEFAULT_EF_SEARCH` by `Reader::nns`.
/// NOTE(review): presumably the search buffer size — confirm against the `ef` setter.
ef: usize,
}
Expand Down Expand Up @@ -101,6 +102,22 @@ impl<'a, D: Distance> QueryBuilder<'a, D> {
self
}

/// Keep only the items accepted by a caller-supplied predicate.
///
/// The predicate is called with the item id and its distance. It must return `true` to keep
/// the item; returning `false` excludes the item from the results.
/// Calling this method again replaces any predicate set previously.
///
/// # Examples
///
/// ```no_run
/// # use hannoy::{Reader, distances::Euclidean};
/// # let (reader, rtxn): (Reader<Euclidean>, heed::RoTxn) = todo!();
/// // Keep only the items with an even id.
/// reader.nns(20).filter(|id, _distance| id % 2 == 0).by_item(&rtxn, 6);
/// ```
pub fn filter<F>(&mut self, filter: F) -> &mut Self
where
    F: Fn(u32, f32) -> bool + 'a,
{
    // Erase the concrete closure type so that the builder does not need an extra generic.
    let predicate: Box<dyn Fn(u32, f32) -> bool + 'a> = Box::new(filter);
    self.filter = Some(predicate);
    self
}

/// Specify a search buffer size from which the closest elements are returned. Increasing this
/// value improves the search relevancy but increases latency as more neighbours need to be
/// searched.
Expand Down Expand Up @@ -334,7 +351,7 @@ impl<'t, D: Distance> Reader<'t, D> {
///
/// You must provide the number of items you want to receive.
pub fn nns(&self, count: usize) -> QueryBuilder<D> {
    // A fresh builder starts without a candidate bitmap and without a filter predicate;
    // both are opt-in through the builder's setters. Every field must be initialised here,
    // including `filter`, otherwise the struct expression does not compile.
    QueryBuilder {
        reader: self,
        candidates: None,
        filter: None,
        count,
        ef: DEFAULT_EF_SEARCH,
    }
}

/// Get a generic read node from the database using the version of the database found while creating the reader.
Expand Down Expand Up @@ -437,6 +454,12 @@ impl<'t, D: Distance> Reader<'t, D> {

let mut nns = Vec::with_capacity(opt.count);
while let Some((OrderedFloat(f), id)) = neighbours.pop_min() {
if let Some(filter) = &opt.filter {
if !filter(id, f) {
continue;
}
}

if opt.candidates.is_none_or(|candidates| candidates.contains(id)) {
nns.push((id, f));
}
Expand Down
Loading