Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/frontend/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2110,7 +2110,7 @@ async fn collection_search(

tracing::info!(
name: "collection_search",
num_queries = payload.searches.len(),
searches = ?payload.searches.iter().map(|s| s.masked()).collect::<Vec<_>>(),
);

// Override limit by quota
Expand Down
87 changes: 87 additions & 0 deletions rust/types/src/execution/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,20 @@ impl TryFrom<Filter> for chroma_proto::FilterOperator {
}
}

impl Filter {
/// Returns a masked copy suitable for logging.
/// Sensitive values are replaced with placeholders while preserving structure.
pub fn masked(&self) -> Self {
Filter {
query_ids: self
.query_ids
.as_ref()
.map(|ids| vec![format!("<len:{}>", ids.len())]),
where_clause: self.where_clause.as_ref().map(|w| w.masked()),
}
}
}

/// The `Knn` operator searches for the nearest neighbours of the specified embedding. This is intended for use by the executor.
///
/// # Parameters
Expand Down Expand Up @@ -896,6 +910,27 @@ impl From<SparseVector> for QueryVector {
}
}

impl QueryVector {
    /// Builds a log-safe copy of this query vector.
    ///
    /// The embedding contents are dropped; only size information survives:
    /// - `Dense` becomes a one-element vector holding the original length.
    /// - `Sparse` becomes a vector whose single index is the original number of indices and
    ///   whose single value is the original number of values; `tokens` is cleared.
    pub fn masked(&self) -> Self {
        match self {
            Self::Dense(embedding) => {
                // The lone element carries the dimension of the original embedding.
                let dimension = embedding.len() as f32;
                Self::Dense(vec![dimension])
            }
            Self::Sparse(sparse) => {
                // Both counts are stored as the single entry of their respective vectors.
                let index_count = sparse.indices.len() as u32;
                let value_count = sparse.values.len() as f32;
                Self::Sparse(SparseVector {
                    indices: vec![index_count],
                    values: vec![value_count],
                    tokens: None,
                })
            }
        }
    }
}

#[derive(Clone, Debug, PartialEq)]
pub struct KnnQuery {
pub query: QueryVector,
Expand Down Expand Up @@ -946,6 +981,14 @@ impl Rank {
.map(RankExpr::knn_queries)
.unwrap_or_default()
}

/// Returns a masked copy suitable for logging.
/// Sensitive values are replaced with placeholders while preserving structure.
pub fn masked(&self) -> Self {
Rank {
expr: self.expr.as_ref().map(|e| e.masked()),
}
}
}

impl TryFrom<chroma_proto::RankOperator> for Rank {
Expand Down Expand Up @@ -1380,6 +1423,50 @@ impl RankExpr {
},
}
}

/// Returns a masked copy suitable for logging.
/// Embedding data is replaced with dimension/size information while preserving structure.
pub fn masked(&self) -> Self {
match self {
RankExpr::Absolute(expr) => RankExpr::Absolute(Box::new(expr.masked())),
RankExpr::Division { left, right } => RankExpr::Division {
left: Box::new(left.masked()),
right: Box::new(right.masked()),
},
RankExpr::Exponentiation(expr) => RankExpr::Exponentiation(Box::new(expr.masked())),
RankExpr::Knn {
query,
key,
limit,
default,
return_rank,
} => RankExpr::Knn {
query: query.masked(),
key: key.clone(),
limit: *limit,
default: *default,
return_rank: *return_rank,
},
RankExpr::Logarithm(expr) => RankExpr::Logarithm(Box::new(expr.masked())),
RankExpr::Maximum(exprs) => {
RankExpr::Maximum(exprs.iter().map(|e| e.masked()).collect())
}
RankExpr::Minimum(exprs) => {
RankExpr::Minimum(exprs.iter().map(|e| e.masked()).collect())
}
RankExpr::Multiplication(exprs) => {
RankExpr::Multiplication(exprs.iter().map(|e| e.masked()).collect())
}
RankExpr::Subtraction { left, right } => RankExpr::Subtraction {
left: Box::new(left.masked()),
right: Box::new(right.masked()),
},
RankExpr::Summation(exprs) => {
RankExpr::Summation(exprs.iter().map(|e| e.masked()).collect())
}
RankExpr::Value(v) => RankExpr::Value(*v),
}
}
}

impl Add for RankExpr {
Expand Down
91 changes: 91 additions & 0 deletions rust/types/src/execution/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,20 @@ impl SearchPayload {
self.group_by = group_by;
self
}

/// Returns a masked copy of this payload suitable for logging.
/// Sensitive data (embeddings, query values, document patterns) are replaced with
/// placeholders that preserve structural information (dimensions, lengths) without
/// exposing actual content.
pub fn masked(&self) -> Self {
SearchPayload {
filter: self.filter.masked(),
rank: self.rank.masked(),
group_by: self.group_by.clone(),
limit: self.limit.clone(),
select: self.select.clone(),
}
}
}

#[cfg(feature = "utoipa")]
Expand Down Expand Up @@ -581,3 +595,80 @@ impl TryFrom<Search> for chroma_proto::SearchPlan {
})
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        operator::{Key, QueryVector, RankExpr},
        DocumentExpression, DocumentOperator, MetadataComparison, MetadataExpression,
        MetadataSetValue, MetadataValue, PrimitiveOperator, SetOperator, SparseVector, Where,
    };

    /// Masks a payload carrying sensitive ids, metadata values, a document pattern and
    /// sparse-vector tokens, then checks that none of them appear in its `Debug` rendering
    /// while the `<len:` and `<m>` placeholders do.
    #[test]
    fn test_search_payload_masked_hides_sensitive_info() {
        // Every string here is planted somewhere in the payload below.
        let sensitive = [
            "secret_id",
            "[email protected]",
            "123456789",
            "confidential",
            "secret query",
            "secret_token",
        ];

        // Individual predicates, combined further down in this same order.
        let email_equals = Where::Metadata(MetadataExpression {
            key: "email".into(),
            comparison: MetadataComparison::Primitive(
                PrimitiveOperator::Equal,
                MetadataValue::Str("[email protected]".into()),
            ),
        });
        let ssn_equals = Where::Metadata(MetadataExpression {
            key: "ssn".into(),
            comparison: MetadataComparison::Primitive(
                PrimitiveOperator::Equal,
                MetadataValue::Int(123456789),
            ),
        });
        let tags_in = Where::Metadata(MetadataExpression {
            key: "tags".into(),
            comparison: MetadataComparison::Set(
                SetOperator::In,
                MetadataSetValue::Str(vec!["confidential".into()]),
            ),
        });
        let document_contains = Where::Document(DocumentExpression {
            operator: DocumentOperator::Contains,
            pattern: "secret query".into(),
        });

        let filter = Filter {
            query_ids: Some(vec!["secret_id".into()]),
            where_clause: Some(email_equals & ssn_equals & tags_in & document_contains),
        };

        let sparse_query = QueryVector::Sparse(SparseVector {
            indices: vec![1, 2, 3],
            values: vec![0.1, 0.2, 0.3],
            tokens: Some(vec!["secret_token".into()]),
        });
        let rank = Rank {
            expr: Some(RankExpr::Knn {
                query: sparse_query,
                key: Key::Embedding,
                limit: 10,
                default: None,
                return_rank: false,
            }),
        };

        let payload = SearchPayload {
            filter,
            rank,
            ..Default::default()
        };

        let rendered = format!("{:?}", payload.masked());

        // None of the planted strings may survive masking.
        for secret in sensitive.iter() {
            assert!(!rendered.contains(secret), "Leaked: {}", secret);
        }

        // The placeholders that stand in for masked values must be visible.
        assert!(rendered.contains("<len:"));
        assert!(rendered.contains("<m>"));
    }
}
54 changes: 54 additions & 0 deletions rust/types/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,25 @@ impl Where {
},
}
}

/// Returns a masked copy suitable for logging.
/// Sensitive values are replaced with placeholders while preserving structure.
pub fn masked(&self) -> Self {
match self {
Where::Composite(expr) => Where::Composite(CompositeExpression {
operator: expr.operator.clone(),
children: expr.children.iter().map(|c| c.masked()).collect(),
}),
Where::Document(expr) => Where::Document(DocumentExpression {
operator: expr.operator.clone(),
pattern: format!("<len:{}>", expr.pattern.len()),
}),
Where::Metadata(expr) => Where::Metadata(MetadataExpression {
key: expr.key.clone(),
comparison: expr.comparison.masked(),
}),
}
}
}

impl BitAnd for Where {
Expand Down Expand Up @@ -1601,6 +1620,41 @@ pub enum MetadataComparison {
Set(SetOperator, MetadataSetValue),
}

impl MetadataComparison {
/// Returns a masked copy suitable for logging.
/// Sensitive values are replaced with placeholders while preserving type information.
pub fn masked(&self) -> Self {
match self {
MetadataComparison::Primitive(op, value) => {
let masked_value = match value {
MetadataValue::Bool(_) => MetadataValue::Bool(false),
MetadataValue::Int(_) => MetadataValue::Int(0),
MetadataValue::Float(_) => MetadataValue::Float(0.0),
MetadataValue::Str(_) => MetadataValue::Str("<m>".to_string()),
MetadataValue::SparseVector(sv) => MetadataValue::SparseVector(SparseVector {
indices: vec![sv.indices.len() as u32],
values: vec![sv.values.len() as f32],
tokens: None,
}),
};
MetadataComparison::Primitive(op.clone(), masked_value)
}
MetadataComparison::Set(op, set_value) => {
let masked_set = match set_value {
// Bool can't encode length, convert to Int
MetadataSetValue::Bool(v) => MetadataSetValue::Int(vec![v.len() as i64]),
MetadataSetValue::Int(v) => MetadataSetValue::Int(vec![v.len() as i64]),
MetadataSetValue::Float(v) => MetadataSetValue::Float(vec![v.len() as f64]),
MetadataSetValue::Str(v) => {
MetadataSetValue::Str(vec![format!("<len:{}>", v.len())])
}
};
MetadataComparison::Set(op.clone(), masked_set)
}
}
}
}

#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "testing", derive(proptest_derive::Arbitrary))]
pub enum PrimitiveOperator {
Expand Down
Loading