Skip to content

Commit 2aa01ee

Browse files
committed
Hashed utility for ValueMap to improve hashing performance
1 parent 0cc2cd5 commit 2aa01ee

File tree

4 files changed

+222
-20
lines changed

4 files changed

+222
-20
lines changed

opentelemetry-sdk/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ tokio = { workspace = true, features = ["rt", "time"], optional = true }
2929
tokio-stream = { workspace = true, optional = true }
3030
http = { workspace = true, optional = true }
3131
tracing = {workspace = true, optional = true}
32+
rustc-hash = "2.0.0"
3233

3334
[package.metadata.docs.rs]
3435
all-features = true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
use std::{
2+
borrow::{Borrow, Cow},
3+
hash::{BuildHasher, Hash, Hasher},
4+
ops::Deref,
5+
};
6+
7+
use rustc_hash::*;
8+
9+
/// A value stored together with its hash, so the hash is computed only once.
///
/// The value lives in a [`Cow`], which lets the same wrapper sit on top of a
/// borrowed `&T` or of the owned counterpart `T::Owned`.
pub(crate) struct Hashed<'a, T: ToOwned + ?Sized> {
    /// The wrapped value, either borrowed or owned.
    value: Cow<'a, T>,
    /// Hash recorded for `value` when the wrapper was built.
    hash: u64,
}
17+
18+
impl<'a, T> Hashed<'a, T>
19+
where
20+
T: ToOwned + Hash + ?Sized,
21+
{
22+
pub(crate) fn from_borrowed(value: &'a T) -> Self {
23+
let mut hasher = FxHasher::default();
24+
value.hash(&mut hasher);
25+
Self {
26+
value: Cow::Borrowed(value),
27+
hash: hasher.finish(),
28+
}
29+
}
30+
31+
pub(crate) fn from_owned(value: <T as ToOwned>::Owned) -> Self {
32+
let hash = calc_hash(value.borrow());
33+
Self {
34+
value: Cow::Owned(value),
35+
hash,
36+
}
37+
}
38+
39+
pub(crate) fn mutate(self, f: impl FnOnce(&mut <T as ToOwned>::Owned)) -> Hashed<'static, T> {
40+
let mut value = self.value.into_owned();
41+
f(&mut value);
42+
let hash = calc_hash(value.borrow());
43+
Hashed {
44+
value: Cow::Owned(value),
45+
hash,
46+
}
47+
}
48+
49+
pub(crate) fn into_owned(self) -> Hashed<'static, T> {
50+
let value = self.value.into_owned();
51+
Hashed {
52+
value: Cow::Owned(value),
53+
hash: self.hash,
54+
}
55+
}
56+
57+
pub(crate) fn into_inner_owned(self) -> T::Owned {
58+
self.value.into_owned()
59+
}
60+
}
61+
62+
fn calc_hash<T>(value: T) -> u64
63+
where
64+
T: Hash,
65+
{
66+
let mut hasher = FxHasher::default();
67+
value.hash(&mut hasher);
68+
hasher.finish()
69+
}
70+
71+
impl<T> Clone for Hashed<'_, T>
72+
where
73+
T: ToOwned + ?Sized,
74+
{
75+
fn clone(&self) -> Self {
76+
Self {
77+
value: self.value.clone(),
78+
hash: self.hash,
79+
}
80+
}
81+
82+
fn clone_from(&mut self, source: &Self) {
83+
self.value.clone_from(&source.value);
84+
self.hash = source.hash;
85+
}
86+
}
87+
88+
impl<T> Hash for Hashed<'_, T>
89+
where
90+
T: ToOwned + Hash + ?Sized,
91+
{
92+
fn hash<H: Hasher>(&self, state: &mut H) {
93+
state.write_u64(self.hash);
94+
}
95+
}
96+
97+
impl<T> PartialEq for Hashed<'_, T>
98+
where
99+
T: ToOwned + PartialEq + ?Sized,
100+
{
101+
fn eq(&self, other: &Self) -> bool {
102+
self.value.as_ref() == other.value.as_ref()
103+
}
104+
}
105+
106+
impl<T> Eq for Hashed<'_, T> where T: ToOwned + Eq + ?Sized {}
107+
108+
impl<T> Deref for Hashed<'_, T>
109+
where
110+
T: ToOwned + ?Sized,
111+
{
112+
type Target = T;
113+
114+
fn deref(&self) -> &Self::Target {
115+
self.value.deref()
116+
}
117+
}
118+
119+
/// Pass-through hasher that makes hashing of [`Hashed`] keys a no-op in
/// [`HashMap`](std::collections::HashMap) or [`HashSet`](std::collections::HashSet).
///
/// It reports whatever `u64` was last handed to `write_u64` instead of mixing
/// bytes. For all other key types (except for [`u64`]) it will panic.
#[derive(Clone, Default)]
pub(crate) struct HashedNoOpBuilder {
    /// Most recent value passed to `write_u64`; zero for a fresh instance.
    hashed: u64,
}

impl Hasher for HashedNoOpBuilder {
    /// Returns the last value recorded by `write_u64`.
    fn finish(&self) -> u64 {
        let Self { hashed } = self;
        *hashed
    }

    /// Raw byte input is rejected: only pre-hashed `u64` values are accepted.
    fn write(&mut self, _bytes: &[u8]) {
        panic!("Only works with `Hashed` value")
    }

    /// Stores `i` verbatim, replacing any earlier value.
    fn write_u64(&mut self, i: u64) {
        self.hashed = i;
    }
}

impl BuildHasher for HashedNoOpBuilder {
    type Hasher = Self;

    /// Every hasher starts from the default (zeroed) state, independent of `self`.
    fn build_hasher(&self) -> Self::Hasher {
        Self::default()
    }
}

opentelemetry-sdk/src/metrics/internal/mod.rs

+75-15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod aggregate;
22
mod exponential_histogram;
3+
mod hashed;
34
mod histogram;
45
mod last_value;
56
mod precomputed_sum;
@@ -15,13 +16,12 @@ use std::sync::{Arc, RwLock};
1516
use aggregate::is_under_cardinality_limit;
1617
pub(crate) use aggregate::{AggregateBuilder, ComputeAggregation, Measure};
1718
pub(crate) use exponential_histogram::{EXPO_MAX_SCALE, EXPO_MIN_SCALE};
19+
use hashed::{Hashed, HashedNoOpBuilder};
1820
use once_cell::sync::Lazy;
1921
use opentelemetry::{otel_warn, KeyValue};
2022

21-
use crate::metrics::AttributeSet;
22-
23-
pub(crate) static STREAM_OVERFLOW_ATTRIBUTES: Lazy<Vec<KeyValue>> =
24-
Lazy::new(|| vec![KeyValue::new("otel.metric.overflow", "true")]);
23+
pub(crate) static STREAM_OVERFLOW_ATTRIBUTES: Lazy<Hashed<'static, [KeyValue]>> =
24+
Lazy::new(|| Hashed::from_owned(vec![KeyValue::new("otel.metric.overflow", "true")]));
2525

2626
pub(crate) trait Aggregator {
2727
/// A static configuration that is needed in order to initialize aggregator.
@@ -52,7 +52,7 @@ where
5252
A: Aggregator,
5353
{
5454
/// Trackers store the values associated with different attribute sets.
55-
trackers: RwLock<HashMap<Vec<KeyValue>, Arc<A>>>,
55+
trackers: RwLock<HashMap<Hashed<'static, [KeyValue]>, Arc<A>, HashedNoOpBuilder>>,
5656
/// Number of different attribute set stored in the `trackers` map.
5757
count: AtomicUsize,
5858
/// Indicates whether a value with no attributes has been stored.
@@ -69,7 +69,7 @@ where
6969
{
7070
fn new(config: A::InitConfig) -> Self {
7171
ValueMap {
72-
trackers: RwLock::new(HashMap::new()),
72+
trackers: RwLock::new(HashMap::default()),
7373
has_no_attribute_value: AtomicBool::new(false),
7474
no_attribute_tracker: A::create(&config),
7575
count: AtomicUsize::new(0),
@@ -84,19 +84,25 @@ where
8484
return;
8585
}
8686

87+
let attributes = Hashed::from_borrowed(attributes);
88+
8789
let Ok(trackers) = self.trackers.read() else {
8890
return;
8991
};
9092

9193
// Try to retrieve and update the tracker with the attributes in the provided order first
92-
if let Some(tracker) = trackers.get(attributes) {
94+
if let Some(tracker) = trackers.get(&attributes) {
9395
tracker.update(value);
9496
return;
9597
}
9698

9799
// Try to retrieve and update the tracker with the attributes sorted.
98-
let sorted_attrs = AttributeSet::from(attributes).into_vec();
99-
if let Some(tracker) = trackers.get(sorted_attrs.as_slice()) {
100+
let sorted_attrs = attributes.clone().mutate(|list| {
101+
// use stable sort
102+
list.sort_by(|a, b| a.key.cmp(&b.key));
103+
dedup_remove_first(list, |a, b| a.key == b.key);
104+
});
105+
if let Some(tracker) = trackers.get(&sorted_attrs) {
100106
tracker.update(value);
101107
return;
102108
}
@@ -110,20 +116,20 @@ where
110116

111117
// Recheck both the provided and sorted orders after acquiring the write lock
112118
// in case another thread has pushed an update in the meantime.
113-
if let Some(tracker) = trackers.get(attributes) {
119+
if let Some(tracker) = trackers.get(&attributes) {
114120
tracker.update(value);
115-
} else if let Some(tracker) = trackers.get(sorted_attrs.as_slice()) {
121+
} else if let Some(tracker) = trackers.get(&sorted_attrs) {
116122
tracker.update(value);
117123
} else if is_under_cardinality_limit(self.count.load(Ordering::SeqCst)) {
118124
let new_tracker = Arc::new(A::create(&self.config));
119125
new_tracker.update(value);
120126

121127
// Insert tracker with the attributes in the provided and sorted orders
122-
trackers.insert(attributes.to_vec(), new_tracker.clone());
128+
trackers.insert(attributes.into_owned(), new_tracker.clone());
123129
trackers.insert(sorted_attrs, new_tracker);
124130

125131
self.count.fetch_add(1, Ordering::SeqCst);
126-
} else if let Some(overflow_value) = trackers.get(STREAM_OVERFLOW_ATTRIBUTES.as_slice()) {
132+
} else if let Some(overflow_value) = trackers.get(&STREAM_OVERFLOW_ATTRIBUTES) {
127133
overflow_value.update(value);
128134
} else {
129135
let new_tracker = A::create(&self.config);
@@ -153,7 +159,7 @@ where
153159
let mut seen = HashSet::new();
154160
for (attrs, tracker) in trackers.iter() {
155161
if seen.insert(Arc::as_ptr(tracker)) {
156-
dest.push(map_fn(attrs.clone(), tracker));
162+
dest.push(map_fn(attrs.clone().into_inner_owned(), tracker));
157163
}
158164
}
159165
}
@@ -183,8 +189,25 @@ where
183189
let mut seen = HashSet::new();
184190
for (attrs, tracker) in trackers.into_iter() {
185191
if seen.insert(Arc::as_ptr(&tracker)) {
186-
dest.push(map_fn(attrs, tracker.clone_and_reset(&self.config)));
192+
dest.push(map_fn(
193+
attrs.into_inner_owned(),
194+
tracker.clone_and_reset(&self.config),
195+
));
196+
}
197+
}
198+
}
199+
}
200+
201+
/// Collapses every run of consecutive elements that `is_eq` reports as equal,
/// keeping only the **last** element of each run.
///
/// Only neighbouring elements are compared, so the input is expected to be
/// sorted (or otherwise grouped) beforehand. `is_eq` is invoked as
/// `is_eq(earlier, later)`, where `later` is the element that survives.
fn dedup_remove_first<T>(values: &mut Vec<T>, is_eq: impl Fn(&T, &T) -> bool) {
    if values.len() < 2 {
        return;
    }

    // `Vec::dedup_by` keeps the first element of each run, while we need the
    // last one. Deduplicating the reversed vector and reversing back gives
    // exactly that in O(n), with no `unsafe` and no repeated `Vec::remove`.
    values.reverse();
    // In the reversed vector `candidate` is the earlier original element and
    // `kept` the later one, so the argument order seen by `is_eq` is unchanged.
    values.dedup_by(|candidate, kept| is_eq(&*candidate, &*kept));
    values.reverse();
}
@@ -392,8 +415,45 @@ impl AtomicallyUpdate<f64> for f64 {
392415

393416
#[cfg(test)]
394417
mod tests {
418+
use std::usize;
419+
395420
use super::*;
396421

422+
fn assert_deduped<const N: usize, const M: usize>(
423+
input: [(i32, bool); N],
424+
expect: [(i32, bool); M],
425+
) {
426+
let mut list: Vec<(i32, bool)> = Vec::from(input);
427+
dedup_remove_first(&mut list, |a, b| a.0 == b.0);
428+
assert_eq!(list, expect);
429+
}
430+
431+
#[test]
432+
fn deduplicate_by_removing_first_element_from_sorted_array() {
433+
assert_deduped([], []);
434+
assert_deduped([(1, true)], [(1, true)]);
435+
assert_deduped([(1, false), (1, false), (1, true)], [(1, true)]);
436+
assert_deduped(
437+
[(1, true), (2, false), (2, false), (2, true)],
438+
[(1, true), (2, true)],
439+
);
440+
assert_deduped(
441+
[(1, true), (1, false), (1, true), (2, true)],
442+
[(1, true), (2, true)],
443+
);
444+
assert_deduped(
445+
[
446+
(1, false),
447+
(1, true),
448+
(2, false),
449+
(2, true),
450+
(3, false),
451+
(3, true),
452+
],
453+
[(1, true), (2, true), (3, true)],
454+
);
455+
}
456+
397457
#[test]
398458
fn can_store_u64_atomic_value() {
399459
let atomic = u64::new_atomic_tracker(0);

opentelemetry-sdk/src/metrics/mod.rs

-5
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,6 @@ impl AttributeSet {
138138
pub(crate) fn iter(&self) -> impl Iterator<Item = (&Key, &Value)> {
139139
self.0.iter().map(|kv| (&kv.key, &kv.value))
140140
}
141-
142-
/// Returns the underlying Vec of KeyValue pairs
143-
pub(crate) fn into_vec(self) -> Vec<KeyValue> {
144-
self.0
145-
}
146141
}
147142

148143
impl Hash for AttributeSet {

0 commit comments

Comments
 (0)