Skip to content

Commit bc8a692

Browse files
committed
Store a function pointer to the correct compare256 implementation in a static
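When a target feature (e.g. avx2) is not enabled at compile time but is available at runtime, this approach reduces branching: the runtime feature check runs once, on the first call, and the selected function pointer is cached in a static for later calls. We still dispatch statically if the target feature is already enabled at compile time.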
1 parent 4ae3517 commit bc8a692

File tree

1 file changed

+43
-12
lines changed

1 file changed

+43
-12
lines changed

zlib-rs/src/deflate/compare256.rs

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,61 @@
11
#[cfg(test)]
22
const MAX_COMPARE_SIZE: usize = 256;
33

4+
#[inline(always)]
45
pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize {
56
let src0 = first_chunk::<_, 256>(src0).unwrap();
67
let src1 = first_chunk::<_, 256>(src1).unwrap();
78

89
compare256(src0, src1)
910
}
1011

12+
#[inline(always)]
1113
fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
12-
#[cfg(target_arch = "x86_64")]
13-
if crate::cpu_features::is_enabled_avx2() {
14-
return unsafe { avx2::compare256(src0, src1) };
15-
}
14+
#[cfg(target_feature = "avx2")]
15+
return avx2::compare256(src0, src1);
1616

17-
#[cfg(target_arch = "aarch64")]
18-
if crate::cpu_features::is_enabled_neon() {
19-
return unsafe { neon::compare256(src0, src1) };
20-
}
17+
#[cfg(target_feature = "neon")]
18+
return neon::compare256(src0, src1);
19+
20+
#[cfg(target_feature = "simd128")]
21+
return wasm32::compare256(src0, src1);
22+
23+
#[allow(unreachable_code)]
24+
compare256_via_function_pointer(src0, src1)
25+
}
26+
27+
#[inline(always)]
28+
fn compare256_via_function_pointer(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
29+
use core::sync::atomic::{AtomicPtr, Ordering};
2130

22-
#[cfg(target_arch = "wasm32")]
23-
if crate::cpu_features::is_enabled_simd128() {
24-
return wasm32::compare256(src0, src1);
31+
type F = unsafe fn(&[u8; 256], &[u8; 256]) -> usize;
32+
33+
static PTR: AtomicPtr<()> = AtomicPtr::new(initializer as *mut ());
34+
35+
fn initializer(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
36+
let ptr = match () {
37+
#[cfg(target_arch = "x86_64")]
38+
_ if crate::cpu_features::is_enabled_avx2() => avx2::compare256 as F,
39+
#[cfg(target_arch = "aarch64")]
40+
_ if crate::cpu_features::is_enabled_neon() => neon::compare256 as F,
41+
#[cfg(target_arch = "wasm32")]
42+
_ if crate::cpu_features::is_enabled_simd128() => wasm32::compare256 as F,
43+
_ => rust::compare256 as F,
44+
};
45+
46+
PTR.store(ptr as *mut (), Ordering::Relaxed);
47+
48+
// Safety: we've validated the target feature requirements
49+
unsafe { ptr(src0, src1) }
2550
}
2651

27-
rust::compare256(src0, src1)
52+
let ptr = PTR.load(Ordering::Relaxed);
53+
54+
// Safety: we trust this function pointer (PTR is local to the function)
55+
let dynamic_compare256 = unsafe { core::mem::transmute::<*mut (), F>(ptr) };
56+
57+
// Safety: we've validated the target feature requirements
58+
unsafe { dynamic_compare256(src0, src1) }
2859
}
2960

3061
pub fn compare256_rle_slice(byte: u8, src: &[u8]) -> usize {

0 commit comments

Comments
 (0)