|
1 | 1 | #[cfg(test)]
|
2 | 2 | const MAX_COMPARE_SIZE: usize = 256;
|
3 | 3 |
|
| 4 | +#[inline(always)] |
4 | 5 | pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize {
|
5 | 6 | let src0 = first_chunk::<_, 256>(src0).unwrap();
|
6 | 7 | let src1 = first_chunk::<_, 256>(src1).unwrap();
|
7 | 8 |
|
8 | 9 | compare256(src0, src1)
|
9 | 10 | }
|
10 | 11 |
|
| 12 | +#[inline(always)] |
11 | 13 | fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
|
12 |
| - #[cfg(target_arch = "x86_64")] |
13 |
| - if crate::cpu_features::is_enabled_avx2() { |
14 |
| - return unsafe { avx2::compare256(src0, src1) }; |
15 |
| - } |
| 14 | + #[cfg(target_feature = "avx2")] |
| 15 | + return avx2::compare256(src0, src1); |
16 | 16 |
|
17 |
| - #[cfg(target_arch = "aarch64")] |
18 |
| - if crate::cpu_features::is_enabled_neon() { |
19 |
| - return unsafe { neon::compare256(src0, src1) }; |
20 |
| - } |
| 17 | + #[cfg(target_feature = "neon")] |
| 18 | + return neon::compare256(src0, src1); |
| 19 | + |
| 20 | + #[cfg(target_feature = "simd128")] |
| 21 | + return wasm32::compare256(src0, src1); |
| 22 | + |
| 23 | + #[allow(unreachable_code)] |
| 24 | + compare256_via_function_pointer(src0, src1) |
| 25 | +} |
| 26 | + |
| 27 | +#[inline(always)] |
| 28 | +fn compare256_via_function_pointer(src0: &[u8; 256], src1: &[u8; 256]) -> usize { |
| 29 | + use core::sync::atomic::{AtomicPtr, Ordering}; |
21 | 30 |
|
22 |
| - #[cfg(target_arch = "wasm32")] |
23 |
| - if crate::cpu_features::is_enabled_simd128() { |
24 |
| - return wasm32::compare256(src0, src1); |
| 31 | + type F = unsafe fn(&[u8; 256], &[u8; 256]) -> usize; |
| 32 | + |
| 33 | + static PTR: AtomicPtr<()> = AtomicPtr::new(initializer as *mut ()); |
| 34 | + |
| 35 | + fn initializer(src0: &[u8; 256], src1: &[u8; 256]) -> usize { |
| 36 | + let ptr = match () { |
| 37 | + #[cfg(target_arch = "x86_64")] |
| 38 | + _ if crate::cpu_features::is_enabled_avx2() => avx2::compare256 as F, |
| 39 | + #[cfg(target_arch = "aarch64")] |
| 40 | + _ if crate::cpu_features::is_enabled_neon() => neon::compare256 as F, |
| 41 | + #[cfg(target_arch = "wasm32")] |
| 42 | + _ if crate::cpu_features::is_enabled_simd128() => wasm32::compare256 as F, |
| 43 | + _ => rust::compare256 as F, |
| 44 | + }; |
| 45 | + |
| 46 | + PTR.store(ptr as *mut (), Ordering::Relaxed); |
| 47 | + |
| 48 | + // Safety: we've validated the target feature requirements |
| 49 | + unsafe { ptr(src0, src1) } |
25 | 50 | }
|
26 | 51 |
|
27 |
| - rust::compare256(src0, src1) |
| 52 | + let ptr = PTR.load(Ordering::Relaxed); |
| 53 | + |
| 54 | + // Safety: we trust this function pointer (PTR is local to the function) |
| 55 | + let dynamic_compare256 = unsafe { core::mem::transmute::<*mut (), F>(ptr) }; |
| 56 | + |
| 57 | + // Safety: we've validated the target feature requirements |
| 58 | + unsafe { dynamic_compare256(src0, src1) } |
28 | 59 | }
|
29 | 60 |
|
30 | 61 | pub fn compare256_rle_slice(byte: u8, src: &[u8]) -> usize {
|
|
0 commit comments