Skip to content

Enable VPCLMULQDQ support on Rust 1.89+ #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ digest = { version = "0.10", features = ["alloc"] }
rand = "0.9"
libc = "0.2.171"
regex = "1.11.1"
rustversion = "1.0"

[dev-dependencies]
criterion = "0.5"
Expand All @@ -44,10 +45,8 @@ harness = false
[features]
alloc = []

# enable experimental VPCLMULQDQ support, which landed in Rust 1.89.0-nightly, will deprecate after 1.89.0 is stable
vpclmulqdq = []

# the features below aren't in use, are deprecated, and will be removed in the next MAJOR version
vpclmulqdq = [] # depreated, VPCLMULQDQ stabilized in Rust 1.89.0
Copy link
Preview

Copilot AI Jun 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct the spelling 'depreated' to 'deprecated' in the feature flag comment.

Suggested change
vpclmulqdq = [] # depreated, VPCLMULQDQ stabilized in Rust 1.89.0
vpclmulqdq = [] # deprecated, VPCLMULQDQ stabilized in Rust 1.89.0

Copilot uses AI. Check for mistakes.

optimize_crc32_auto = [] # deprecated
optimize_crc32_neon_eor3_v9s3x2e_s3 = [] # deprecated
optimize_crc32_neon_v12e_v1 = [] # deprecated
Expand Down
25 changes: 10 additions & 15 deletions src/arch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use aarch64::AArch64Ops;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use x86::X86Ops;

//#[rustversion::since(1.89)]
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
#[rustversion::since(1.89)]
#[cfg(target_arch = "x86_64")]
use vpclmulqdq::Vpclmulqdq512Ops;

mod aarch64;
Expand All @@ -49,28 +49,25 @@ pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64
}
}

//#[rustversion::before(1.89)]
#[rustversion::before(1.89)]
#[inline]
#[cfg(all(
not(feature = "vpclmulqdq"),
any(target_arch = "x86", target_arch = "x86_64")
))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "ssse3,sse4.1,pclmulqdq")]
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
update_x86_sse(state, bytes, params)
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(all(feature = "vpclmulqdq", target_arch = "x86"))]
#[cfg(target_arch = "x86")]
#[target_feature(enable = "ssse3,sse4.1,pclmulqdq")]
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
update_x86_sse(state, bytes, params)
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(all(feature = "vpclmulqdq", target_arch = "x86_64"))]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3,sse4.1,pclmulqdq")]
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
use std::arch::is_x86_feature_detected;
Expand Down Expand Up @@ -117,8 +114,7 @@ unsafe fn update_x86_sse(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
}
}

//#[rustversion::before(1.89)]
#[cfg(not(feature = "vpclmulqdq"))]
#[rustversion::before(1.89)]
pub fn get_target() -> String {
#[cfg(target_arch = "aarch64")]
{
Expand All @@ -137,8 +133,7 @@ pub fn get_target() -> String {
return "software-fallback-tables".to_string();
}

//#[rustversion::since(1.89)]
#[cfg(feature = "vpclmulqdq")]
#[rustversion::since(1.89)]
pub fn get_target() -> String {
#[cfg(target_arch = "aarch64")]
{
Expand Down
36 changes: 18 additions & 18 deletions src/arch/vpclmulqdq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,33 @@
//!
//! It performs folding using 4 x ZMM registers of 512-bits each.

#![cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
#![cfg(target_arch = "x86_64")]

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
use crate::arch::x86::X86Ops;

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
use crate::enums::Reflector;

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
use crate::structs::CrcState;

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
use crate::traits::{ArchOps, EnhancedCrcWidth};

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
use std::arch::x86_64::*;

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
use std::ops::BitXor;

/// Implements the ArchOps trait using 512-bit AVX-512 and VPCLMULQDQ instructions at 512 bits.
/// Delegates to X86Ops for standard 128-bit operations
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[derive(Debug, Copy, Clone)]
pub struct Vpclmulqdq512Ops(X86Ops);

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
impl Vpclmulqdq512Ops {
#[inline(always)]
pub fn new() -> Self {
Expand All @@ -39,11 +39,11 @@ impl Vpclmulqdq512Ops {
}

// Wrapper for __m512i to make it easier to work with
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[derive(Debug, Copy, Clone)]
struct Simd512(__m512i);

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
impl Simd512 {
#[inline]
#[target_feature(enable = "avx512f")]
Expand Down Expand Up @@ -112,7 +112,7 @@ impl Simd512 {
}
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
impl Vpclmulqdq512Ops {
/// Process aligned blocks using VPCLMULQDQ with 4 x 512-bit registers
///
Expand Down Expand Up @@ -339,15 +339,15 @@ impl Vpclmulqdq512Ops {
}

// 512-bit version of the Reflector
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[derive(Clone, Copy)]
enum Reflector512 {
NoReflector,
ForwardReflector { smask: Simd512 },
}

// Function to create the appropriate reflector based on CRC parameters
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline(always)]
unsafe fn create_reflector512(reflected: bool) -> Reflector512 {
if reflected {
Expand All @@ -369,7 +369,7 @@ unsafe fn create_reflector512(reflected: bool) -> Reflector512 {
}

// Function to apply reflection to a 512-bit vector
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline(always)]
unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 {
match reflector {
Expand All @@ -379,12 +379,12 @@ unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 {
}

// pre-compute the reverse indices for 512-bit shuffling
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
static REVERSE_INDICES_512: __m512i =
unsafe { std::mem::transmute([7u64, 6u64, 5u64, 4u64, 3u64, 2u64, 1u64, 0u64]) };

// Implement a 512-bit byte shuffle function
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 {
Expand All @@ -396,7 +396,7 @@ unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 {
}

// Delegate all ArchOps methods to the inner X86Ops instance
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
impl ArchOps for Vpclmulqdq512Ops {
type Vector = __m128i;

Expand Down
9 changes: 3 additions & 6 deletions src/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,8 @@ impl ArchOps for X86Ops {
_mm_clmulepi64_si128(a, b, 0x11)
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(feature = "vpclmulqdq")]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn xor3_vectors(
&self,
Expand All @@ -244,9 +243,8 @@ impl ArchOps for X86Ops {
self.xor3_vectors_sse(a, b, c)
}

//#[rustversion::before(1.89)]
#[rustversion::before(1.89)]
#[inline]
#[cfg(not(feature = "vpclmulqdq"))]
#[target_feature(enable = "sse4.1")]
unsafe fn xor3_vectors(
&self,
Expand Down Expand Up @@ -321,9 +319,8 @@ impl X86Ops {
}
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(feature = "vpclmulqdq")]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn xor3_vectors_avx512(&self, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
_mm_ternarylogic_epi64(
Expand Down
24 changes: 8 additions & 16 deletions src/crc32/fusion/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,14 @@
use std::arch::x86_64::*;

/// Safe wrapper for CRC32 iSCSI calculation using AVX-512
//#[rustversion::before(1.89)]
#[rustversion::before(1.89)]
#[inline(always)]
#[cfg(not(feature = "vpclmulqdq"))]
pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 {
unsafe { crc32_iscsi_sse_v4s3x3(crc, data.as_ptr(), data.len()) }
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline(always)]
#[cfg(feature = "vpclmulqdq")]
pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 {
if is_x86_feature_detected!("vpclmulqdq")
&& is_x86_feature_detected!("avx512f")
Expand All @@ -49,17 +47,15 @@ pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 {
unsafe { crc32_iscsi_sse_v4s3x3(crc, data.as_ptr(), data.len()) }
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(feature = "vpclmulqdq")]
#[target_feature(enable = "avx512f,avx512vl,vpclmulqdq")]
unsafe fn clmul_lo_avx512_vpclmulqdq(a: __m512i, b: __m512i) -> __m512i {
_mm512_clmulepi64_epi128(a, b, 0)
}

//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(feature = "vpclmulqdq")]
#[target_feature(enable = "avx512f,avx512vl,vpclmulqdq")]
unsafe fn clmul_hi_avx512_vpclmulqdq(a: __m512i, b: __m512i) -> __m512i {
_mm512_clmulepi64_epi128(a, b, 17)
Expand Down Expand Up @@ -142,9 +138,8 @@ unsafe fn mm_crc32_u64(crc: u32, val: u64) -> u32 {
/// using:
///
/// ./generate -i avx512_vpclmulqdq -p crc32c -a v3x2
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(feature = "vpclmulqdq")]
#[target_feature(enable = "avx512f,avx512vl,vpclmulqdq,sse4.2")]
pub unsafe fn crc32_iscsi_avx512_vpclmulqdq_v3x2(
mut crc0: u32,
Expand Down Expand Up @@ -341,9 +336,8 @@ pub unsafe fn crc32_iscsi_avx512_vpclmulqdq_v3x2(
/// using:
///
/// ./generate -i avx512 -p crc32c -a v4s3x3
//#[rustversion::since(1.89)]
#[rustversion::since(1.89)]
#[inline]
#[cfg(feature = "vpclmulqdq")]
#[target_feature(enable = "avx2,avx512f,avx512vl,pclmulqdq")]
pub unsafe fn crc32_iscsi_avx512_v4s3x3(mut crc0: u32, mut buf: *const u8, mut len: usize) -> u32 {
// Align to 8-byte boundary using hardware CRC32C instructions
Expand Down Expand Up @@ -689,8 +683,7 @@ mod tests {
}
}

//#[rustversion::since(1.89)]
#[cfg(feature = "vpclmulqdq")]
#[rustversion::since(1.89)]
fn test_crc32_iscsi_random(len: usize) {
let mut data = vec![0u8; len];
rng().fill(&mut data[..]);
Expand Down Expand Up @@ -728,8 +721,7 @@ mod tests {
}
}

//#[rustversion::before(1.89)]
#[cfg(not(feature = "vpclmulqdq"))]
#[rustversion::before(1.89)]
fn test_crc32_iscsi_random(len: usize) {
let mut data = vec![0u8; len];
rng().fill(&mut data[..]);
Expand Down