|
| 1 | +//! `x86_64` intrinsics |
| 2 | +
|
| 3 | +use core::arch::asm; |
| 4 | + |
/// Unsigned integer addition of two operands with carry flag.
///
/// Performs an unsigned addition of the destination operand (first operand),
/// the source operand (second operand) and the carry-flag (CF) and stores the
/// result in the destination operand. The destination operand is a
/// general-purpose register, whereas the source operand can be a
/// general-purpose register or memory location. The state of CF can represent
/// a carry from a previous addition. The instruction sets the CF flag with the
/// carry generated by the unsigned addition of the operands.
///
/// The ADCX instruction is executed in the context of multi-precision addition
/// where we add a series of operands with a carry-chain. At the beginning of a
/// chain of additions, we need to make sure the CF is in a desired initial
/// state. Often, this initial state needs to be 0, which can be achieved with
/// an instruction to zero the CF (e.g. XOR).
///
/// # Safety
///
/// This function requires support for the Intel ADX (Multi-Precision Add-Carry
/// Instruction) extension to the x86 instruction set.
///
/// If called on a CPU which does not support this extension, it will crash the
/// process with an illegal instruction exception (i.e. `SIGILL`)
// NOTE(review): unlike `mulx` below, this function does not declare
// `#[target_feature(enable = "adx")]`; it relies on `#[inline(always)]` plus
// the caller's CPU check instead — confirm this is intentional.
//
// NOTE(review): the result also depends on the CF value on *entry* to the
// `asm!` block. Rust inline asm gives no guarantee that flags survive between
// consecutive `asm!` blocks (the compiler may schedule flag-clobbering code
// in between) — verify that carry-chain callers account for this.
#[inline(always)]
pub unsafe fn adcx(a: &mut u64, b: u64) {
    // ADCX: *a = *a + b + CF, updating only CF. `asm!` assumes flags are
    // clobbered by default, which matches ADCX writing CF.
    asm! {
        "adcx {0}, {1}",
        inout(reg) *a,
        in(reg) b
    };
}
| 36 | + |
/// Unsigned integer addition of two operands with overflow flag.
///
/// Performs an unsigned addition of the destination operand (first operand),
/// the source operand (second operand) and the overflow-flag (OF) and stores
/// the result in the destination operand. The destination operand is a
/// general-purpose register, whereas the source operand can be a
/// general-purpose register or memory location. The state of OF represents a
/// carry from a previous addition. The instruction sets the OF flag with the
/// carry generated by the unsigned addition of the operands.
///
/// The ADOX instruction is executed in the context of multi-precision
/// addition, where we add a series of operands with a carry-chain. At the
/// beginning of a chain of additions, we execute an instruction to zero the OF
/// (e.g. XOR).
///
/// # Safety
///
/// This function requires support for the Intel ADX (Multi-Precision Add-Carry
/// Instruction) extension to the x86 instruction set.
///
/// If called on a CPU which does not support this extension, it will crash the
/// process with an illegal instruction exception (i.e. `SIGILL`)
// NOTE(review): as with `adcx`, there is no `#[target_feature(enable = "adx")]`
// and the result depends on the OF value on entry to the `asm!` block, which
// Rust inline asm does not guarantee across separate `asm!` invocations —
// verify chained callers account for this.
#[inline(always)]
pub unsafe fn adox(a: &mut u64, b: u64) {
    // ADOX: *a = *a + b + OF, updating only OF. Default `asm!` flag-clobber
    // semantics cover ADOX writing OF.
    asm! {
        "adox {0}, {1}",
        inout(reg) *a,
        in(reg) b
    };
}
| 67 | + |
/// Move if zero.
///
/// Emits a `test` instruction to set ZF according to `condition`, followed by
/// `cmovz` (a.k.a. `cmove`), so that `src` is written into `dst` exactly when
/// `condition == 0`; otherwise `dst` is left unchanged. Runs in constant time
/// with respect to `condition`.
#[inline(always)]
pub fn cmovz(condition: u64, src: u64, dst: &mut u64) {
    // SAFETY: the asm reads `condition` and `src`, conditionally overwrites
    // the register backing `dst`, and accesses no memory; flags are clobbered,
    // which is the default assumption of `asm!`.
    unsafe {
        asm! {
            "test {cond}, {cond}",
            "cmovz {dest}, {source}",
            cond = in(reg) condition,
            dest = inlateout(reg) *dst,
            source = in(reg) src,
        };
    }
}
| 85 | + |
/// Move if not zero.
///
/// Emits a `test` instruction to set ZF according to `condition`, followed by
/// `cmovnz` (a.k.a. `cmovne`), so that `src` is written into `dst` exactly
/// when `condition != 0`; otherwise `dst` is left unchanged. Runs in constant
/// time with respect to `condition`.
#[inline(always)]
pub fn cmovnz(condition: u64, src: u64, dst: &mut u64) {
    // SAFETY: the asm reads `condition` and `src`, conditionally overwrites
    // the register backing `dst`, and accesses no memory; flags are clobbered,
    // which is the default assumption of `asm!`.
    unsafe {
        asm! {
            "test {cond}, {cond}",
            "cmovnz {dest}, {source}",
            cond = in(reg) condition,
            dest = inlateout(reg) *dst,
            source = in(reg) src,
        };
    }
}
| 103 | + |
/// Unsigned multiply without affecting flags.
///
/// Performs an unsigned multiplication of the implicit source operand (RDX,
/// loaded from `a`) and the explicit source operand `b`, storing the low half
/// of the 128-bit product in `lo` and the high half in `hi`, without reading
/// or writing the arithmetic flags.
///
/// This enables efficient programming where the software can interleave add
/// with carry operations and multiplications.
///
/// If the first and second operand are identical, it will contain the high
/// half of the multiplication result.
///
/// # Safety
///
/// This function requires support for the Intel BMI2 (Bit Manipulation
/// Instruction Set 2) extension to the x86 instruction set.
///
/// If called on a CPU which does not support this extension, it will crash the
/// process with an illegal instruction exception (i.e. `SIGILL`)
#[target_feature(enable = "bmi2")]
pub unsafe fn mulx(a: u64, b: u64, lo: &mut u64, hi: &mut u64) {
    // MULX (Intel syntax): first operand receives the high half of the
    // product, second the low half, third is the explicit factor; the other
    // factor is implicitly RDX. `lateout` is sound here because the single
    // instruction reads all inputs before writing its outputs.
    asm! {
        "mulx {high}, {low}, {factor}",
        high = lateout(reg) *hi,
        low = lateout(reg) *lo,
        factor = in(reg) b,
        in("rdx") a,
    };
}
| 135 | + |
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cmovz_works() {
        let mut value = 24;
        cmovz(42, 42, &mut value);
        assert_eq!(value, 24, "nonzero condition must leave dst untouched");
        cmovz(0, 42, &mut value);
        assert_eq!(value, 42, "zero condition must move src into dst");
    }

    #[test]
    fn cmovnz_works() {
        let mut value = 24;
        cmovnz(0, 42, &mut value);
        assert_eq!(value, 24, "zero condition must leave dst untouched");
        cmovnz(42, 42, &mut value);
        assert_eq!(value, 42, "nonzero condition must move src into dst");
    }
}
0 commit comments