Skip to content

Commit 9043edb

Browse files
authored
BoxedResidue: avoid allocations in pow loop (#394)
Performs Montgomery multiplications and squarings in-place, avoiding allocations, by constructing a reusable `MontgomeryMultiplier`.

Benchmark (Montgomery arithmetic/modpow, BoxedUint^BoxedUint):
time: [24.265 µs 24.274 µs 24.288 µs]
change: [-24.321% -24.194% -24.081%] (p = 0.00 < 0.05)
Performance has improved.
1 parent bc53cfe commit 9043edb

File tree

5 files changed

+116
-79
lines changed

5 files changed

+116
-79
lines changed

src/modular/boxed_residue/mul.rs

Lines changed: 86 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,36 @@
11
//! Multiplications between boxed residues.
22
3-
use super::{montgomery_reduction_boxed_mut, BoxedResidue};
4-
use crate::traits::Square;
5-
use crate::{BoxedUint, Limb};
6-
use core::ops::{Mul, MulAssign};
3+
use super::{montgomery_reduction_boxed_mut, BoxedResidue, BoxedResidueParams};
4+
use crate::{traits::Square, uint::mul::mul_limbs, BoxedUint, Limb};
5+
use core::{
6+
borrow::Borrow,
7+
ops::{Mul, MulAssign},
8+
};
9+
10+
#[cfg(feature = "zeroize")]
11+
use zeroize::Zeroize;
712

813
impl BoxedResidue {
914
/// Multiplies by `rhs`.
1015
pub fn mul(&self, rhs: &Self) -> Self {
1116
debug_assert_eq!(&self.residue_params, &rhs.residue_params);
1217

18+
let montgomery_form = MontgomeryMultiplier::from(self.residue_params.borrow())
19+
.mul(&self.montgomery_form, &rhs.montgomery_form);
20+
1321
Self {
14-
montgomery_form: mul_montgomery_form(
15-
&self.montgomery_form,
16-
&rhs.montgomery_form,
17-
&self.residue_params.modulus,
18-
self.residue_params.mod_neg_inv,
19-
),
22+
montgomery_form,
2023
residue_params: self.residue_params.clone(),
2124
}
2225
}
2326

2427
/// Computes the (reduced) square of a residue.
2528
pub fn square(&self) -> Self {
29+
let montgomery_form =
30+
MontgomeryMultiplier::from(self.residue_params.borrow()).square(&self.montgomery_form);
31+
2632
Self {
27-
montgomery_form: square_montgomery_form(
28-
&self.montgomery_form,
29-
&self.residue_params.modulus,
30-
self.residue_params.mod_neg_inv,
31-
),
33+
montgomery_form,
3234
residue_params: self.residue_params.clone(),
3335
}
3436
}
@@ -67,13 +69,8 @@ impl Mul<BoxedResidue> for BoxedResidue {
6769
impl MulAssign<&BoxedResidue> for BoxedResidue {
6870
fn mul_assign(&mut self, rhs: &BoxedResidue) {
6971
debug_assert_eq!(&self.residue_params, &rhs.residue_params);
70-
71-
self.montgomery_form = mul_montgomery_form(
72-
&self.montgomery_form,
73-
&rhs.montgomery_form,
74-
&self.residue_params.modulus,
75-
self.residue_params.mod_neg_inv,
76-
);
72+
MontgomeryMultiplier::from(self.residue_params.borrow())
73+
.mul_assign(&mut self.montgomery_form, &rhs.montgomery_form);
7774
}
7875
}
7976

@@ -89,55 +86,80 @@ impl Square for BoxedResidue {
8986
}
9087
}
9188

92-
pub(super) fn mul_montgomery_form(
93-
a: &BoxedUint,
94-
b: &BoxedUint,
95-
modulus: &BoxedUint,
96-
mod_neg_inv: Limb,
97-
) -> BoxedUint {
98-
let mut ret = a.clone();
99-
mul_montgomery_form_assign(&mut ret, b, modulus, mod_neg_inv);
100-
ret
89+
impl<'a> From<&'a BoxedResidueParams> for MontgomeryMultiplier<'a> {
90+
fn from(residue_params: &'a BoxedResidueParams) -> MontgomeryMultiplier<'a> {
91+
MontgomeryMultiplier::new(&residue_params.modulus, residue_params.mod_neg_inv)
92+
}
10193
}
10294

103-
pub(super) fn mul_montgomery_form_assign(
104-
a: &mut BoxedUint,
105-
b: &BoxedUint,
106-
modulus: &BoxedUint,
95+
/// Montgomery multiplier with a pre-allocated internal buffer to avoid additional allocations.
96+
pub(super) struct MontgomeryMultiplier<'a> {
97+
product: BoxedUint,
98+
modulus: &'a BoxedUint,
10799
mod_neg_inv: Limb,
108-
) {
109-
debug_assert_eq!(a.bits_precision(), modulus.bits_precision());
110-
debug_assert_eq!(b.bits_precision(), modulus.bits_precision());
100+
}
111101

112-
let mut product = a.mul(b);
113-
montgomery_reduction_boxed_mut(&mut product, modulus, mod_neg_inv, a);
102+
impl<'a> MontgomeryMultiplier<'a> {
103+
/// Create a new Montgomery multiplier.
104+
pub(super) fn new(modulus: &'a BoxedUint, mod_neg_inv: Limb) -> Self {
105+
Self {
106+
product: BoxedUint::zero_with_precision(modulus.bits_precision() * 2),
107+
modulus,
108+
mod_neg_inv,
109+
}
110+
}
114111

115-
#[cfg(feature = "zeroize")]
116-
zeroize::Zeroize::zeroize(&mut product);
117-
}
112+
/// Multiply two values in Montgomery form.
113+
pub(super) fn mul(&mut self, a: &BoxedUint, b: &BoxedUint) -> BoxedUint {
114+
let mut ret = a.clone();
115+
self.mul_assign(&mut ret, b);
116+
ret
117+
}
118118

119-
#[inline]
120-
pub(super) fn square_montgomery_form(
121-
a: &BoxedUint,
122-
modulus: &BoxedUint,
123-
mod_neg_inv: Limb,
124-
) -> BoxedUint {
125-
let mut ret = a.clone();
126-
square_montgomery_form_assign(&mut ret, modulus, mod_neg_inv);
127-
ret
128-
}
119+
/// Multiply two values in Montgomery form, assigning the product to `a`.
120+
pub(super) fn mul_assign(&mut self, a: &mut BoxedUint, b: &BoxedUint) {
121+
debug_assert_eq!(a.bits_precision(), self.modulus.bits_precision());
122+
debug_assert_eq!(b.bits_precision(), self.modulus.bits_precision());
129123

130-
#[inline]
131-
pub(super) fn square_montgomery_form_assign(
132-
a: &mut BoxedUint,
133-
modulus: &BoxedUint,
134-
mod_neg_inv: Limb,
135-
) {
136-
debug_assert_eq!(a.bits_precision(), modulus.bits_precision());
124+
self.clear_product();
125+
mul_limbs(&a.limbs, &b.limbs, &mut self.product.limbs);
126+
self.montgomery_reduction(a);
127+
}
128+
129+
/// Square the given value in Montgomery form.
130+
#[inline]
131+
pub(super) fn square(&mut self, a: &BoxedUint) -> BoxedUint {
132+
let mut ret = a.clone();
133+
self.square_assign(&mut ret);
134+
ret
135+
}
136+
137+
/// Square the given value in Montgomery form, assigning the result to `a`.
138+
#[inline]
139+
pub(super) fn square_assign(&mut self, a: &mut BoxedUint) {
140+
debug_assert_eq!(a.bits_precision(), self.modulus.bits_precision());
141+
self.clear_product();
142+
mul_limbs(&a.limbs, &a.limbs, &mut self.product.limbs);
143+
self.montgomery_reduction(a);
144+
}
145+
146+
/// Clear the internal product buffer.
147+
fn clear_product(&mut self) {
148+
self.product
149+
.limbs
150+
.iter_mut()
151+
.for_each(|limb| *limb = Limb::ZERO);
152+
}
137153

138-
let mut square = a.square();
139-
montgomery_reduction_boxed_mut(&mut square, modulus, mod_neg_inv, a);
154+
/// Perform Montgomery reduction on the internal product buffer.
155+
fn montgomery_reduction(&mut self, out: &mut BoxedUint) {
156+
montgomery_reduction_boxed_mut(&mut self.product, self.modulus, self.mod_neg_inv, out);
157+
}
158+
}
140159

141-
#[cfg(feature = "zeroize")]
142-
zeroize::Zeroize::zeroize(&mut square);
160+
#[cfg(feature = "zeroize")]
161+
impl Drop for MontgomeryMultiplier<'_> {
162+
fn drop(&mut self) {
163+
self.product.zeroize();
164+
}
143165
}

src/modular/boxed_residue/pow.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
//! Modular exponentiation support for [`BoxedResidue`].
22
3-
use super::{
4-
mul::{mul_montgomery_form, mul_montgomery_form_assign, square_montgomery_form_assign},
5-
BoxedResidue,
6-
};
3+
use super::{mul::MontgomeryMultiplier, BoxedResidue};
74
use crate::{BoxedUint, Limb, PowBoundedExp, Word};
5+
use alloc::vec::Vec;
86
use subtle::ConstantTimeEq;
97

108
impl BoxedResidue {
@@ -58,12 +56,15 @@ fn pow_montgomery_form(
5856
const WINDOW: u32 = 4;
5957
const WINDOW_MASK: Word = (1 << WINDOW) - 1;
6058

59+
let mut multiplier = MontgomeryMultiplier::new(modulus, mod_neg_inv);
60+
6161
// powers[i] contains x^i
62-
let mut powers = vec![r.clone(); 1 << WINDOW];
63-
powers[1] = x.clone();
62+
let mut powers = Vec::with_capacity(1 << WINDOW);
63+
powers.push(r.clone()); // 1 in Montgomery form
64+
powers.push(x.clone());
6465

65-
for i in 2..powers.len() {
66-
powers[i] = mul_montgomery_form(&powers[i - 1], x, modulus, mod_neg_inv);
66+
for i in 2..(1 << WINDOW) {
67+
powers.push(multiplier.mul(&powers[i - 1], x));
6768
}
6869

6970
let starting_limb = ((exponent_bits - 1) / Limb::BITS) as usize;
@@ -72,6 +73,7 @@ fn pow_montgomery_form(
7273
let starting_window_mask = (1 << (starting_bit_in_limb % WINDOW + 1)) - 1;
7374

7475
let mut z = r.clone(); // 1 in Montgomery form
76+
let mut power = powers[0].clone();
7577

7678
for limb_num in (0..=starting_limb).rev() {
7779
let w = exponent.as_limbs()[limb_num].0;
@@ -91,17 +93,17 @@ fn pow_montgomery_form(
9193
idx &= starting_window_mask;
9294
} else {
9395
for _ in 1..=WINDOW {
94-
square_montgomery_form_assign(&mut z, modulus, mod_neg_inv);
96+
multiplier.square_assign(&mut z);
9597
}
9698
}
9799

98100
// Constant-time lookup in the array of powers
99-
let mut power = powers[0].clone();
101+
power.limbs.copy_from_slice(&powers[0].limbs);
100102
for i in 1..(1 << WINDOW) {
101103
power.conditional_assign(&powers[i as usize], i.ct_eq(&idx));
102104
}
103105

104-
mul_montgomery_form_assign(&mut z, &power, modulus, mod_neg_inv);
106+
multiplier.mul_assign(&mut z, &power);
105107
}
106108
}
107109

src/uint.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub(crate) mod div_limb;
1919
mod encoding;
2020
mod from;
2121
mod inv_mod;
22-
mod mul;
22+
pub(crate) mod mul;
2323
mod mul_mod;
2424
mod neg;
2525
mod neg_mod;

src/uint/boxed.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -380,13 +380,19 @@ impl From<&[Limb]> for BoxedUint {
380380

381381
impl From<Box<[Limb]>> for BoxedUint {
382382
fn from(limbs: Box<[Limb]>) -> BoxedUint {
383-
Self { limbs }
383+
Vec::from(limbs).into()
384384
}
385385
}
386386

387387
impl From<Vec<Limb>> for BoxedUint {
388-
fn from(limbs: Vec<Limb>) -> BoxedUint {
389-
limbs.into_boxed_slice().into()
388+
fn from(mut limbs: Vec<Limb>) -> BoxedUint {
389+
if limbs.is_empty() {
390+
limbs.push(Limb::ZERO);
391+
}
392+
393+
Self {
394+
limbs: limbs.into_boxed_slice(),
395+
}
390396
}
391397
}
392398

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Seeds for failure cases proptest has generated in the past. It is
2+
# automatically read and these particular cases re-run before any
3+
# novel cases are generated.
4+
#
5+
# It is recommended to check this file in to source control so that
6+
# everyone who runs the test benefits from these saved cases.
7+
cc a2dbf0ee6304db81982e37ad9d9145a0f9de45730b9c41221dbf9fdfb9a246c5 # shrinks to a = BoxedUint(0x0000000000000002), b = BoxedUint(0x0000000000000100), n = BoxedResidueParams { modulus: BoxedUint(0x0000000000000003), r: BoxedUint(0x0000000000000001), r2: BoxedUint(0x0000000000000001), r3: BoxedUint(0x0000000000000001), mod_neg_inv: Limb(0x5555555555555555) }

0 commit comments

Comments
 (0)