Skip to content

Commit 99b219a

Browse files
authored
Performance improvements in aes_soft (#166)
1 parent 75c2c21 commit 99b219a

File tree

3 files changed

+120
-100
lines changed

3 files changed

+120
-100
lines changed

aes/aes-soft/src/bitslice.rs

Lines changed: 103 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -808,70 +808,96 @@ impl<T: AesBitValueOps + Copy + 'static> AesOps for Bs8State<T> {
808808
)
809809
}
810810

811-
// Formula from [5]
811+
// See "Faster and Timing-Attack Resistant AES-GCM" by Emilia Käsper and Peter Schwabe (CHES 2009).
812+
#[rustfmt::skip]
812813
fn mix_columns(self) -> Bs8State<T> {
813814
let Bs8State(x0, x1, x2, x3, x4, x5, x6, x7) = self;
814-
815-
let x0out = x7 ^ x7.ror1() ^ x0.ror1() ^ (x0 ^ x0.ror1()).ror2();
816-
let x1out = x0 ^ x0.ror1() ^ x7 ^ x7.ror1() ^ x1.ror1() ^ (x1 ^ x1.ror1()).ror2();
817-
let x2out = x1 ^ x1.ror1() ^ x2.ror1() ^ (x2 ^ x2.ror1()).ror2();
818-
let x3out = x2 ^ x2.ror1() ^ x7 ^ x7.ror1() ^ x3.ror1() ^ (x3 ^ x3.ror1()).ror2();
819-
let x4out = x3 ^ x3.ror1() ^ x7 ^ x7.ror1() ^ x4.ror1() ^ (x4 ^ x4.ror1()).ror2();
820-
let x5out = x4 ^ x4.ror1() ^ x5.ror1() ^ (x5 ^ x5.ror1()).ror2();
821-
let x6out = x5 ^ x5.ror1() ^ x6.ror1() ^ (x6 ^ x6.ror1()).ror2();
822-
let x7out = x6 ^ x6.ror1() ^ x7.ror1() ^ (x7 ^ x7.ror1()).ror2();
823-
824-
Bs8State(x0out, x1out, x2out, x3out, x4out, x5out, x6out, x7out)
815+
let (t0, t1, t2, t3, t4, t5, t6, t7) = (
816+
x0.ror1(),
817+
x1.ror1(),
818+
x2.ror1(),
819+
x3.ror1(),
820+
x4.ror1(),
821+
x5.ror1(),
822+
x6.ror1(),
823+
x7.ror1(),
824+
);
825+
let (u0, u1, u2, u3, u4, u5, u6, u7) = (
826+
x0 ^ t0,
827+
x1 ^ t1,
828+
x2 ^ t2,
829+
x3 ^ t3,
830+
x4 ^ t4,
831+
x5 ^ t5,
832+
x6 ^ t6,
833+
x7 ^ t7,
834+
);
835+
Bs8State(
836+
t0 ^ u7 ^ u0.ror2(),
837+
t1 ^ u0 ^ u7 ^ u1.ror2(),
838+
t2 ^ u1 ^ u2.ror2(),
839+
t3 ^ u2 ^ u7 ^ u3.ror2(),
840+
t4 ^ u3 ^ u7 ^ u4.ror2(),
841+
t5 ^ u4 ^ u5.ror2(),
842+
t6 ^ u5 ^ u6.ror2(),
843+
t7 ^ u6 ^ u7.ror2(),
844+
)
825845
}
826846

827-
// Formula from [6]
847+
// Formula derived using the same approach as for 'mix_columns'.
848+
#[rustfmt::skip]
828849
fn inv_mix_columns(self) -> Bs8State<T> {
829850
let Bs8State(x0, x1, x2, x3, x4, x5, x6, x7) = self;
830-
831-
let x0out = x5 ^ x6 ^ x7 ^ (x5 ^ x7 ^ x0).ror1() ^ (x0 ^ x5 ^ x6).ror2() ^ (x5 ^ x0).ror3();
832-
let x1out = x5
833-
^ x0
834-
^ (x6 ^ x5 ^ x0 ^ x7 ^ x1).ror1()
835-
^ (x1 ^ x7 ^ x5).ror2()
836-
^ (x6 ^ x5 ^ x1).ror3();
837-
let x2out = x6
838-
^ x0
839-
^ x1
840-
^ (x7 ^ x6 ^ x1 ^ x2).ror1()
841-
^ (x0 ^ x2 ^ x6).ror2()
842-
^ (x7 ^ x6 ^ x2).ror3();
843-
let x3out = x0
844-
^ x5
845-
^ x1
846-
^ x6
847-
^ x2
848-
^ (x0 ^ x5 ^ x2 ^ x3).ror1()
849-
^ (x0 ^ x1 ^ x3 ^ x5 ^ x6 ^ x7).ror2()
850-
^ (x0 ^ x5 ^ x7 ^ x3).ror3();
851-
let x4out = x1
852-
^ x5
853-
^ x2
854-
^ x3
855-
^ (x1 ^ x6 ^ x5 ^ x3 ^ x7 ^ x4).ror1()
856-
^ (x1 ^ x2 ^ x4 ^ x5 ^ x7).ror2()
857-
^ (x1 ^ x5 ^ x6 ^ x4).ror3();
858-
let x5out = x2
859-
^ x6
860-
^ x3
861-
^ x4
862-
^ (x2 ^ x7 ^ x6 ^ x4 ^ x5).ror1()
863-
^ (x2 ^ x3 ^ x5 ^ x6).ror2()
864-
^ (x2 ^ x6 ^ x7 ^ x5).ror3();
865-
let x6out = x3
866-
^ x7
867-
^ x4
868-
^ x5
869-
^ (x3 ^ x7 ^ x5 ^ x6).ror1()
870-
^ (x3 ^ x4 ^ x6 ^ x7).ror2()
871-
^ (x3 ^ x7 ^ x6).ror3();
872-
let x7out = x4 ^ x5 ^ x6 ^ (x4 ^ x6 ^ x7).ror1() ^ (x4 ^ x5 ^ x7).ror2() ^ (x4 ^ x7).ror3();
873-
874-
Bs8State(x0out, x1out, x2out, x3out, x4out, x5out, x6out, x7out)
851+
let (t0, t1, t2, t3, t4, t5, t6, t7) = (
852+
x0.ror1(),
853+
x1.ror1(),
854+
x2.ror1(),
855+
x3.ror1(),
856+
x4.ror1(),
857+
x5.ror1(),
858+
x6.ror1(),
859+
x7.ror1(),
860+
);
861+
let (u0, u1, u2, u3, u4, u5, u6, u7) = (
862+
x0 ^ t0,
863+
x1 ^ t1,
864+
x2 ^ t2,
865+
x3 ^ t3,
866+
x4 ^ t4,
867+
x5 ^ t5,
868+
x6 ^ t6,
869+
x7 ^ t7,
870+
);
871+
let (v0, v1, v2, v3, v4, v5, v6, v7) = (
872+
x0 ^ u7,
873+
x1 ^ u0 ^ u7,
874+
x2 ^ u1,
875+
x3 ^ u2 ^ u7,
876+
x4 ^ u3 ^ u7,
877+
x5 ^ u4,
878+
x6 ^ u5,
879+
x7 ^ u6,
880+
);
881+
let (w0, w1, w2, w3, w4, w5, w6, w7) = (
882+
u0 ^ v6,
883+
u1 ^ v6 ^ v7,
884+
u2 ^ v0 ^ v7,
885+
u3 ^ v1 ^ v6,
886+
u4 ^ v2 ^ v6 ^ v7,
887+
u5 ^ v3 ^ v7,
888+
u6 ^ v4,
889+
u7 ^ v5,
890+
);
891+
Bs8State(
892+
v0 ^ w0 ^ w0.ror2(),
893+
v1 ^ w1 ^ w1.ror2(),
894+
v2 ^ w2 ^ w2.ror2(),
895+
v3 ^ w3 ^ w3.ror2(),
896+
v4 ^ w4 ^ w4.ror2(),
897+
v5 ^ w5 ^ w5.ror2(),
898+
v6 ^ w6 ^ w6.ror2(),
899+
v7 ^ w7 ^ w7.ror2(),
900+
)
875901
}
876902

877903
fn add_round_key(self, rk: &Bs8State<T>) -> Bs8State<T> {
@@ -886,42 +912,32 @@ pub trait AesBitValueOps:
886912
fn inv_shift_row(self) -> Self;
887913
fn ror1(self) -> Self;
888914
fn ror2(self) -> Self;
889-
fn ror3(self) -> Self;
915+
}
916+
917+
/// Swaps the bits of `x` selected by `m` with the bits selected by `m << s`.
///
/// Every bit at a position `i` that is set in `m` trades places with the bit
/// at position `i + s`; all other bits of `x` are returned unchanged.
///
/// The mask must satisfy `m & (m << s) == 0` (the two bit sets do not
/// overlap) and `(m << s) >> s == m` (no selected bit is shifted out of the
/// word). Both conditions are checked in debug builds only.
fn delta_swap(x: u16, m: u16, s: u16) -> u16 {
    debug_assert!((m & (m << s)) == 0, "delta_swap: bit sets overlap");
    debug_assert!(((m << s) >> s) == m, "delta_swap: mask bits shifted out");

    // `t` has a bit set at every position of `m` whose two paired bits differ.
    let t = (x ^ (x >> s)) & m;
    // Flipping both members of each differing pair exchanges them.
    x ^ (t ^ (t << s))
}
891923

892924
impl AesBitValueOps for u16 {
893925
fn shift_row(self) -> u16 {
894-
// first 4 bits represent first row - don't shift
895-
(self & 0x000f) |
896-
// next 4 bits represent 2nd row - left rotate 1 bit
897-
((self & 0x00e0) >> 1) | ((self & 0x0010) << 3) |
898-
// next 4 bits represent 3rd row - left rotate 2 bits
899-
((self & 0x0c00) >> 2) | ((self & 0x0300) << 2) |
900-
// next 4 bits represent 4th row - left rotate 3 bits
901-
((self & 0x8000) >> 3) | ((self & 0x7000) << 1)
926+
let temp = delta_swap(self, 0x2310, 2);
927+
delta_swap(temp, 0x5050, 1)
902928
}
903929

904930
fn inv_shift_row(self) -> u16 {
905-
// first 4 bits represent first row - don't shift
906-
(self & 0x000f) |
907-
// next 4 bits represent 2nd row - right rotate 1 bit
908-
((self & 0x0080) >> 3) | ((self & 0x0070) << 1) |
909-
// next 4 bits represent 3rd row - right rotate 2 bits
910-
((self & 0x0c00) >> 2) | ((self & 0x0300) << 2) |
911-
// next 4 bits represent 4th row - right rotate 3 bits
912-
((self & 0xe000) >> 1) | ((self & 0x1000) << 3)
931+
let temp = delta_swap(self, 0x5050, 1);
932+
delta_swap(temp, 0x2310, 2)
913933
}
914934

915935
fn ror1(self) -> u16 {
916-
self >> 4 | self << 12
936+
self.rotate_right(4)
917937
}
918938

919939
fn ror2(self) -> u16 {
920-
self >> 8 | self << 8
921-
}
922-
923-
fn ror3(self) -> u16 {
924-
self >> 12 | self << 4
940+
self.rotate_right(8)
925941
}
926942
}
927943

@@ -966,19 +982,19 @@ impl AesBitValueOps for u32x4 {
966982
let u32x4(a0, a1, a2, a3) = self;
967983
u32x4(
968984
a0,
969-
a1 >> 8 | a1 << 24,
970-
a2 >> 16 | a2 << 16,
971-
a3 >> 24 | a3 << 8,
985+
a1.rotate_right(8),
986+
a2.rotate_right(16),
987+
a3.rotate_right(24),
972988
)
973989
}
974990

975991
fn inv_shift_row(self) -> u32x4 {
976992
let u32x4(a0, a1, a2, a3) = self;
977993
u32x4(
978994
a0,
979-
a1 >> 24 | a1 << 8,
980-
a2 >> 16 | a2 << 16,
981-
a3 >> 8 | a3 << 24,
995+
a1.rotate_left(8),
996+
a2.rotate_left(16),
997+
a3.rotate_left(24),
982998
)
983999
}
9841000

@@ -991,9 +1007,4 @@ impl AesBitValueOps for u32x4 {
9911007
let u32x4(a0, a1, a2, a3) = self;
9921008
u32x4(a2, a3, a0, a1)
9931009
}
994-
995-
fn ror3(self) -> u32x4 {
996-
let u32x4(a0, a1, a2, a3) = self;
997-
u32x4(a3, a0, a1, a2)
998-
}
9991010
}

aes/aes-soft/src/expand.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,22 @@ fn ffmulx(x: u32) -> u32 {
1616
((x & m2) << 1) ^ (((x & m1) >> 7) * m3)
1717
}
1818

19-
fn inv_mcol(x: u32) -> u32 {
20-
let f2 = ffmulx(x);
21-
let f4 = ffmulx(f2);
22-
let f8 = ffmulx(f4);
23-
let f9 = x ^ f8;
19+
/// Equivalent to applying `ffmulx` twice: each of the four bytes packed in
/// `x` is multiplied by 4 in GF(2^8), computed in a single pass.
///
/// Bits 5..0 of every byte are simply shifted left by two. Bits 7 and 6 would
/// leave the byte, so they are folded back in instead: bit 6 contributes
/// `0x1b` and bit 7 contributes `0x36` to the result byte.
fn ffmulx2(x: u32) -> u32 {
    // Bits 7 and 6 of every byte: the part that overflows on `<< 2`.
    const HIGH_TWO: u32 = 0xc0c0_c0c0;
    // Bits 5..0 of every byte: the part that stays inside its byte.
    const LOW_SIX: u32 = 0x3f3f_3f3f;

    let shifted = (x & LOW_SIX) << 2;
    let overflow = x & HIGH_TWO;
    // Per byte: bit 7 = b7, bit 6 = b6 ^ b7, bit 5 = b6.
    let folded = overflow ^ (overflow >> 1);
    // `>> 2` places the pattern on bits 5..3 and `>> 5` on bits 2..0, so b6
    // lands on bits 4, 3, 1, 0 (0x1b) and b7 on bits 5, 4, 2, 1 (0x36).
    shifted ^ (folded >> 2) ^ (folded >> 5)
}
2428

25-
f2 ^ f4 ^ f8 ^ (f2 ^ f9).rotate_right(8) ^ (f4 ^ f9).rotate_right(16) ^ f9.rotate_right(24)
29+
/// Transforms one packed 32-bit column using two field multiplications.
///
/// NOTE(review): the annotations below assume `ffmulx` multiplies every byte
/// by 2 in GF(2^8) and `ffmulx2` multiplies every byte by 4 — confirm against
/// their definitions. Under that assumption, writing `R` for
/// `rotate_right(8)`, the result is
/// `(0x0e + 0x0b*R + 0x0d*R^2 + 0x09*R^3) * x`, the same value as summing the
/// individually rotated multiples `f2`, `f4`, `f8`, `f9`.
fn inv_mcol(x: u32) -> u32 {
    // (1 + R) * x
    let t1 = x ^ x.rotate_right(8);
    // (1 + 2*(1 + R)) * x = (3 + 2*R) * x
    let t2 = x ^ ffmulx(t1);
    // ((1 + R) + 4*(3 + 2*R)) * x = (0x0d + 0x09*R) * x
    let t3 = t1 ^ ffmulx2(t2);
    // t2 + (1 + R^2) * t3 yields the four coefficients listed above.
    t2 ^ t3 ^ t3.rotate_right(16)
}
2736

2837
fn sub_word(x: u32) -> u32 {

block-modes/src/traits.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ where
6363
fn decrypt(mut self, buffer: &mut [u8]) -> Result<&[u8], BlockModeError> {
6464
let bs = C::BlockSize::to_usize();
6565
if buffer.len() % bs != 0 {
66-
Err(BlockModeError)?
66+
return Err(BlockModeError);
6767
}
6868
self.decrypt_blocks(to_blocks(buffer));
6969
P::unpad(buffer).map_err(|_| BlockModeError)
@@ -94,7 +94,7 @@ where
9494
fn decrypt_vec(mut self, ciphertext: &[u8]) -> Result<Vec<u8>, BlockModeError> {
9595
let bs = C::BlockSize::to_usize();
9696
if ciphertext.len() % bs != 0 {
97-
Err(BlockModeError)?
97+
return Err(BlockModeError);
9898
}
9999
let mut buf = ciphertext.to_vec();
100100
self.decrypt_blocks(to_blocks(&mut buf));

0 commit comments

Comments
 (0)