Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support AES accelerating instructions #107

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions src/aarch64/assembler-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6021,6 +6021,38 @@ void Assembler::sha512su1(const VRegister& vd, const VRegister& vn, const VRegis
Emit(0xce608800 | Rd(vd) | Rn(vn) | Rm(vm));
}

// Emit AESD (AES single round decryption) operating on `vd` and `vn`.
//
// Asserts that the NEON and AES CPU features are available and that both
// registers use the 16B arrangement.
void Assembler::aesd(const VRegister& vd, const VRegister& vn) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
  VIXL_ASSERT(vd.Is16B() && vn.Is16B());

  // Base opcode with the Rd and Rn register fields merged in.
  const auto encoding = 0x4e285800 | Rd(vd) | Rn(vn);
  Emit(encoding);
}

// Emit AESE (AES single round encryption) operating on `vd` and `vn`.
//
// Asserts that the NEON and AES CPU features are available and that both
// registers use the 16B arrangement.
void Assembler::aese(const VRegister& vd, const VRegister& vn) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
  VIXL_ASSERT(vd.Is16B() && vn.Is16B());

  // Base opcode with the Rd and Rn register fields merged in.
  const auto encoding = 0x4e284800 | Rd(vd) | Rn(vn);
  Emit(encoding);
}

// Emit AESIMC (AES inverse mix columns) operating on `vd` and `vn`.
//
// Asserts that the NEON and AES CPU features are available and that both
// registers use the 16B arrangement.
void Assembler::aesimc(const VRegister& vd, const VRegister& vn) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
  VIXL_ASSERT(vd.Is16B() && vn.Is16B());

  // Base opcode with the Rd and Rn register fields merged in.
  const auto encoding = 0x4e287800 | Rd(vd) | Rn(vn);
  Emit(encoding);
}

// Emit AESMC (AES mix columns) operating on `vd` and `vn`.
//
// Asserts that the NEON and AES CPU features are available and that both
// registers use the 16B arrangement.
void Assembler::aesmc(const VRegister& vd, const VRegister& vn) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
  VIXL_ASSERT(vd.Is16B() && vn.Is16B());

  // Base opcode with the Rd and Rn register fields merged in.
  const auto encoding = 0x4e286800 | Rd(vd) | Rn(vn);
  Emit(encoding);
}

// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
Expand Down
12 changes: 12 additions & 0 deletions src/aarch64/assembler-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -3684,6 +3684,18 @@ class Assembler : public vixl::internal::AssemblerBase {
// SHA512 schedule Update 1.
void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// AES single round decryption.
// The assembler asserts that NEON and AES are available and that both `vd`
// and `vn` use the 16B arrangement.
void aesd(const VRegister& vd, const VRegister& vn);

// AES single round encryption.
// The assembler asserts that NEON and AES are available and that both `vd`
// and `vn` use the 16B arrangement.
void aese(const VRegister& vd, const VRegister& vn);

// AES inverse mix columns.
// The assembler asserts that NEON and AES are available and that both `vd`
// and `vn` use the 16B arrangement.
void aesimc(const VRegister& vd, const VRegister& vn);

// AES mix columns.
// The assembler asserts that NEON and AES are available and that both `vd`
// and `vn` use the 16B arrangement.
void aesmc(const VRegister& vd, const VRegister& vn);

// Scalable Vector Extensions.

// Absolute value (predicated).
Expand Down
1 change: 1 addition & 0 deletions src/aarch64/cpu-features-auditor-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) {

// Record the CPU features used by the AES crypto instructions: every
// instruction routed to this visitor is recorded as needing NEON and AES.
void CPUFeaturesAuditor::VisitCryptoAES(const Instruction* instr) {
  // The instruction is not inspected; the feature set is the same for all.
  USE(instr);

  RecordInstructionFeaturesScope scope(this);
  scope.Record(CPUFeatures::kNEON, CPUFeatures::kAES);
}

Expand Down
2 changes: 1 addition & 1 deletion src/aarch64/disasm-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2201,7 +2201,7 @@ void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) {


// Disassemble the AES crypto instructions.
//
// The mnemonic comes from the decoded instruction form; the operands are
// always printed as a 16B destination followed by a 16B source, e.g.
// "aesd v0.16b, v29.16b".
//
// The earlier VisitUnimplemented(instr) call is intentionally gone: these
// instructions are now supported, so they must not be reported as
// unimplemented before being formatted.
void Disassembler::VisitCryptoAES(const Instruction *instr) {
  FormatWithDecodedMnemonic(instr, "'Vd.16b, 'Vn.16b");
}

void Disassembler::DisassembleSHA512(const Instruction *instr) {
Expand Down
280 changes: 280 additions & 0 deletions src/aarch64/logic-aarch64.cc

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/aarch64/macro-assembler-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -2906,6 +2906,10 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
V(abs, Abs) \
V(addp, Addp) \
V(addv, Addv) \
V(aesd, Aesd) \
V(aese, Aese) \
V(aesimc, Aesimc) \
V(aesmc, Aesmc) \
V(cls, Cls) \
V(clz, Clz) \
V(cnt, Cnt) \
Expand Down
21 changes: 20 additions & 1 deletion src/aarch64/simulator-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7239,7 +7239,26 @@ void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {


// Simulate the AES crypto instructions (AESD, AESE, AESIMC, AESMC).
//
// AESD/AESE: the destination register is exclusive-ORed with the source
// register into a temporary, which is then passed to aes(); the result is
// written back to the destination register.
// AESIMC/AESMC: aesmix() is applied to the source register and the result is
// written to the destination register.
//
// The earlier VisitUnimplemented(instr) call is intentionally gone: these
// instructions are now simulated, so they must not be reported as
// unimplemented first.
void Simulator::VisitCryptoAES(const Instruction* instr) {
  SimVRegister& rd = ReadVRegister(instr->GetRd());
  SimVRegister& rn = ReadVRegister(instr->GetRn());
  SimVRegister temp;

  switch (form_hash_) {
    case "aesd_b_cryptoaes"_h:
      // temp = rd ^ rn, then a decryption round into rd.
      eor(kFormat16B, temp, rd, rn);
      aes(rd, temp, /* decrypt = */ true);
      break;
    case "aese_b_cryptoaes"_h:
      // temp = rd ^ rn, then an encryption round into rd.
      eor(kFormat16B, temp, rd, rn);
      aes(rd, temp, /* decrypt = */ false);
      break;
    case "aesimc_b_cryptoaes"_h:
      aesmix(rd, rn, /* inverse = */ true);
      break;
    case "aesmc_b_cryptoaes"_h:
      aesmix(rd, rn, /* inverse = */ false);
      break;
  }
}

void Simulator::SimulateSHA512(const Instruction* instr) {
Expand Down
8 changes: 8 additions & 0 deletions src/aarch64/simulator-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -4553,6 +4553,14 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src1,
const LogicVRegister& src2);


// AES round helper. Simulator::VisitCryptoAES calls it for AESD
// (decrypt = true) and AESE (decrypt = false), passing the destination
// register as `srcdst` and the already-XORed state as `src1`.
// NOTE(review): the definition is not visible here (presumably in
// logic-aarch64.cc) - confirm the exact round steps there.
LogicVRegister aes(LogicVRegister srcdst,
const LogicVRegister& src1,
bool decrypt);
// AES mix-columns helper. Simulator::VisitCryptoAES calls it for AESIMC
// (inverse = true) and AESMC (inverse = false), passing the destination
// register as `srcdst` and the source register as `src1`.
// NOTE(review): the definition is not visible here (presumably in
// logic-aarch64.cc) - confirm whether `srcdst` is also read as an input.
LogicVRegister aesmix(LogicVRegister srcdst,
const LogicVRegister& src1,
bool inverse);

#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
Expand Down
10 changes: 10 additions & 0 deletions test/aarch64/test-cpu-features-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3825,5 +3825,15 @@ TEST_FEAT(sha512su0_0, sha512su0(v2.V2D(), v4.V2D()))
TEST_FEAT(sha512su1_0, sha512su1(v19.V2D(), v9.V2D(), v27.V2D()))
#undef TEST_FEAT

// CPU feature tests for the AES instructions. Each TEST_FEAT instance expands
// to a TEST_TEMPLATE invocation with the feature set {kNEON, kAES}, a test
// name prefixed with NEON_AES_, and one assembler call to exercise.
// The macro is undefined again after the last instance.
#define TEST_FEAT(NAME, ASM) \
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kAES), \
NEON_AES_##NAME, \
ASM)
TEST_FEAT(aesd_0, aesd(v0.V16B(), v29.V16B()))
TEST_FEAT(aese_0, aese(v0.V16B(), v29.V16B()))
TEST_FEAT(aesimc_0, aesimc(v0.V16B(), v29.V16B()))
TEST_FEAT(aesmc_0, aesmc(v0.V16B(), v29.V16B()))
#undef TEST_FEAT

} // namespace aarch64
} // namespace vixl
11 changes: 11 additions & 0 deletions test/aarch64/test-disasm-neon-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4568,6 +4568,17 @@ TEST(neon_sha512) {
CLEANUP();
}

// Disassembly test for the four AES instructions: each MacroAssembler call is
// paired with the text it is expected to disassemble to (mnemonic followed by
// the 16B destination and source registers).
TEST(neon_aes) {
SETUP();

COMPARE_MACRO(Aesd(v0.V16B(), v29.V16B()), "aesd v0.16b, v29.16b");
COMPARE_MACRO(Aese(v0.V16B(), v29.V16B()), "aese v0.16b, v29.16b");
COMPARE_MACRO(Aesimc(v0.V16B(), v29.V16B()), "aesimc v0.16b, v29.16b");
COMPARE_MACRO(Aesmc(v0.V16B(), v29.V16B()), "aesmc v0.16b, v29.16b");

CLEANUP();
}

TEST(neon_unallocated_regression_test) {
SETUP();

Expand Down
208 changes: 208 additions & 0 deletions test/aarch64/test-simulator-sve-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1326,5 +1326,213 @@ TEST_SVE(neon_sha512su0) {
}
}

// Simulator test for AESD and AESE.
//
// Starting from the initial machine state, a fixed sequence of 30 raw
// AESD/AESE encodings is emitted with dci. The resulting machine state is
// hashed and the hash is compared with the expected value selected by the
// number of Q-sized lanes in the current SVE vector length.
// The "vl128 state" comments record the running hash after each instruction;
// the instruction order must not change or those values become stale.
TEST_SVE(neon_aes) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kAES);
START();

SetInitialMachineState(&masm);
// state = 0xe2bd2480

{
// The scope size matches the 30 instructions emitted below.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x4e285a86); // aesd v6.16b, v20.16b
// vl128 state = 0x801bfc08
__ dci(0x4e2858ae); // aesd v14.16b, v5.16b
// vl128 state = 0xbd83a757
__ dci(0x4e2858ac); // aesd v12.16b, v5.16b
// vl128 state = 0x9fb1dc6b
__ dci(0x4e2858ae); // aesd v14.16b, v5.16b
// vl128 state = 0xfa1fa7e4
__ dci(0x4e28482a); // aese v10.16b, v1.16b
// vl128 state = 0xecfcfe2d
__ dci(0x4e28483a); // aese v26.16b, v1.16b
// vl128 state = 0x05e22f07
__ dci(0x4e28488a); // aese v10.16b, v4.16b
// vl128 state = 0xdd53df5f
__ dci(0x4e28488e); // aese v14.16b, v4.16b
// vl128 state = 0x9d2ac50f
__ dci(0x4e28484f); // aese v15.16b, v2.16b
// vl128 state = 0xf45146ab
__ dci(0x4e28484b); // aese v11.16b, v2.16b
// vl128 state = 0xf1260a7c
__ dci(0x4e28485b); // aese v27.16b, v2.16b
// vl128 state = 0x3a0844da
__ dci(0x4e285819); // aesd v25.16b, v0.16b
// vl128 state = 0xaca89993
__ dci(0x4e284a09); // aese v9.16b, v16.16b
// vl128 state = 0xef4e9a5f
__ dci(0x4e285a4b); // aesd v11.16b, v18.16b
// vl128 state = 0x209a44bc
__ dci(0x4e285a4f); // aesd v15.16b, v18.16b
// vl128 state = 0xc6d2d718
__ dci(0x4e285a4d); // aesd v13.16b, v18.16b
// vl128 state = 0x1aceef8f
__ dci(0x4e285a45); // aesd v5.16b, v18.16b
// vl128 state = 0x7ed056c6
__ dci(0x4e285af5); // aesd v21.16b, v23.16b
// vl128 state = 0x429ed71e
__ dci(0x4e285a91); // aesd v17.16b, v20.16b
// vl128 state = 0xd7a1f687
__ dci(0x4e284ad9); // aese v25.16b, v22.16b
// vl128 state = 0x8fa44574
__ dci(0x4e284adb); // aese v27.16b, v22.16b
// vl128 state = 0xd2792169
__ dci(0x4e285afa); // aesd v26.16b, v23.16b
// vl128 state = 0xe502f095
__ dci(0x4e285bbb); // aesd v27.16b, v29.16b
// vl128 state = 0x0e3d3238
__ dci(0x4e285bbf); // aesd v31.16b, v29.16b
// vl128 state = 0x0ad06592
__ dci(0x4e285baf); // aesd v15.16b, v29.16b
// vl128 state = 0xb94f3c19
__ dci(0x4e284b3f); // aese v31.16b, v25.16b
// vl128 state = 0xf31a0da1
__ dci(0x4e284917); // aese v23.16b, v8.16b
// vl128 state = 0x7d2d7811
__ dci(0x4e284913); // aese v19.16b, v8.16b
// vl128 state = 0x41b7b854
__ dci(0x4e284911); // aese v17.16b, v8.16b
// vl128 state = 0x60600536
__ dci(0x4e2849d5); // aese v21.16b, v14.16b
// vl128 state = 0x3e0cc74f
}

// Hash the machine state into `state`, then load that 32-bit value into w0
// so it can be checked once the generated code has run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));

END();
if (CAN_RUN()) {
RUN();
// Sixteen expected hashes, indexed by (Q-sized lane count - 1). The first
// entry equals the final vl128 state noted above.
uint32_t expected_hashes[] = {
0x3e0cc74f,
0x7f17ba2e,
0xd59f8e91,
0x9f15a51b,
0x11d92e66,
0xcd53d015,
0xbc652785,
0x6974fa54,
0x953d342e,
0xf1aa56b3,
0xde8ca1d3,
0xba408b82,
0x48094fa4,
0xb757bcf1,
0x2cc5be58,
0x6e7a0f58,
};
ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
}
}

// Simulator test for AESMC and AESIMC.
//
// Starting from the initial machine state, a fixed sequence of 30 raw
// AESMC/AESIMC encodings is emitted with dci. The resulting machine state is
// hashed and the hash is compared with the expected value selected by the
// number of Q-sized lanes in the current SVE vector length.
// The "vl128 state" comments record the running hash after each instruction;
// the instruction order must not change or those values become stale.
TEST_SVE(neon_aesmc) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kAES);
START();

SetInitialMachineState(&masm);
// state = 0xe2bd2480

{
// The scope size matches the 30 instructions emitted below.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x4e287800); // aesimc v0.16b, v0.16b
// vl128 state = 0x03554749
__ dci(0x4e287a28); // aesimc v8.16b, v17.16b
// vl128 state = 0x59d5fedd
__ dci(0x4e287a2a); // aesimc v10.16b, v17.16b
// vl128 state = 0xcda29514
__ dci(0x4e286aae); // aesmc v14.16b, v21.16b
// vl128 state = 0xae8f019a
__ dci(0x4e286abe); // aesmc v30.16b, v21.16b
// vl128 state = 0x7b04c6c0
__ dci(0x4e286a0e); // aesmc v14.16b, v16.16b
// vl128 state = 0xaf6c5ce6
__ dci(0x4e286a0a); // aesmc v10.16b, v16.16b
// vl128 state = 0xf1d7fd2b
__ dci(0x4e286acb); // aesmc v11.16b, v22.16b
// vl128 state = 0x5d693c63
__ dci(0x4e286acf); // aesmc v15.16b, v22.16b
// vl128 state = 0xec8971ad
__ dci(0x4e286adf); // aesmc v31.16b, v22.16b
// vl128 state = 0x6389b200
__ dci(0x4e287a9d); // aesimc v29.16b, v20.16b
// vl128 state = 0xd69341fb
__ dci(0x4e28688d); // aesmc v13.16b, v4.16b
// vl128 state = 0x6344af95
__ dci(0x4e2878cf); // aesimc v15.16b, v6.16b
// vl128 state = 0x5c58dfac
__ dci(0x4e2878cb); // aesimc v11.16b, v6.16b
// vl128 state = 0x7dc9cf34
__ dci(0x4e2878c9); // aesimc v9.16b, v6.16b
// vl128 state = 0xff4b3544
__ dci(0x4e2878c1); // aesimc v1.16b, v6.16b
// vl128 state = 0xd1937de2
__ dci(0x4e287871); // aesimc v17.16b, v3.16b
// vl128 state = 0x7cabd208
__ dci(0x4e287815); // aesimc v21.16b, v0.16b
// vl128 state = 0xbc06df94
__ dci(0x4e28685d); // aesmc v29.16b, v2.16b
// vl128 state = 0xfc4478bb
__ dci(0x4e28685f); // aesmc v31.16b, v2.16b
// vl128 state = 0x0c72c200
__ dci(0x4e28787e); // aesimc v30.16b, v3.16b
// vl128 state = 0xdd822b9d
__ dci(0x4e28793f); // aesimc v31.16b, v9.16b
// vl128 state = 0x1397dcc6
__ dci(0x4e28793b); // aesimc v27.16b, v9.16b
// vl128 state = 0x43f3abd6
__ dci(0x4e28792b); // aesimc v11.16b, v9.16b
// vl128 state = 0xeb8ca365
__ dci(0x4e2869bb); // aesmc v27.16b, v13.16b
// vl128 state = 0x0a957f4f
__ dci(0x4e286b93); // aesmc v19.16b, v28.16b
// vl128 state = 0xbc5da8bd
__ dci(0x4e286b97); // aesmc v23.16b, v28.16b
// vl128 state = 0xc49343cc
__ dci(0x4e286b95); // aesmc v21.16b, v28.16b
// vl128 state = 0x8c80c144
__ dci(0x4e286b51); // aesmc v17.16b, v26.16b
// vl128 state = 0xeda3255d
__ dci(0x4e2869d3); // aesmc v19.16b, v14.16b
// vl128 state = 0x8db8a9d0
}

// Hash the machine state into `state`, then load that 32-bit value into w0
// so it can be checked once the generated code has run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));

END();
if (CAN_RUN()) {
RUN();
// Sixteen expected hashes, indexed by (Q-sized lane count - 1). The first
// entry equals the final vl128 state noted above.
uint32_t expected_hashes[] = {
0x8db8a9d0,
0xb13d8e1e,
0x9f33ca70,
0x38f7ef7a,
0x65352b29,
0xc4257260,
0xf49587c2,
0xb3f61256,
0x8ef4a534,
0x6e061aa9,
0x7270527d,
0x3e1f82f9,
0x1fe79e60,
0x985cab68,
0xe77b4484,
0xe3817f4e,
};
ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
}
}

} // namespace aarch64
} // namespace vixl