From 3e916a904712c19bb76bc4c2ae190e7702e135b2 Mon Sep 17 00:00:00 2001 From: Martyn Capewell Date: Fri, 3 Nov 2023 10:17:56 +0000 Subject: [PATCH] Support AES accelerating instructions Add support for four Neon AES accelerating instructions. --- src/aarch64/assembler-aarch64.cc | 32 +++ src/aarch64/assembler-aarch64.h | 12 + src/aarch64/cpu-features-auditor-aarch64.cc | 1 + src/aarch64/disasm-aarch64.cc | 2 +- src/aarch64/logic-aarch64.cc | 280 ++++++++++++++++++++ src/aarch64/macro-assembler-aarch64.h | 4 + src/aarch64/simulator-aarch64.cc | 21 +- src/aarch64/simulator-aarch64.h | 8 + test/aarch64/test-cpu-features-aarch64.cc | 10 + test/aarch64/test-disasm-neon-aarch64.cc | 11 + test/aarch64/test-simulator-sve-aarch64.cc | 208 +++++++++++++++ 11 files changed, 587 insertions(+), 2 deletions(-) diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc index f423a23e..de2dbc99 100644 --- a/src/aarch64/assembler-aarch64.cc +++ b/src/aarch64/assembler-aarch64.cc @@ -6021,6 +6021,38 @@ void Assembler::sha512su1(const VRegister& vd, const VRegister& vn, const VRegis Emit(0xce608800 | Rd(vd) | Rn(vn) | Rm(vm)); } +void Assembler::aesd(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kAES)); + VIXL_ASSERT(vd.Is16B() && vn.Is16B()); + + Emit(0x4e285800 | Rd(vd) | Rn(vn)); +} + +void Assembler::aese(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kAES)); + VIXL_ASSERT(vd.Is16B() && vn.Is16B()); + + Emit(0x4e284800 | Rd(vd) | Rn(vn)); +} + +void Assembler::aesimc(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kAES)); + VIXL_ASSERT(vd.Is16B() && vn.Is16B()); + + Emit(0x4e287800 | Rd(vd) | Rn(vn)); +} + +void Assembler::aesmc(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + 
VIXL_ASSERT(CPUHas(CPUFeatures::kAES)); + VIXL_ASSERT(vd.Is16B() && vn.Is16B()); + + Emit(0x4e286800 | Rd(vd) | Rn(vn)); +} + // Note: // For all ToImm instructions below, a difference in case // for the same letter indicates a negated bit. diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index 9887295a..cc4b3467 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -3684,6 +3684,18 @@ class Assembler : public vixl::internal::AssemblerBase { // SHA512 schedule Update 1. void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // AES single round decryption. + void aesd(const VRegister& vd, const VRegister& vn); + + // AES single round encryption. + void aese(const VRegister& vd, const VRegister& vn); + + // AES inverse mix columns. + void aesimc(const VRegister& vd, const VRegister& vn); + + // AES mix columns. + void aesmc(const VRegister& vd, const VRegister& vn); + // Scalable Vector Extensions. // Absolute value (predicated). 
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc index 1c2d20bd..d815924a 100644 --- a/src/aarch64/cpu-features-auditor-aarch64.cc +++ b/src/aarch64/cpu-features-auditor-aarch64.cc @@ -275,6 +275,7 @@ void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) { void CPUFeaturesAuditor::VisitCryptoAES(const Instruction* instr) { RecordInstructionFeaturesScope scope(this); + scope.Record(CPUFeatures::kNEON, CPUFeatures::kAES); USE(instr); } diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index 87cc62d6..f03b2572 100644 --- a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -2201,7 +2201,7 @@ void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) { void Disassembler::VisitCryptoAES(const Instruction *instr) { - VisitUnimplemented(instr); + FormatWithDecodedMnemonic(instr, "'Vd.16b, 'Vn.16b"); } void Disassembler::DisassembleSHA512(const Instruction *instr) { diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc index a1870e10..9a81e49c 100644 --- a/src/aarch64/logic-aarch64.cc +++ b/src/aarch64/logic-aarch64.cc @@ -8071,6 +8071,286 @@ LogicVRegister Simulator::sha512su1(LogicVRegister srcdst, return srcdst; } +static uint8_t GalMul(int table, uint64_t x) { + // Galois multiplication lookup tables. 
+ static const uint8_t ffmul02[256] = { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, + 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, + 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, + 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, + 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, + 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, + 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, + 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d, + 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, + 0x23, 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, + 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, 0x7b, 0x79, 0x7f, 0x7d, + 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, + 0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, + 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd, + 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, + 0xe3, 0xe1, 0xe7, 0xe5, + }; + + static const uint8_t ffmul03[256] = { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, + 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, + 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, + 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 
0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, + 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, + 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, + 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, + 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, + 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e, + 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, + 0xbf, 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, + 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, 0xcb, 0xc8, 0xcd, 0xce, + 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, + 0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, + 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e, + 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, + 0x1f, 0x1c, 0x19, 0x1a, + }; + + static const uint8_t ffmul09[256] = { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, + 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, + 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3b, 0x32, 0x29, 0x20, + 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, + 0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, + 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd, + 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, + 0x21, 0x28, 0x33, 0x3a, 0xdd, 
0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, + 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7, + 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, + 0x10, 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, + 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0x47, 0x4e, 0x55, 0x5c, + 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, + 0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, + 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba, + 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, + 0x5d, 0x54, 0x4f, 0x46, + }; + + static const uint8_t ffmul0b[256] = { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, + 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, + 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7b, 0x70, 0x6d, 0x66, + 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, + 0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, + 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b, + 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, + 0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, + 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea, + 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, + 0x33, 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, + 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 
0xf3, 0xee, 0xe5, 0x3c, 0x37, 0x2a, 0x21, + 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, + 0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, + 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67, + 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, + 0xbe, 0xb5, 0xa8, 0xa3, + }; + + static const uint8_t ffmul0d[256] = { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, + 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, + 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbb, 0xb6, 0xa1, 0xac, + 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, + 0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, + 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa, + 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, + 0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, + 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd, + 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, + 0x56, 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, + 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, 0xb1, 0xbc, 0xab, 0xa6, + 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, + 0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, + 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b, + 0x38, 0x35, 0x22, 0x2f, 0x64, 
0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, + 0x80, 0x8d, 0x9a, 0x97, + }; + + static const uint8_t ffmul0e[256] = { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, + 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, + 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdb, 0xd5, 0xc7, 0xc9, + 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, + 0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, + 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f, + 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, + 0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, + 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53, + 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, + 0xe9, 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, + 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x7a, 0x74, 0x66, 0x68, + 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, + 0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, + 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25, + 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, + 0x9f, 0x91, 0x83, 0x8d, + }; + + x &= 255; + switch (table) { + case 0x2: + return ffmul02[x]; + case 0x3: + return ffmul03[x]; + case 0x9: + return ffmul09[x]; + case 0xb: + return ffmul0b[x]; + case 0xd: + return ffmul0d[x]; + case 0xe: + return 
ffmul0e[x]; + case 0: + // Case 0 indicates no table lookup, used for some forward mix stages. + return x; + default: + VIXL_UNREACHABLE(); + return x; + } +} + + +static uint8_t AESMixInner(uint64_t* x, int stage, bool inverse) { + VIXL_ASSERT(IsUint2(stage)); + + int imc_gm[7] = {0xb, 0xd, 0x9, 0xe}; + int mc_gm[7] = {0x3, 0x0, 0x0, 0x2}; + + int* gm = inverse ? imc_gm : mc_gm; + int index = 3 - stage; + + uint8_t result = 0; + for (int i = 0; i < 4; i++) { + result ^= GalMul(gm[(index + i) % 4], x[i]); + } + return result; +} + + +LogicVRegister Simulator::aesmix(LogicVRegister dst, + const LogicVRegister& src, + bool inverse) { + uint64_t in[16] = {}; + src.UintArray(kFormat16B, in); + dst.ClearForWrite(kFormat16B); + + for (int c = 0; c < 16; c++) { + int cmod4 = c % 4; + int d = c - cmod4; + VIXL_ASSERT((d == 0) || (d == 4) || (d == 8) || (d == 12)); + dst.SetUint(kFormat16B, c, AESMixInner(&in[d], cmod4, inverse)); + } + + return dst; +} + +LogicVRegister Simulator::aes(LogicVRegister dst, + const LogicVRegister& src, + bool decrypt) { + dst.ClearForWrite(kFormat16B); + + // (Inverse) shift rows. + uint8_t shift[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; + uint8_t shift_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3}; + for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) { + uint8_t index = decrypt ? shift_inv[i] : shift[i]; + dst.SetUint(kFormat16B, i, src.Uint(kFormat16B, index)); + } + + // (Inverse) substitute bytes. 
+ static const uint8_t gf2[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, + 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, + 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, + 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, + 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, + 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, + 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, + 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, + 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, + 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, + 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, + 0xb0, 0x54, 0xbb, 0x16, + }; + static const uint8_t gf2_inv[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, + 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, + 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 
0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, + 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, + 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, + 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, + 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, + 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, + 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, + 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, + 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, + 0x55, 0x21, 0x0c, 0x7d, + }; + + for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) { + const uint8_t* table = decrypt ? 
gf2_inv : gf2; + dst.SetUint(kFormat16B, i, table[dst.Uint(kFormat16B, i)]); + } + return dst; +} + } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h index eead2451..a989dddb 100644 --- a/src/aarch64/macro-assembler-aarch64.h +++ b/src/aarch64/macro-assembler-aarch64.h @@ -2906,6 +2906,10 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { V(abs, Abs) \ V(addp, Addp) \ V(addv, Addv) \ + V(aesd, Aesd) \ + V(aese, Aese) \ + V(aesimc, Aesimc) \ + V(aesmc, Aesmc) \ V(cls, Cls) \ V(clz, Clz) \ V(cnt, Cnt) \ diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 134362c5..e63715cd 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -7239,7 +7239,26 @@ void Simulator::VisitCrypto3RegSHA(const Instruction* instr) { void Simulator::VisitCryptoAES(const Instruction* instr) { - VisitUnimplemented(instr); + SimVRegister& rd = ReadVRegister(instr->GetRd()); + SimVRegister& rn = ReadVRegister(instr->GetRn()); + SimVRegister temp; + + switch (form_hash_) { + case "aesd_b_cryptoaes"_h: + eor(kFormat16B, temp, rd, rn); + aes(rd, temp, /* decrypt = */ true); + break; + case "aese_b_cryptoaes"_h: + eor(kFormat16B, temp, rd, rn); + aes(rd, temp, /* decrypt = */ false); + break; + case "aesimc_b_cryptoaes"_h: + aesmix(rd, rn, /* inverse = */ true); + break; + case "aesmc_b_cryptoaes"_h: + aesmix(rd, rn, /* inverse = */ false); + break; + } } void Simulator::SimulateSHA512(const Instruction* instr) { diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 19032a6f..c20ec3c1 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -4553,6 +4553,14 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2); + + LogicVRegister aes(LogicVRegister srcdst, + const LogicVRegister& src1, + bool decrypt); + LogicVRegister 
aesmix(LogicVRegister srcdst, + const LogicVRegister& src1, + bool inverse); + #define NEON_3VREG_LOGIC_LIST(V) \ V(addhn) \ V(addhn2) \ diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc index a6b68fd5..a14c5241 100644 --- a/test/aarch64/test-cpu-features-aarch64.cc +++ b/test/aarch64/test-cpu-features-aarch64.cc @@ -3825,5 +3825,15 @@ TEST_FEAT(sha512su0_0, sha512su0(v2.V2D(), v4.V2D())) TEST_FEAT(sha512su1_0, sha512su1(v19.V2D(), v9.V2D(), v27.V2D())) #undef TEST_FEAT +#define TEST_FEAT(NAME, ASM) \ + TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kAES), \ + NEON_AES_##NAME, \ + ASM) +TEST_FEAT(aesd_0, aesd(v0.V16B(), v29.V16B())) +TEST_FEAT(aese_0, aese(v0.V16B(), v29.V16B())) +TEST_FEAT(aesimc_0, aesimc(v0.V16B(), v29.V16B())) +TEST_FEAT(aesmc_0, aesmc(v0.V16B(), v29.V16B())) +#undef TEST_FEAT + } // namespace aarch64 } // namespace vixl diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc index 5bf58b33..5bdded9d 100644 --- a/test/aarch64/test-disasm-neon-aarch64.cc +++ b/test/aarch64/test-disasm-neon-aarch64.cc @@ -4568,6 +4568,17 @@ TEST(neon_sha512) { CLEANUP(); } +TEST(neon_aes) { + SETUP(); + + COMPARE_MACRO(Aesd(v0.V16B(), v29.V16B()), "aesd v0.16b, v29.16b"); + COMPARE_MACRO(Aese(v0.V16B(), v29.V16B()), "aese v0.16b, v29.16b"); + COMPARE_MACRO(Aesimc(v0.V16B(), v29.V16B()), "aesimc v0.16b, v29.16b"); + COMPARE_MACRO(Aesmc(v0.V16B(), v29.V16B()), "aesmc v0.16b, v29.16b"); + + CLEANUP(); +} + TEST(neon_unallocated_regression_test) { SETUP(); diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc index 2961b6a4..e62fea26 100644 --- a/test/aarch64/test-simulator-sve-aarch64.cc +++ b/test/aarch64/test-simulator-sve-aarch64.cc @@ -1326,5 +1326,213 @@ TEST_SVE(neon_sha512su0) { } } +TEST_SVE(neon_aes) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kNEON, + CPUFeatures::kCRC32, + 
CPUFeatures::kAES); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x4e285a86); // aesd v6.16b, v20.16b + // vl128 state = 0x801bfc08 + __ dci(0x4e2858ae); // aesd v14.16b, v5.16b + // vl128 state = 0xbd83a757 + __ dci(0x4e2858ac); // aesd v12.16b, v5.16b + // vl128 state = 0x9fb1dc6b + __ dci(0x4e2858ae); // aesd v14.16b, v5.16b + // vl128 state = 0xfa1fa7e4 + __ dci(0x4e28482a); // aese v10.16b, v1.16b + // vl128 state = 0xecfcfe2d + __ dci(0x4e28483a); // aese v26.16b, v1.16b + // vl128 state = 0x05e22f07 + __ dci(0x4e28488a); // aese v10.16b, v4.16b + // vl128 state = 0xdd53df5f + __ dci(0x4e28488e); // aese v14.16b, v4.16b + // vl128 state = 0x9d2ac50f + __ dci(0x4e28484f); // aese v15.16b, v2.16b + // vl128 state = 0xf45146ab + __ dci(0x4e28484b); // aese v11.16b, v2.16b + // vl128 state = 0xf1260a7c + __ dci(0x4e28485b); // aese v27.16b, v2.16b + // vl128 state = 0x3a0844da + __ dci(0x4e285819); // aesd v25.16b, v0.16b + // vl128 state = 0xaca89993 + __ dci(0x4e284a09); // aese v9.16b, v16.16b + // vl128 state = 0xef4e9a5f + __ dci(0x4e285a4b); // aesd v11.16b, v18.16b + // vl128 state = 0x209a44bc + __ dci(0x4e285a4f); // aesd v15.16b, v18.16b + // vl128 state = 0xc6d2d718 + __ dci(0x4e285a4d); // aesd v13.16b, v18.16b + // vl128 state = 0x1aceef8f + __ dci(0x4e285a45); // aesd v5.16b, v18.16b + // vl128 state = 0x7ed056c6 + __ dci(0x4e285af5); // aesd v21.16b, v23.16b + // vl128 state = 0x429ed71e + __ dci(0x4e285a91); // aesd v17.16b, v20.16b + // vl128 state = 0xd7a1f687 + __ dci(0x4e284ad9); // aese v25.16b, v22.16b + // vl128 state = 0x8fa44574 + __ dci(0x4e284adb); // aese v27.16b, v22.16b + // vl128 state = 0xd2792169 + __ dci(0x4e285afa); // aesd v26.16b, v23.16b + // vl128 state = 0xe502f095 + __ dci(0x4e285bbb); // aesd v27.16b, v29.16b + // vl128 state = 0x0e3d3238 + __ dci(0x4e285bbf); // aesd v31.16b, v29.16b + // vl128 state = 0x0ad06592 + __ 
dci(0x4e285baf); // aesd v15.16b, v29.16b + // vl128 state = 0xb94f3c19 + __ dci(0x4e284b3f); // aese v31.16b, v25.16b + // vl128 state = 0xf31a0da1 + __ dci(0x4e284917); // aese v23.16b, v8.16b + // vl128 state = 0x7d2d7811 + __ dci(0x4e284913); // aese v19.16b, v8.16b + // vl128 state = 0x41b7b854 + __ dci(0x4e284911); // aese v17.16b, v8.16b + // vl128 state = 0x60600536 + __ dci(0x4e2849d5); // aese v21.16b, v14.16b + // vl128 state = 0x3e0cc74f + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x3e0cc74f, + 0x7f17ba2e, + 0xd59f8e91, + 0x9f15a51b, + 0x11d92e66, + 0xcd53d015, + 0xbc652785, + 0x6974fa54, + 0x953d342e, + 0xf1aa56b3, + 0xde8ca1d3, + 0xba408b82, + 0x48094fa4, + 0xb757bcf1, + 0x2cc5be58, + 0x6e7a0f58, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + +TEST_SVE(neon_aesmc) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kNEON, + CPUFeatures::kCRC32, + CPUFeatures::kAES); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 30 * kInstructionSize); + __ dci(0x4e287800); // aesimc v0.16b, v0.16b + // vl128 state = 0x03554749 + __ dci(0x4e287a28); // aesimc v8.16b, v17.16b + // vl128 state = 0x59d5fedd + __ dci(0x4e287a2a); // aesimc v10.16b, v17.16b + // vl128 state = 0xcda29514 + __ dci(0x4e286aae); // aesmc v14.16b, v21.16b + // vl128 state = 0xae8f019a + __ dci(0x4e286abe); // aesmc v30.16b, v21.16b + // vl128 state = 0x7b04c6c0 + __ dci(0x4e286a0e); // aesmc v14.16b, v16.16b + // vl128 state = 0xaf6c5ce6 + __ dci(0x4e286a0a); // aesmc v10.16b, v16.16b + // vl128 state = 0xf1d7fd2b + __ dci(0x4e286acb); // aesmc v11.16b, v22.16b + // vl128 state = 0x5d693c63 + __ dci(0x4e286acf); // aesmc v15.16b, v22.16b + // vl128 state = 0xec8971ad + __ dci(0x4e286adf); // aesmc v31.16b, v22.16b + 
// vl128 state = 0x6389b200 + __ dci(0x4e287a9d); // aesimc v29.16b, v20.16b + // vl128 state = 0xd69341fb + __ dci(0x4e28688d); // aesmc v13.16b, v4.16b + // vl128 state = 0x6344af95 + __ dci(0x4e2878cf); // aesimc v15.16b, v6.16b + // vl128 state = 0x5c58dfac + __ dci(0x4e2878cb); // aesimc v11.16b, v6.16b + // vl128 state = 0x7dc9cf34 + __ dci(0x4e2878c9); // aesimc v9.16b, v6.16b + // vl128 state = 0xff4b3544 + __ dci(0x4e2878c1); // aesimc v1.16b, v6.16b + // vl128 state = 0xd1937de2 + __ dci(0x4e287871); // aesimc v17.16b, v3.16b + // vl128 state = 0x7cabd208 + __ dci(0x4e287815); // aesimc v21.16b, v0.16b + // vl128 state = 0xbc06df94 + __ dci(0x4e28685d); // aesmc v29.16b, v2.16b + // vl128 state = 0xfc4478bb + __ dci(0x4e28685f); // aesmc v31.16b, v2.16b + // vl128 state = 0x0c72c200 + __ dci(0x4e28787e); // aesimc v30.16b, v3.16b + // vl128 state = 0xdd822b9d + __ dci(0x4e28793f); // aesimc v31.16b, v9.16b + // vl128 state = 0x1397dcc6 + __ dci(0x4e28793b); // aesimc v27.16b, v9.16b + // vl128 state = 0x43f3abd6 + __ dci(0x4e28792b); // aesimc v11.16b, v9.16b + // vl128 state = 0xeb8ca365 + __ dci(0x4e2869bb); // aesmc v27.16b, v13.16b + // vl128 state = 0x0a957f4f + __ dci(0x4e286b93); // aesmc v19.16b, v28.16b + // vl128 state = 0xbc5da8bd + __ dci(0x4e286b97); // aesmc v23.16b, v28.16b + // vl128 state = 0xc49343cc + __ dci(0x4e286b95); // aesmc v21.16b, v28.16b + // vl128 state = 0x8c80c144 + __ dci(0x4e286b51); // aesmc v17.16b, v26.16b + // vl128 state = 0xeda3255d + __ dci(0x4e2869d3); // aesmc v19.16b, v14.16b + // vl128 state = 0x8db8a9d0 + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x8db8a9d0, + 0xb13d8e1e, + 0x9f33ca70, + 0x38f7ef7a, + 0x65352b29, + 0xc4257260, + 0xf49587c2, + 0xb3f61256, + 0x8ef4a534, + 0x6e061aa9, + 0x7270527d, + 0x3e1f82f9, + 0x1fe79e60, + 0x985cab68, + 
0xe77b4484, + 0xe3817f4e, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + } // namespace aarch64 } // namespace vixl