Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support SHA-3 accelerating instructions #101

Merged
merged 1 commit into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/aarch64/assembler-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5876,6 +5876,39 @@ void Assembler::ummla(const VRegister& vd, const VRegister& vn, const VRegister&
Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm));
}

void Assembler::bcax(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B());

Emit(0xce200000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
}

// Emits EOR3 (Three-way Exclusive-OR) with base opcode 0xce000000.
//
// Requires the NEON and SHA3 CPU features, and all four operands must be 16B
// vectors.
void Assembler::eor3(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     const VRegister& va) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
  VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B() && va.Is16B());

  // Assemble the register fields onto the fixed opcode bits, then emit.
  const auto encoding = 0xce000000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va);
  Emit(encoding);
}

// Emits XAR (Exclusive-OR and Rotate) with base opcode 0xce800000.
//
// Requires the NEON and SHA3 CPU features. The three vector operands must be
// 2D, and `rotate` must pass IsUint6(); it is placed in the encoding shifted
// left by 10 bits.
void Assembler::xar(const VRegister& vd,
                    const VRegister& vn,
                    const VRegister& vm,
                    int rotate) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
  VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
  VIXL_ASSERT(IsUint6(rotate));

  // Position the rotate amount in its immediate field before merging it in.
  const int rotate_field = rotate << 10;
  Emit(0xce800000 | Rd(vd) | Rn(vn) | Rm(vm) | rotate_field);
}

// Emits RAX1 (Rotate and Exclusive-OR) with base opcode 0xce608c00.
//
// Requires the NEON and SHA3 CPU features, and all three operands must be 2D
// vectors.
void Assembler::rax1(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm) {
  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
  VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
  VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());

  // Assemble the register fields onto the fixed opcode bits, then emit.
  const auto encoding = 0xce608c00 | Rd(vd) | Rn(vn) | Rm(vm);
  Emit(encoding);
}

// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
Expand Down
21 changes: 21 additions & 0 deletions src/aarch64/assembler-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -3621,6 +3621,27 @@ class Assembler : public vixl::internal::AssemblerBase {
// Unsigned 8-bit integer matrix multiply-accumulate (vector).
void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bit Clear and exclusive-OR.
// Requires CPUFeatures::kNEON and CPUFeatures::kSHA3. Operands use the 16B
// arrangement.
void bcax(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
const VRegister& va);

// Three-way Exclusive-OR.
// Requires CPUFeatures::kNEON and CPUFeatures::kSHA3. All four operands must
// use the 16B arrangement.
void eor3(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
const VRegister& va);

// Exclusive-OR and Rotate.
// Requires CPUFeatures::kNEON and CPUFeatures::kSHA3. The vector operands must
// use the 2D arrangement, and `rotate` must satisfy IsUint6().
void xar(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
int rotate);

// Rotate and Exclusive-OR.
// Requires CPUFeatures::kNEON and CPUFeatures::kSHA3. All three operands must
// use the 2D arrangement.
void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Scalable Vector Extensions.

// Absolute value (predicated).
Expand Down
8 changes: 8 additions & 0 deletions src/aarch64/cpu-features-auditor-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1835,6 +1835,14 @@ void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
{"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
{"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
{"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
{"bcax_vvv16_crypto4"_h,
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
{"eor3_vvv16_crypto4"_h,
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
{"rax1_vvv2_cryptosha512_3"_h,
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
{"xar_vvv2_crypto3_imm6"_h,
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
};

if (features.count(form_hash_) > 0) {
Expand Down
4 changes: 0 additions & 4 deletions src/aarch64/decoder-visitor-map-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -2638,15 +2638,13 @@
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \
{"ret_64r_branch_reg"_h, \
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \
{"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfmlal_asimdelem_f"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfmlal_asimdsame2_f"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented}, \
{"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented}, \
{"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
Expand All @@ -2658,7 +2656,6 @@
{"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
{"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
{"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
{"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
{"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
Expand Down Expand Up @@ -2686,7 +2683,6 @@
{"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \
{"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
{"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
{"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfdot_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, \
Expand Down
15 changes: 15 additions & 0 deletions src/aarch64/disasm-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,10 @@ const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() {
{"umax_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
{"umin_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
{"umin_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
{"bcax_vvv16_crypto4"_h, &Disassembler::DisassembleNEON4Same},
{"eor3_vvv16_crypto4"_h, &Disassembler::DisassembleNEON4Same},
{"xar_vvv2_crypto3_imm6"_h, &Disassembler::DisassembleNEONXar},
{"rax1_vvv2_cryptosha512_3"_h, &Disassembler::DisassembleNEONRax1},
};
return &form_to_visitor;
} // NOLINT(readability/fn_size)
Expand Down Expand Up @@ -2430,6 +2434,17 @@ void Disassembler::VisitNEON3SameExtra(const Instruction *instr) {
Format(instr, mnemonic, nfd.Substitute(form), suffix);
}

// Disassembles the four-operand forms mapped to this visitor (bcax and eor3),
// printing every operand with the 16b arrangement.
void Disassembler::DisassembleNEON4Same(const Instruction *instr) {
  const char *form = "'Vd.16b, 'Vn.16b, 'Vm.16b, 'Va.16b";
  FormatWithDecodedMnemonic(instr, form);
}

// Disassembles xar: three 2d vector operands followed by the unsigned
// immediate taken from bits 15:10.
void Disassembler::DisassembleNEONXar(const Instruction *instr) {
  const char *form = "'Vd.2d, 'Vn.2d, 'Vm.2d, #'u1510";
  FormatWithDecodedMnemonic(instr, form);
}

// Disassembles rax1: three vector operands, all printed with the 2d
// arrangement.
void Disassembler::DisassembleNEONRax1(const Instruction *instr) {
  const char *form = "'Vd.2d, 'Vn.2d, 'Vm.2d";
  FormatWithDecodedMnemonic(instr, form);
}

void Disassembler::VisitNEON3Different(const Instruction *instr) {
const char *mnemonic = mnemonic_.c_str();
Expand Down
3 changes: 3 additions & 0 deletions src/aarch64/disasm-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ class Disassembler : public DecoderVisitor {
void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
void DisassembleNEONPolynomialMul(const Instruction* instr);
// Four-operand NEON forms with 16b operands (bcax, eor3).
void DisassembleNEON4Same(const Instruction* instr);
// xar: three 2d operands plus an unsigned immediate from bits 15:10.
void DisassembleNEONXar(const Instruction* instr);
// rax1: three 2d operands.
void DisassembleNEONRax1(const Instruction* instr);

void DisassembleMTELoadTag(const Instruction* instr);
void DisassembleMTEStoreTag(const Instruction* instr);
Expand Down
25 changes: 25 additions & 0 deletions src/aarch64/macro-assembler-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -2787,6 +2787,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
V(pmull2, Pmull2) \
V(raddhn, Raddhn) \
V(raddhn2, Raddhn2) \
V(rax1, Rax1) \
V(rsubhn, Rsubhn) \
V(rsubhn2, Rsubhn2) \
V(saba, Saba) \
Expand Down Expand Up @@ -3152,6 +3153,14 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
#undef DEFINE_MACRO_ASM_FUNC

// Macro-assembler entry point for bcax. Asserts that macro instructions are
// currently allowed, then forwards the operands unchanged to bcax() under a
// SingleEmissionCheckScope.
void Bcax(const VRegister& vd,
          const VRegister& vn,
          const VRegister& vm,
          const VRegister& va) {
  VIXL_ASSERT(allow_macro_instructions_);
  SingleEmissionCheckScope emission_scope(this);
  bcax(vd, vn, vm, va);
}
void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
Expand Down Expand Up @@ -3192,6 +3201,14 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
SingleEmissionCheckScope guard(this);
dup(vd, rn);
}
// Macro-assembler entry point for eor3. Asserts that macro instructions are
// currently allowed, then forwards the operands unchanged to eor3() under a
// SingleEmissionCheckScope.
void Eor3(const VRegister& vd,
          const VRegister& vn,
          const VRegister& vm,
          const VRegister& va) {
  VIXL_ASSERT(allow_macro_instructions_);
  SingleEmissionCheckScope emission_scope(this);
  eor3(vd, vn, vm, va);
}
void Ext(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
Expand Down Expand Up @@ -3498,6 +3515,14 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
SingleEmissionCheckScope guard(this);
umov(rd, vn, vn_index);
}
// Macro-assembler entry point for xar. Asserts that macro instructions are
// currently allowed, then forwards the operands and rotate amount unchanged to
// xar() under a SingleEmissionCheckScope.
void Xar(const VRegister& vd,
         const VRegister& vn,
         const VRegister& vm,
         int rotate) {
  VIXL_ASSERT(allow_macro_instructions_);
  SingleEmissionCheckScope emission_scope(this);
  xar(vd, vn, vm, rotate);
}
void Crc32b(const Register& rd, const Register& rn, const Register& rm) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
Expand Down
32 changes: 32 additions & 0 deletions src/aarch64/simulator-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,10 @@ const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
{"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
{"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
{"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
{"bcax_vvv16_crypto4"_h, &Simulator::SimulateNEONSHA3},
{"eor3_vvv16_crypto4"_h, &Simulator::SimulateNEONSHA3},
{"rax1_vvv2_cryptosha512_3"_h, &Simulator::SimulateNEONSHA3},
{"xar_vvv2_crypto3_imm6"_h, &Simulator::SimulateNEONSHA3},
};
return &form_to_visitor;
}
Expand Down Expand Up @@ -9926,6 +9930,34 @@ void Simulator::VisitNEONPerm(const Instruction* instr) {
}
}

void Simulator::SimulateNEONSHA3(const Instruction* instr) {
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
SimVRegister& rm = ReadVRegister(instr->GetRm());
SimVRegister& ra = ReadVRegister(instr->GetRa());
SimVRegister temp;

switch (form_hash_) {
case "bcax_vvv16_crypto4"_h:
bic(kFormat16B, temp, rm, ra);
eor(kFormat16B, rd, rn, temp);
break;
case "eor3_vvv16_crypto4"_h:
eor(kFormat16B, temp, rm, ra);
eor(kFormat16B, rd, rn, temp);
break;
case "rax1_vvv2_cryptosha512_3"_h:
ror(kFormat2D, temp, rm, 63); // rol(1) => ror(63)
eor(kFormat2D, rd, rn, temp);
break;
case "xar_vvv2_crypto3_imm6"_h:
int rot = instr->ExtractBits(15, 10);
eor(kFormat2D, temp, rn, rm);
ror(kFormat2D, rd, temp, rot);
break;
}
}

void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
SimVRegister& zd = ReadVRegister(instr->GetRd());
SimVRegister& zn = ReadVRegister(instr->GetRn());
Expand Down
1 change: 1 addition & 0 deletions src/aarch64/simulator-aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1509,6 +1509,7 @@ class Simulator : public DecoderVisitor {
void SimulateNEONFPMulByElementLong(const Instruction* instr);
void SimulateNEONComplexMulByElement(const Instruction* instr);
void SimulateNEONDotProdByElement(const Instruction* instr);
// Handles the SHA3 NEON forms: bcax, eor3, rax1 and xar.
void SimulateNEONSHA3(const Instruction* instr);
void SimulateMTEAddSubTag(const Instruction* instr);
void SimulateMTETagMaskInsert(const Instruction* instr);
void SimulateMTESubPointer(const Instruction* instr);
Expand Down
9 changes: 9 additions & 0 deletions test/aarch64/test-cpu-features-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3785,5 +3785,14 @@ TEST_FP_FCMA_NEON_NEONHALF(fcmla_3, fcmla(v0.V8H(), v1.V8H(), v2.V8H(), 0))
TEST_FEAT(pmull1q_0, pmull(v5.V1Q(), v6.V1D(), v7.V1D()))
#undef TEST_FEAT

// CPU feature tests for the SHA3 NEON instructions. Each instruction is
// expected to need both NEON and SHA3; the generated test names are prefixed
// with NEON_SHA3_.
#define TEST_NEON_SHA3(NAME, ASM)                                    \
  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3), \
                NEON_SHA3_##NAME,                                    \
                ASM)
TEST_NEON_SHA3(bcax_0, bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
TEST_NEON_SHA3(eor3_0, eor3(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
TEST_NEON_SHA3(xar_0, xar(v0.V2D(), v1.V2D(), v2.V2D(), 42))
TEST_NEON_SHA3(rax1_0, rax1(v0.V2D(), v1.V2D(), v2.V2D()))
// Keep the helper macro local to this group of tests, as is done for
// TEST_FEAT above.
#undef TEST_NEON_SHA3

} // namespace aarch64
} // namespace vixl
8 changes: 4 additions & 4 deletions test/aarch64/test-disasm-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3789,10 +3789,10 @@ TEST(architecture_features) {
COMPARE_PREFIX(dci(0xd503221f), "esb"); // ESB_HI_hints

// ARMv8.2 - SHA3
// COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
// COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
// COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
// COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6
COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6

// ARMv8.2 - SHA512
// COMPARE_PREFIX(dci(0xce608000), "sha512h"); // SHA512H_QQV_cryptosha512_3
Expand Down
14 changes: 14 additions & 0 deletions test/aarch64/test-disasm-neon-aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4516,6 +4516,20 @@ TEST(neon_matmul) {
CLEANUP();
}

// Round-trips the SHA3 NEON macro instructions through the assembler and
// disassembler and checks the printed text.
TEST(neon_sha3) {
  SETUP();

  COMPARE_MACRO(Bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()),
                "bcax v0.16b, v1.16b, v2.16b, v3.16b");
  COMPARE_MACRO(Eor3(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B()),
                "eor3 v10.16b, v11.16b, v12.16b, v13.16b");
  COMPARE_MACRO(Xar(v20.V2D(), v21.V2D(), v22.V2D(), 42),
                "xar v20.2d, v21.2d, v22.2d, #42");
  // Cover both ends of the 6-bit rotate immediate accepted by xar.
  COMPARE_MACRO(Xar(v20.V2D(), v21.V2D(), v22.V2D(), 0),
                "xar v20.2d, v21.2d, v22.2d, #0");
  COMPARE_MACRO(Xar(v20.V2D(), v21.V2D(), v22.V2D(), 63),
                "xar v20.2d, v21.2d, v22.2d, #63");
  COMPARE_MACRO(Rax1(v0.V2D(), v1.V2D(), v2.V2D()), "rax1 v0.2d, v1.2d, v2.2d");

  CLEANUP();
}

TEST(neon_unallocated_regression_test) {
SETUP();

Expand Down
Loading