diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index c0d19f652f41..6d06af48f4f1 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -657,11 +657,16 @@ static constexpr u32 MaskImm26(s64 distance) } // FixupBranch branching -void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch) +{ + SetJumpTarget(branch, m_code); +} + +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { bool Not = false; u32 inst = 0; - s64 distance = (s64)(m_code - branch.ptr); + s64 distance = static_cast<s64>(target - branch.ptr); distance >>= 2; switch (branch.type) diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index c609757bd16b..8fd162dd9ab6 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -790,7 +790,8 @@ class ARM64XEmitter u8* GetWritableCodePtr(); // FixupBranch branching - void SetJumpTarget(FixupBranch const& branch); + void SetJumpTarget(const FixupBranch& branch); + void SetJumpTarget(const FixupBranch& branch, const u8* target); FixupBranch CBZ(ARM64Reg Rt); FixupBranch CBNZ(ARM64Reg Rt); FixupBranch B(CCFlags cond); diff --git a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h index 9ad4e04cdc7f..2fd42a953a51 100644 --- a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h @@ -7,10 +7,17 @@ #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/PowerPC.h" -inline void GenerateAlignmentException(u32 address) +inline void GenerateAlignmentException(u32 effective_address, UGeckoInstruction inst) { PowerPC::ppcState.Exceptions |= EXCEPTION_ALIGNMENT; - PowerPC::ppcState.spr[SPR_DAR] = address; + PowerPC::ppcState.spr[SPR_DAR] = effective_address; + + // It has not been hardware tested what gets used instead of RD and 
RA in + // the cases documented as undefined. For now, simply use RD and RA + const bool x = inst.OPCD >= 32; + const u32 op = x ? inst.SUBOP10 : (inst.OPCD >> 1); + const u32 dsisr = ((op >> 8) << 15) | ((op & 0b11111) << 10) | (inst.RD << 5) | (inst.RA); + PowerPC::ppcState.spr[SPR_DSISR] = dsisr; } inline void GenerateDSIException(u32 address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index dfbf767dee6a..949424fbd25d 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -58,13 +58,6 @@ void Interpreter::lbzu(UGeckoInstruction inst) void Interpreter::lfd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -74,13 +67,6 @@ void Interpreter::lfd(UGeckoInstruction inst) void Interpreter::lfdu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -93,13 +79,6 @@ void Interpreter::lfdu(UGeckoInstruction inst) void Interpreter::lfdux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -112,13 +91,6 @@ void Interpreter::lfdux(UGeckoInstruction inst) void Interpreter::lfdx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - - if 
((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -128,13 +100,6 @@ void Interpreter::lfdx(UGeckoInstruction inst) void Interpreter::lfs(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -147,13 +112,6 @@ void Interpreter::lfs(UGeckoInstruction inst) void Interpreter::lfsu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -167,13 +125,6 @@ void Interpreter::lfsu(UGeckoInstruction inst) void Interpreter::lfsux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -187,13 +138,6 @@ void Interpreter::lfsux(UGeckoInstruction inst) void Interpreter::lfsx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -252,9 +196,9 @@ void Interpreter::lmw(UGeckoInstruction inst) { u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0 || MSR.LE) + if (MSR.LE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } 
@@ -283,9 +227,9 @@ void Interpreter::stmw(UGeckoInstruction inst) { u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0 || MSR.LE) + if (MSR.LE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } @@ -344,12 +288,6 @@ void Interpreter::stfd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); } @@ -357,12 +295,6 @@ void Interpreter::stfdu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -374,12 +306,6 @@ void Interpreter::stfs(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); } @@ -387,12 +313,6 @@ void Interpreter::stfsu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -501,7 +421,7 @@ void Interpreter::dcbz(UGeckoInstruction inst) if (!HID0.DCE) { - GenerateAlignmentException(dcbz_addr); + GenerateAlignmentException(dcbz_addr, inst); return; } @@ -525,7 +445,7 @@ void Interpreter::dcbz_l(UGeckoInstruction inst) if (!HID0.DCE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } @@ -545,13 +465,11 @@ void Interpreter::eciwx(UGeckoInstruction inst) return; } 
- if ((EA & 0b11) != 0) + const u32 temp = PowerPC::Read_U32(EA, inst); + if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { - GenerateAlignmentException(EA); - return; + rGPR[inst.RD] = temp; } - - rGPR[inst.RD] = PowerPC::Read_U32(EA, inst); } void Interpreter::ecowx(UGeckoInstruction inst) @@ -564,12 +482,6 @@ void Interpreter::ecowx(UGeckoInstruction inst) return; } - if ((EA & 0b11) != 0) - { - GenerateAlignmentException(EA); - return; - } - PowerPC::Write_U32(rGPR[inst.RS], EA, inst); } @@ -666,13 +578,14 @@ void Interpreter::lhzx(UGeckoInstruction inst) } // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::lswx(UGeckoInstruction inst) { u32 EA = Helper_Get_EA_X(PowerPC::ppcState, inst); if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -752,12 +665,6 @@ void Interpreter::stfdux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -769,12 +676,6 @@ void Interpreter::stfdx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); } @@ -783,12 +684,6 @@ void Interpreter::stfiwx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address, inst); } @@ -796,12 +691,6 @@ void Interpreter::stfsux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - 
GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -813,12 +702,6 @@ void Interpreter::stfsx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); } @@ -845,6 +728,7 @@ void Interpreter::sthx(UGeckoInstruction inst) // lswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::lswi(UGeckoInstruction inst) { u32 EA = 0; @@ -853,7 +737,7 @@ void Interpreter::lswi(UGeckoInstruction inst) if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -892,6 +776,7 @@ void Interpreter::lswi(UGeckoInstruction inst) // todo : optimize ? // stswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::stswi(UGeckoInstruction inst) { u32 EA = 0; @@ -900,7 +785,7 @@ void Interpreter::stswi(UGeckoInstruction inst) if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -932,13 +817,14 @@ void Interpreter::stswi(UGeckoInstruction inst) } // TODO: is this right? is it DSI interruptible? +// TODO: Should this be able to cause alignment exceptions? 
void Interpreter::stswx(UGeckoInstruction inst) { u32 EA = Helper_Get_EA_X(PowerPC::ppcState, inst); if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -975,12 +861,6 @@ void Interpreter::lwarx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -996,12 +876,6 @@ void Interpreter::stwcxd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - if (PowerPC::ppcState.reserve) { if (address == PowerPC::ppcState.reserve_address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index 45c1f8b9b45c..80090494f697 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -89,7 +89,7 @@ static std::array primarytable = {60, Interpreter::psq_st, {"psq_st", OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {61, Interpreter::psq_stu, {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - //missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 + //missing: 0, 1, 2, 5, 6, 9, 22, 30, 58, 62 }}; static std::array table4 = diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 5cc28b83c88b..236e37201742 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -426,7 +426,7 @@ void Jit64::dcbz(UGeckoInstruction inst) if (emit_fast_path) { - // Perform lookup to see if we 
can use fast path. + // Perform BAT lookup to see if we can use fast path. MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0])); PUSH(RSCRATCH); SHR(32, R(RSCRATCH), Imm8(PowerPC::BAT_INDEX_SHIFT)); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index c26b00f5ed46..619e8a3effcd 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -101,8 +101,8 @@ FixupBranch EmuCodeBlock::BATAddressLookup(X64Reg addr, X64Reg tmp, const void* return J_CC(CC_NC, m_far_code.Enabled()); } -FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr, - BitSet32 registers_in_use) +FixupBranch EmuCodeBlock::CheckIfBATSafeAddress(const OpArg& reg_value, X64Reg reg_addr, + BitSet32 registers_in_use) { registers_in_use[reg_addr] = true; if (reg_value.IsSimpleReg()) @@ -117,7 +117,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ if (reg_addr != RSCRATCH_EXTRA) MOV(32, R(RSCRATCH_EXTRA), R(reg_addr)); - // Perform lookup to see if we can use fast path. + // Perform BAT lookup to see if we can use fast path. 
MOV(64, R(RSCRATCH), ImmPtr(&PowerPC::dbat_table[0])); SHR(32, R(RSCRATCH_EXTRA), Imm8(PowerPC::BAT_INDEX_SHIFT)); TEST(32, MComplex(RSCRATCH, RSCRATCH_EXTRA, SCALE_4, 0), Imm32(PowerPC::BAT_PHYSICAL_BIT)); @@ -130,6 +130,13 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ return J_CC(CC_Z, m_far_code.Enabled()); } +FixupBranch EmuCodeBlock::CheckIfAlignmentSafeAddress(X64Reg reg_addr, int access_size, + UGeckoInstruction inst) +{ + TEST(32, R(reg_addr), Imm32(PowerPC::GetAlignmentMask(access_size))); + return J_CC(CC_NZ, m_far_code.Enabled()); +} + void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { @@ -332,11 +339,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, bool signExtend, int flags) { bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, accessSize); auto& js = m_jit.js; registersInUse[reg_value] = false; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !slowmem && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -388,13 +397,21 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(R(reg_value), reg_addr, registersInUse); + UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(true); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + 
SetJumpTarget(slow_2); } // Helps external systems know which instruction triggered the read. @@ -450,7 +467,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc bool signExtend) { // If the address is known to be RAM, just load it directly. - if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) + if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address, accessSize, inst)) { UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend); return; @@ -504,13 +521,15 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces { bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, accessSize); // set the correct immediate format reg_value = FixImmediate(accessSize, reg_value); auto& js = m_jit.js; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !slowmem && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -558,13 +577,21 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(reg_value, reg_addr, registersInUse); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(true); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs @@ -661,7 +688,7 
@@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, m_jit.js.fifoBytesSinceCheck += accessSize >> 3; return false; } - else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) + else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address, accessSize, inst)) { WriteToConstRamAddress(accessSize, arg, address); return false; diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 63e0d6cc3e9c..a544ee6e3b60 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -54,8 +54,10 @@ class EmuCodeBlock : public Gen::X64CodeBlock // Jumps to the returned FixupBranch if lookup fails. Gen::FixupBranch BATAddressLookup(Gen::X64Reg addr, Gen::X64Reg tmp, const void* bat_table); - Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, - BitSet32 registers_in_use); + Gen::FixupBranch CheckIfBATSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, + BitSet32 registers_in_use); + Gen::FixupBranch CheckIfAlignmentSafeAddress(Gen::X64Reg reg_addr, int access_size, + UGeckoInstruction inst); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index d84c65f493ef..b9aa758e275a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -58,12 +58,24 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fast BitSet32 gprs_to_push, BitSet32 fprs_to_push, bool emitting_routine) { + const u32 access_size = BackPatchInfo::GetFlagSize(flags); + bool in_far_code = 
false; const u8* fastmem_start = GetCodePtr(); std::optional<FixupBranch> slowmem_fixup; if (fastmem) { + if (do_farcode && jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, access_size)) + { + const u32 mask = PowerPC::GetAlignmentMask(access_size); + TST(addr, LogicalImm(mask, 32)); + FixupBranch fast = B(CCFlags::CC_EQ); + slowmem_fixup = emitting_routine ? B() : BL(); + SetJumpTarget(fast); + } + if (do_farcode && emitting_routine) { const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W4; @@ -156,6 +168,10 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fast const u8* handler_loc = handler_loc_iter->second; fastmem_area->fastmem_code = fastmem_start; fastmem_area->slowmem_code = handler_loc; + + if (slowmem_fixup) + SetJumpTarget(*slowmem_fixup); + return; } } @@ -169,7 +185,6 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fast if (flags & BackPatchInfo::FLAG_STORE) { - const u32 access_size = BackPatchInfo::GetFlagSize(flags); ARM64Reg src_reg = RS; const ARM64Reg dst_reg = access_size == 64 ? 
ARM64Reg::X0 : ARM64Reg::W0; @@ -217,8 +232,6 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fast } else { - const u32 access_size = BackPatchInfo::GetFlagSize(flags); - if (access_size == 64) MOVP2R(ARM64Reg::X8, &PowerPC::Read_U64); else if (access_size == 32) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index e89ee19bf95f..ec13cbc0526e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -121,7 +121,8 @@ void JitArm64::SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 off if (is_immediate) mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size); - if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { EmitBackpatchRoutine(inst, flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0)); } @@ -256,7 +257,8 @@ void JitArm64::SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 js.fifoBytesSinceCheck += accessSize >> 3; } - else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { MOVI2R(XA, imm_addr); EmitBackpatchRoutine(inst, flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 57187d3f2822..41968ea5d25a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -167,7 +167,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(VD)] = 0; - if 
(jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst)) { EmitBackpatchRoutine(inst, flags, true, false, VD, XA, BitSet32(0), BitSet32(0)); } @@ -356,16 +357,12 @@ void JitArm64::stfXX(UGeckoInstruction inst) regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; + const u32 access_size = BackPatchInfo::GetFlagSize(flags); + if (is_immediate) { if (jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) { - int accessSize; - if (flags & BackPatchInfo::FLAG_SIZE_64) - accessSize = 64; - else - accessSize = 32; - LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (flags & BackPatchInfo::FLAG_SIZE_64) @@ -373,11 +370,12 @@ void JitArm64::stfXX(UGeckoInstruction inst) else if (flags & BackPatchInfo::FLAG_SIZE_32) m_float_emit.REV32(8, ARM64Reg::D0, V0); - m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0, - ARM64Reg::X0, accessSize >> 3); + m_float_emit.STR(access_size, IndexType::Post, + access_size == 64 ? 
ARM64Reg::Q0 : ARM64Reg::D0, ARM64Reg::X0, + access_size >> 3); STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); - js.fifoBytesSinceCheck += accessSize >> 3; + js.fifoBytesSinceCheck += access_size >> 3; if (update) { @@ -386,7 +384,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOVI2R(gpr.R(a), imm_addr); } } - else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { EmitBackpatchRoutine(inst, flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index c42fce554ec8..a1a1e96153c5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -45,5 +45,6 @@ void JitBase::UpdateMemoryOptions() { bool any_watchpoints = PowerPC::memchecks.HasAny(); jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints); - jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; + jo.alignment_exceptions = SConfig::GetInstance().bAlignmentExceptions; + jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints || jo.alignment_exceptions; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 18784ff095b2..43949daf354a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -62,6 +62,7 @@ class JitBase : public CPUCoreBase bool accurateSinglePrecision; bool fastmem; bool fastmem_arena; + bool alignment_exceptions; bool memcheck; bool profile_blocks; }; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 9fb0aeebb899..6c3e4dfbd45f 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -3,6 +3,7 @@ #include "Core/PowerPC/MMU.h" +#include #include #include 
#include @@ -20,6 +21,7 @@ #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" @@ -186,12 +188,18 @@ static T ReadFromHardware(u32 em_address, UGeckoInstruction inst) GenerateDSIException(em_address, false); return 0; } + + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, translated_addr.wi)) + { + GenerateAlignmentException(em_address, inst); + return 0; + } + if ((em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) { // This could be unaligned down to the byte level... hopefully this is rare, so doing it this // way isn't too terrible. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1); auto addr_next_page = TranslateAddress(em_address_next_page); if (!addr_next_page.Success()) @@ -212,6 +220,15 @@ static T ReadFromHardware(u32 em_address, UGeckoInstruction inst) } em_address = translated_addr.address; } + else + { + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, false)) + { + GenerateAlignmentException(em_address, inst); + return 0; + } + } // TODO: Make sure these are safe for unaligned addresses. @@ -273,8 +290,6 @@ static void WriteToHardware(u32 em_address, const u32 data, const u32 size, UGec if (em_address_start_page != em_address_end_page) { // The write crosses a page boundary. Break it up into two writes. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! 
const u32 first_half_size = em_address_end_page - em_address; const u32 second_half_size = size - first_half_size; WriteToHardware( @@ -298,6 +313,13 @@ static void WriteToHardware(u32 em_address, const u32 data, const u32 size, UGec wi = translated_addr.wi; } + if (flag == XCheckTLBFlag::Write && + AccessCausesAlignmentException(em_address, size << 3, inst, wi)) + { + GenerateAlignmentException(em_address, inst); + return; + } + // Check for a gather pipe write. // Note that we must mask the address to correctly emulate certain games; // Pac-Man World 3 in particular is affected by this. @@ -866,7 +888,7 @@ TryReadResult HostTryReadString(u32 address, size_t size, Requested return TryReadResult(c.translated, std::move(s)); } -bool IsOptimizableRAMAddress(const u32 address) +bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst) { if (PowerPC::memchecks.HasAny()) return false; @@ -874,8 +896,12 @@ bool IsOptimizableRAMAddress(const u32 address) if (!MSR.DR) return false; - // TODO: This API needs to take an access size - // + if ((address & GetAlignmentMask(access_size)) != 0 && + AccessCausesAlignmentExceptionIfMisaligned(inst, access_size)) + { + return false; + } + // We store whether an access can be optimized to an unchecked access // in dbat_table. u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT]; @@ -1071,7 +1097,7 @@ u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) return 0; // Check whether the address is an aligned address of an MMIO register. 
- const bool aligned = (address & ((access_size >> 3) - 1)) == 0; + const bool aligned = (address & GetAlignmentMask(access_size)) == 0; if (!aligned || !MMIO::IsMMIOAddress(address)) return 0; @@ -1560,4 +1586,75 @@ std::optional<u32> GetTranslatedAddress(u32 address) return std::optional<u32>(result.address); } +static bool IsDcbz(UGeckoInstruction inst) +{ + // dcbz, dcbz_l + return inst.SUBOP10 == 1014 && (inst.OPCD == 31 || inst.OPCD == 4); +} + +static bool IsFloat(UGeckoInstruction inst, size_t access_size) +{ + // Floating loadstore + if (inst.OPCD >= 48 && inst.OPCD < 56) + return true; + + // Paired non-indexed loadstore + if (inst.OPCD >= 56 && inst.OPCD < 62) + return access_size == (inst.W ? 32 : 64); + + // Paired indexed loadstore + if (inst.OPCD == 4 && inst.SUBOP10 != 1014) + return access_size == (inst.Wx ? 32 : 64); + + return false; +} + +static bool IsMultiword(UGeckoInstruction inst) +{ + // lmw, stmw + if (inst.OPCD == 46 || inst.OPCD == 47) + return true; + + if (inst.OPCD != 31) + return false; + + // lswx, lswi, stswx, stswi + return inst.SUBOP10 == 533 || inst.SUBOP10 == 597 || inst.SUBOP10 == 661 || inst.SUBOP10 == 725; +} + +static bool IsLwarxOrStwcx(UGeckoInstruction inst) +{ + // lwarx, stwcx + return inst.OPCD == 31 && (inst.SUBOP10 == 20 || inst.SUBOP10 == 150); +} + +static bool IsEciwxOrEcowx(UGeckoInstruction inst) +{ + // eciwx, ecowx + return inst.OPCD == 31 && (inst.SUBOP10 == 310 || inst.SUBOP10 == 438); +} + +bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst) +{ + return IsDcbz(inst); +} + +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, size_t access_size) +{ + return IsFloat(inst, access_size) || IsMultiword(inst) || IsLwarxOrStwcx(inst) || + IsEciwxOrEcowx(inst); +} + +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi) +{ + if (wi && AccessCausesAlignmentExceptionIfWi(inst)) + return true; + + if ((effective_address & 
GetAlignmentMask(access_size)) == 0) + return false; + + return AccessCausesAlignmentExceptionIfMisaligned(inst, access_size); +} + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 1fb814a016f1..12aef6e8aee8 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -185,7 +185,7 @@ void IBATUpdated(); // Result changes based on the BAT registers and MSR.DR. Returns whether // it's safe to optimize a read or write to this address to an unguarded // memory access. Does not consider page tables. -bool IsOptimizableRAMAddress(u32 address); +bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst); u32 IsOptimizableMMIOAccess(u32 address, u32 access_size); bool IsOptimizableGatherPipeWrite(u32 address); @@ -229,4 +229,15 @@ constexpr u32 HW_PAGE_INDEX_SHIFT = 12; constexpr u32 HW_PAGE_INDEX_MASK = 0x3f; std::optional<u32> GetTranslatedAddress(u32 address); + +constexpr u32 GetAlignmentMask(size_t size) +{ + return static_cast<u32>(std::min<size_t>(4, size >> 3) - 1); +} + +bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst); +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, size_t access_size); +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi); + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 1f964b54b7cd..86e58d056647 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -538,8 +538,7 @@ void CheckExceptions() MSR.LE = MSR.ILE; MSR.Hex &= ~0x04EF36; PC = NPC = 0x00000600; - - // TODO crazy amount of DSISR options to check out + // DSISR and DAR regs are changed in GenerateAlignmentException() DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT"); ppcState.Exceptions &= ~EXCEPTION_ALIGNMENT;