From a2ba3fcc7367a3c95348d48911fd7684ae00aa84 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 4 Jul 2021 20:47:04 +0200 Subject: [PATCH] PowerPC: Raise alignment exceptions in more situations To avoid affecting performance, the JITs will most of the time not raise alignment exceptions unless you enable the new INI-only setting AlignmentExceptions. --- Source/Core/Common/Arm64Emitter.cpp | 9 +- Source/Core/Common/Arm64Emitter.h | 3 +- .../Core/PowerPC/Interpreter/ExceptionUtils.h | 12 +- .../Interpreter/Interpreter_LoadStore.cpp | 160 ++---------------- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 49 ++++-- .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 7 +- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 10 ++ .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 4 +- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 5 +- .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 5 +- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 2 + Source/Core/Core/PowerPC/MMU.cpp | 157 +++++++++++++---- Source/Core/Core/PowerPC/MMU.h | 13 +- Source/Core/Core/PowerPC/PowerPC.cpp | 3 +- 15 files changed, 238 insertions(+), 203 deletions(-) diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index f769edc2f381..86748a0ff20d 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -693,14 +693,19 @@ static constexpr u32 MaskImm26(s64 distance) } // FixupBranch branching -void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch) +{ + SetJumpTarget(branch, m_code); +} + +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { if (!branch.ptr) return; bool Not = false; u32 inst = 0; - s64 distance = (s64)(m_code - branch.ptr); + s64 distance = static_cast<s64>(target - branch.ptr); distance >>= 2; switch (branch.type) diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h 
index e97e72f3c379..3ebba37b9ca5 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -695,7 +695,8 @@ class ARM64XEmitter bool HasWriteFailed() const { return m_write_failed; } // FixupBranch branching - void SetJumpTarget(FixupBranch const& branch); + void SetJumpTarget(const FixupBranch& branch); + void SetJumpTarget(const FixupBranch& branch, const u8* target); [[nodiscard]] FixupBranch CBZ(ARM64Reg Rt); [[nodiscard]] FixupBranch CBNZ(ARM64Reg Rt); [[nodiscard]] FixupBranch B(CCFlags cond); diff --git a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h index fd19c0dae67e..2a58dba935b2 100644 --- a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h @@ -15,10 +15,18 @@ enum class ProgramExceptionCause : u32 Trap = 1 << (31 - 14), }; -inline void GenerateAlignmentException(PowerPC::PowerPCState& ppc_state, u32 address) +inline void GenerateAlignmentException(PowerPC::PowerPCState& ppc_state, u32 effective_address, + UGeckoInstruction inst) { ppc_state.Exceptions |= EXCEPTION_ALIGNMENT; - ppc_state.spr[SPR_DAR] = address; + ppc_state.spr[SPR_DAR] = effective_address; + + // It has not been hardware tested what gets used instead of RD and RA in + // the cases documented as undefined. For now, simply use RD and RA + const bool x = inst.OPCD >= 32; + const u32 op = x ? 
inst.SUBOP10 : (inst.OPCD >> 1); + const u32 dsisr = ((op >> 8) << 15) | ((op & 0b11111) << 10) | (inst.RD << 5) | (inst.RA); + ppc_state.spr[SPR_DSISR] = dsisr; } inline void GenerateDSIException(PowerPC::PowerPCState& ppc_state, u32 address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 6e09a671db70..09e3999488ed 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -64,13 +64,6 @@ void Interpreter::lfd(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -81,13 +74,6 @@ void Interpreter::lfdu(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -101,13 +87,6 @@ void Interpreter::lfdux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -121,13 +100,6 @@ void Interpreter::lfdx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - - if ((address & 
0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -138,13 +110,6 @@ void Interpreter::lfs(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -158,13 +123,6 @@ void Interpreter::lfsu(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -179,13 +137,6 @@ void Interpreter::lfsux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -200,13 +151,6 @@ void Interpreter::lfsx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -270,9 +214,9 @@ void Interpreter::lmw(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; u32 address = 
Helper_Get_EA(ppc_state, inst); - if ((address & 0b11) != 0 || ppc_state.msr.LE) + if (ppc_state.msr.LE) { - GenerateAlignmentException(ppc_state, address); + GenerateAlignmentException(ppc_state, address, inst); return; } @@ -302,9 +246,9 @@ void Interpreter::stmw(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; u32 address = Helper_Get_EA(ppc_state, inst); - if ((address & 0b11) != 0 || ppc_state.msr.LE) + if (ppc_state.msr.LE) { - GenerateAlignmentException(ppc_state, address); + GenerateAlignmentException(ppc_state, address, inst); return; } @@ -368,12 +312,6 @@ void Interpreter::stfd(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); } @@ -382,12 +320,6 @@ void Interpreter::stfdu(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -400,12 +332,6 @@ void Interpreter::stfs(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); } @@ -414,12 +340,6 @@ void Interpreter::stfsu(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - if ((address & 0b11) != 0) - { 
- GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -541,7 +461,7 @@ void Interpreter::dcbz(Interpreter& interpreter, UGeckoInstruction inst) if (!HID0(ppc_state).DCE) { - GenerateAlignmentException(ppc_state, dcbz_addr); + GenerateAlignmentException(ppc_state, dcbz_addr, inst); return; } @@ -572,7 +492,7 @@ void Interpreter::dcbz_l(Interpreter& interpreter, UGeckoInstruction inst) if (!HID0(ppc_state).DCE) { - GenerateAlignmentException(ppc_state, address); + GenerateAlignmentException(ppc_state, address, inst); return; } @@ -592,13 +512,11 @@ void Interpreter::eciwx(Interpreter& interpreter, UGeckoInstruction inst) return; } - if ((EA & 0b11) != 0) + const u32 temp = interpreter.m_mmu.Read_U32(EA, inst); + if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { - GenerateAlignmentException(ppc_state, EA); - return; + ppc_state.gpr[inst.RD] = temp; } - - ppc_state.gpr[inst.RD] = interpreter.m_mmu.Read_U32(EA, inst); } void Interpreter::ecowx(Interpreter& interpreter, UGeckoInstruction inst) @@ -612,12 +530,6 @@ void Interpreter::ecowx(Interpreter& interpreter, UGeckoInstruction inst) return; } - if ((EA & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, EA); - return; - } - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], EA, inst); } @@ -724,6 +636,7 @@ void Interpreter::lhzx(Interpreter& interpreter, UGeckoInstruction inst) } // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? 
void Interpreter::lswx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; @@ -731,7 +644,7 @@ void Interpreter::lswx(Interpreter& interpreter, UGeckoInstruction inst) if (ppc_state.msr.LE) { - GenerateAlignmentException(ppc_state, EA); + GenerateAlignmentException(ppc_state, EA, inst); return; } @@ -817,12 +730,6 @@ void Interpreter::stfdux(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -835,12 +742,6 @@ void Interpreter::stfdx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); } @@ -850,12 +751,6 @@ void Interpreter::stfiwx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U32(ppc_state.ps[inst.FS].PS0AsU32(), address, inst); } @@ -864,12 +759,6 @@ void Interpreter::stfsux(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -882,12 +771,6 @@ void 
Interpreter::stfsx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); } @@ -917,6 +800,7 @@ void Interpreter::sthx(Interpreter& interpreter, UGeckoInstruction inst) // lswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; @@ -926,7 +810,7 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst) if (ppc_state.msr.LE) { - GenerateAlignmentException(ppc_state, EA); + GenerateAlignmentException(ppc_state, EA, inst); return; } @@ -965,6 +849,7 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst) // todo : optimize ? // stswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; @@ -974,7 +859,7 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) if (ppc_state.msr.LE) { - GenerateAlignmentException(ppc_state, EA); + GenerateAlignmentException(ppc_state, EA, inst); return; } @@ -1006,6 +891,7 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) } // TODO: is this right? is it DSI interruptible? +// TODO: Should this be able to cause alignment exceptions? 
void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; @@ -1013,7 +899,7 @@ void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst) if (ppc_state.msr.LE) { - GenerateAlignmentException(ppc_state, EA); + GenerateAlignmentException(ppc_state, EA, inst); return; } @@ -1052,12 +938,6 @@ void Interpreter::lwarx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -1074,12 +954,6 @@ void Interpreter::stwcxd(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(ppc_state, address); - return; - } - if (ppc_state.reserve) { if (address == ppc_state.reserve_address) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 96c34beaadc9..1959dac4b4e3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -460,7 +460,7 @@ void Jit64::dcbz(UGeckoInstruction inst) if (emit_fast_path) { - // Perform lookup to see if we can use fast path. + // Perform BAT lookup to see if we can use fast path. 
MOV(64, R(RSCRATCH2), ImmPtr(m_mmu.GetDBATTable().data())); PUSH(RSCRATCH); SHR(32, R(RSCRATCH), Imm8(PowerPC::BAT_INDEX_SHIFT)); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index f15e5e4462fc..3ea1fc147d9d 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -102,8 +102,8 @@ FixupBranch EmuCodeBlock::BATAddressLookup(X64Reg addr, X64Reg tmp, const void* return J_CC(CC_NC, m_far_code.Enabled() ? Jump::Near : Jump::Short); } -FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr, - BitSet32 registers_in_use) +FixupBranch EmuCodeBlock::CheckIfBATSafeAddress(const OpArg& reg_value, X64Reg reg_addr, + BitSet32 registers_in_use) { registers_in_use[reg_addr] = true; if (reg_value.IsSimpleReg()) @@ -118,7 +118,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ if (reg_addr != RSCRATCH_EXTRA) MOV(32, R(RSCRATCH_EXTRA), R(reg_addr)); - // Perform lookup to see if we can use fast path. + // Perform BAT lookup to see if we can use fast path. MOV(64, R(RSCRATCH), ImmPtr(m_jit.m_mmu.GetDBATTable().data())); SHR(32, R(RSCRATCH_EXTRA), Imm8(PowerPC::BAT_INDEX_SHIFT)); TEST(32, MComplex(RSCRATCH, RSCRATCH_EXTRA, SCALE_4, 0), Imm32(PowerPC::BAT_PHYSICAL_BIT)); @@ -131,6 +131,13 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ return J_CC(CC_Z, m_far_code.Enabled() ? Jump::Near : Jump::Short); } +FixupBranch EmuCodeBlock::CheckIfAlignmentSafeAddress(X64Reg reg_addr, int access_size, + UGeckoInstruction inst) +{ + TEST(32, R(reg_addr), Imm32(PowerPC::GetAlignmentMask(access_size))); + return J_CC(CC_NZ, m_far_code.Enabled() ? 
Jump::Near : Jump::Short); +} + void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, MovInfo* info) { @@ -321,11 +328,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, bool signExtend, int flags) { bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0; + bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, accessSize); auto& js = m_jit.js; registersInUse[reg_value] = false; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !force_slow_access) + !force_slow_access && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -379,13 +388,21 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(R(reg_value), reg_addr, registersInUse); + UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(Jump::Near); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe @@ -444,7 +461,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc bool signExtend) { // If the address is known to be RAM, just load it directly. 
- if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize)) + if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize, inst)) { UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend); return; @@ -499,13 +516,15 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces { bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0; + bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, accessSize); // set the correct immediate format reg_value = FixImmediate(accessSize, reg_value); auto& js = m_jit.js; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !force_slow_access) + !force_slow_access && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -555,13 +574,21 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(reg_value, reg_addr, registersInUse); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(Jump::Near); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe @@ -661,7 +688,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, m_jit.js.fifoBytesSinceCheck += accessSize >> 3; return false; } - else if (m_jit.jo.fastmem_arena && 
m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize)) + else if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize, inst)) { WriteToConstRamAddress(accessSize, arg, address); return false; diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 1a3e8ae3d96d..e52376d8f800 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -54,8 +54,11 @@ class EmuCodeBlock : public Gen::X64CodeBlock // Jumps to the returned FixupBranch if lookup fails. Gen::FixupBranch BATAddressLookup(Gen::X64Reg addr, Gen::X64Reg tmp, const void* bat_table); - Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, - BitSet32 registers_in_use); + Gen::FixupBranch CheckIfBATSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, + BitSet32 registers_in_use); + Gen::FixupBranch CheckIfAlignmentSafeAddress(Gen::X64Reg reg_addr, int access_size, + UGeckoInstruction inst); + // these return the address of the MOV, for backpatching void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 3fefeccf46a0..589b1b55e273 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -71,6 +71,16 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, MemAccess if (emit_fast_access) { + if (emit_slow_access && jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, access_size)) + { + const u32 mask = PowerPC::GetAlignmentMask(access_size); + TST(addr, LogicalImm(mask, GPRSize::B32)); + FixupBranch fast = B(CCFlags::CC_EQ); + slow_access_fixup = 
emitting_routine ? B() : BL(); + SetJumpTarget(fast); + } + ARM64Reg memory_base = MEM_REG; ARM64Reg memory_offset = addr; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index a7f3127372f0..e022879d8894 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -138,7 +138,7 @@ void JitArm64::SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 off if (is_immediate) mmio_address = m_mmu.IsOptimizableMMIOAccess(imm_addr, access_size); - if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) + if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size, inst)) { set_addr_reg_if_needed(); EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use, @@ -310,7 +310,7 @@ void JitArm64::SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 js.fifoBytesSinceCheck += accessSize >> 3; } - else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) + else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size, inst)) { set_addr_reg_if_needed(); EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 6e148df86913..3ebefac9e552 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -174,7 +174,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) if (!jo.memcheck) fprs_in_use[DecodeReg(VD)] = 0; - if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) + if (is_immediate && + m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst)) { EmitBackpatchRoutine(inst, flags, 
MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use); @@ -400,7 +401,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; } - else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) + else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst)) { set_addr_reg_if_needed(); EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use, diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index 81c6e35a0151..c6092f581c25 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -76,6 +76,7 @@ const std::array*>, 24> JitB {&JitBase::m_enable_branch_following, &Config::MAIN_JIT_FOLLOW_BRANCH}, {&JitBase::m_enable_float_exceptions, &Config::MAIN_FLOAT_EXCEPTIONS}, {&JitBase::m_enable_div_by_zero_exceptions, &Config::MAIN_DIVIDE_BY_ZERO_EXCEPTIONS}, + {&JitBase::m_alignment_exceptions, &Config::MAIN_ALIGNMENT_EXCEPTIONS}, {&JitBase::m_low_dcbz_hack, &Config::MAIN_LOW_DCBZ_HACK}, {&JitBase::m_fprf, &Config::MAIN_FPRF}, {&JitBase::m_accurate_nans, &Config::MAIN_ACCURATE_NANS}, @@ -137,9 +138,11 @@ void JitBase::RefreshConfig() bool any_watchpoints = m_system.GetPowerPC().GetMemChecks().HasAny(); jo.fastmem = m_fastmem_enabled && jo.fastmem_arena && (m_ppc_state.msr.DR || !any_watchpoints) && EMM::IsExceptionHandlerSupported(); - jo.memcheck = m_system.IsMMUMode() || m_system.IsPauseOnPanicMode() || any_watchpoints; + jo.memcheck = m_system.IsMMUMode() || m_system.IsPauseOnPanicMode() || any_watchpoints || + m_alignment_exceptions; jo.fp_exceptions = m_enable_float_exceptions; jo.div_by_zero_exceptions = m_enable_div_by_zero_exceptions; + jo.alignment_exceptions = m_alignment_exceptions; } void JitBase::InitFastmemArena() diff --git 
a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 5f3df10e3fec..7a52bec6e50d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -83,6 +83,7 @@ class JitBase : public CPUCoreBase bool accurateSinglePrecision; bool fastmem; bool fastmem_arena; + bool alignment_exceptions; bool memcheck; bool fp_exceptions; bool div_by_zero_exceptions; @@ -153,6 +154,7 @@ class JitBase : public CPUCoreBase bool m_enable_branch_following = false; bool m_enable_float_exceptions = false; bool m_enable_div_by_zero_exceptions = false; + bool m_alignment_exceptions = false; bool m_low_dcbz_hack = false; bool m_fprf = false; bool m_accurate_nans = false; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 9d3961c5f136..71f069b8984d 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -25,6 +25,7 @@ #include "Core/PowerPC/MMU.h" +#include #include #include #include @@ -44,6 +45,7 @@ #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/GDBStub.h" #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" @@ -154,22 +156,6 @@ T MMU::ReadFromHardware(const u32 effective_address, const UGeckoInstruction ins static_assert(flag == XCheckTLBFlag::NoException || flag == XCheckTLBFlag::Read || flag == XCheckTLBFlag::OpcodeNoException); - const u32 effective_start_page = effective_address & ~HW_PAGE_MASK; - const u32 effective_end_page = (effective_address + sizeof(T) - 1) & ~HW_PAGE_MASK; - if (effective_start_page != effective_end_page) - { - // This could be unaligned down to the byte level... hopefully this is rare, so doing it this - // way isn't too terrible. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. 
- // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! - u64 var = 0; - for (u32 i = 0; i < sizeof(T); ++i) - { - var = (var << 8) | ReadFromHardware(effective_address + i, inst); - } - return static_cast<T>(var); - } - u32 physical_address; bool wi; @@ -192,6 +178,27 @@ T MMU::ReadFromHardware(const u32 effective_address, const UGeckoInstruction ins wi = false; } + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(effective_address, sizeof(T) << 3, inst, wi)) + { + GenerateAlignmentException(m_ppc_state, effective_address, inst); + return 0; + } + + const u32 effective_start_page = effective_address & ~HW_PAGE_MASK; + const u32 effective_end_page = (effective_address + sizeof(T) - 1) & ~HW_PAGE_MASK; + if (effective_start_page != effective_end_page) + { + // This could be unaligned down to the byte level... hopefully this is rare, so doing it this + // way isn't too terrible. + u64 var = 0; + for (u32 i = 0; i < sizeof(T); ++i) + { + var = (var << 8) | ReadFromHardware(effective_address + i, inst); + } + return static_cast<T>(var); + } + if (flag == XCheckTLBFlag::Read && (physical_address & 0xF8000000) == 0x08000000) { if (physical_address < 0x0c000000) @@ -281,21 +288,6 @@ void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32 DEBUG_ASSERT(size <= 4); - const u32 effective_start_page = effective_address & ~HW_PAGE_MASK; - const u32 effective_end_page = (effective_address + size - 1) & ~HW_PAGE_MASK; - if (effective_start_page != effective_end_page) - { - // The write crosses a page boundary. Break it up into two writes. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! 
- const u32 first_half_size = effective_end_page - effective_address; - const u32 second_half_size = size - first_half_size; - WriteToHardware(effective_address, std::rotr(data, second_half_size * 8), - first_half_size, inst); - WriteToHardware(effective_end_page, data, second_half_size, inst); - return; - } - u32 physical_address; bool wi; @@ -318,6 +310,26 @@ void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32 wi = false; } + if (flag == XCheckTLBFlag::Write && + AccessCausesAlignmentException(effective_address, size << 3, inst, wi)) + { + GenerateAlignmentException(m_ppc_state, effective_address, inst); + return; + } + + const u32 effective_start_page = effective_address & ~HW_PAGE_MASK; + const u32 effective_end_page = (effective_address + size - 1) & ~HW_PAGE_MASK; + if (effective_start_page != effective_end_page) + { + // The write crosses a page boundary. Break it up into two writes. + const u32 first_half_size = effective_end_page - effective_address; + const u32 second_half_size = size - first_half_size; + WriteToHardware(effective_address, std::rotr(data, second_half_size * 8), + first_half_size, inst); + WriteToHardware(effective_end_page, data, second_half_size, inst); + return; + } + // Check for a gather pipe write (which are not implemented through the MMIO system). 
// // Note that we must mask the address to correctly emulate certain games; Pac-Man World 3 @@ -941,7 +953,8 @@ std::optional<ReadResult<std::string>> MMU::HostTryReadString(const Core::CPUThr return ReadResult<std::string>(c->translated, std::move(s)); } -bool MMU::IsOptimizableRAMAddress(const u32 address, const u32 access_size) const +bool MMU::IsOptimizableRAMAddress(const u32 address, const u32 access_size, + const UGeckoInstruction inst) const { if (m_power_pc.GetMemChecks().HasAny()) return false; @@ -952,6 +965,12 @@ bool MMU::IsOptimizableRAMAddress(const u32 address, const u32 access_size) cons if (m_ppc_state.m_enable_dcache) return false; + if ((address & GetAlignmentMask(access_size)) != 0 && + AccessCausesAlignmentExceptionIfMisaligned(inst, access_size)) + { + return false; + } + // We store whether an access can be optimized to an unchecked access // in dbat_table. const u32 last_byte_address = address + (access_size >> 3) - 1; @@ -1252,7 +1271,7 @@ u32 MMU::IsOptimizableMMIOAccess(u32 address, u32 access_size) const return 0; // Check whether the address is an aligned address of an MMIO register. - const bool aligned = (address & ((access_size >> 3) - 1)) == 0; + const bool aligned = (address & GetAlignmentMask(access_size)) == 0; if (!aligned || !MMIO::IsMMIOAddress(address, m_system.IsWii())) return 0; @@ -1761,4 +1780,76 @@ void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst) { mmu.Write_U64_Swap(var, address, inst); } + +static bool IsDcbz(UGeckoInstruction inst) +{ + // dcbz, dcbz_l + return inst.SUBOP10 == 1014 && (inst.OPCD == 31 || inst.OPCD == 4); +} + +static bool IsFloat(UGeckoInstruction inst, size_t access_size) +{ + // Floating loadstore + if (inst.OPCD >= 48 && inst.OPCD < 56) + return true; + + // Paired non-indexed loadstore + if (inst.OPCD >= 56 && inst.OPCD < 62) + return access_size == (inst.W ? 32 : 64); + + // Paired indexed loadstore + if (inst.OPCD == 4 && inst.SUBOP10 != 1014) + return access_size == (inst.Wx ?
32 : 64); + + return false; +} + +static bool IsMultiword(UGeckoInstruction inst) +{ + // lmw, stmw + if (inst.OPCD == 46 || inst.OPCD == 47) + return true; + + if (inst.OPCD != 31) + return false; + + // lswx, lswi, stswx, stswi + return inst.SUBOP10 == 533 || inst.SUBOP10 == 597 || inst.SUBOP10 == 661 || inst.SUBOP10 == 725; +} + +static bool IsLwarxOrStwcx(UGeckoInstruction inst) +{ + // lwarx, stwcx + return inst.OPCD == 31 && (inst.SUBOP10 == 20 || inst.SUBOP10 == 150); +} + +static bool IsEciwxOrEcowx(UGeckoInstruction inst) +{ + // eciwx, ecowx + return inst.OPCD == 31 && (inst.SUBOP10 == 310 || inst.SUBOP10 == 438); +} + +bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst) +{ + return IsDcbz(inst); +} + +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, size_t access_size) +{ + return IsFloat(inst, access_size) || IsMultiword(inst) || IsLwarxOrStwcx(inst) || + IsEciwxOrEcowx(inst); +} + +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi) +{ + if (wi && AccessCausesAlignmentExceptionIfWi(inst)) + return true; + + if ((effective_address & GetAlignmentMask(access_size)) == 0) + return false; + + return AccessCausesAlignmentExceptionIfMisaligned(inst, access_size); +} + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index bb2e0ea4e658..e91d6d09de16 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -248,7 +248,7 @@ class MMU // Result changes based on the BAT registers and MSR.DR. Returns whether // it's safe to optimize a read or write to this address to an unguarded // memory access. Does not consider page tables. 
- bool IsOptimizableRAMAddress(u32 address, u32 access_size) const; + bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst) const; u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) const; bool IsOptimizableGatherPipeWrite(u32 address) const; @@ -345,4 +345,15 @@ void WriteU64FromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst); void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst); void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst); void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst); + +constexpr u32 GetAlignmentMask(size_t size) +{ + return static_cast<u32>(std::min<size_t>(4, size >> 3) - 1); +} + +bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst); +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, size_t access_size); +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi); + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 911f43087b00..783ecd60ae88 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -582,8 +582,7 @@ void PowerPCManager::CheckExceptions() m_ppc_state.msr.LE = m_ppc_state.msr.ILE; m_ppc_state.msr.Hex &= ~0x04EF36; m_ppc_state.pc = m_ppc_state.npc = 0x00000600; - - // TODO crazy amount of DSISR options to check out + // DSISR and DAR regs are changed in GenerateAlignmentException() DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT"); m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT;