diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index ce3c0a510336..794939fef289 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -657,11 +657,16 @@ static constexpr u32 MaskImm26(s64 distance) } // FixupBranch branching -void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch) +{ + SetJumpTarget(branch, m_code); +} + +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { bool Not = false; u32 inst = 0; - s64 distance = (s64)(m_code - branch.ptr); + s64 distance = static_cast(target - branch.ptr); distance >>= 2; switch (branch.type) diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index a5a4c03e4eae..6fa6b4fac817 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -790,7 +790,8 @@ class ARM64XEmitter u8* GetWritableCodePtr(); // FixupBranch branching - void SetJumpTarget(FixupBranch const& branch); + void SetJumpTarget(const FixupBranch& branch); + void SetJumpTarget(const FixupBranch& branch, const u8* target); FixupBranch CBZ(ARM64Reg Rt); FixupBranch CBNZ(ARM64Reg Rt); FixupBranch B(CCFlags cond); diff --git a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h index 9ad4e04cdc7f..2fd42a953a51 100644 --- a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h @@ -7,10 +7,17 @@ #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/PowerPC.h" -inline void GenerateAlignmentException(u32 address) +inline void GenerateAlignmentException(u32 effective_address, UGeckoInstruction inst) { PowerPC::ppcState.Exceptions |= EXCEPTION_ALIGNMENT; - PowerPC::ppcState.spr[SPR_DAR] = address; + PowerPC::ppcState.spr[SPR_DAR] = effective_address; + + // It has not been hardware tested what gets used instead of RD and 
RA in + // the cases documented as undefined. For now, simply use RD and RA + const bool x = inst.OPCD >= 32; + const u32 op = x ? inst.SUBOP10 : (inst.OPCD >> 1); + const u32 dsisr = ((op >> 8) << 15) | ((op & 0b11111) << 10) | (inst.RD << 5) | (inst.RA); + PowerPC::ppcState.spr[SPR_DSISR] = dsisr; } inline void GenerateDSIException(u32 address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index f6f6be98b7fe..b918a26d2479 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -58,13 +58,6 @@ void Interpreter::lbzu(UGeckoInstruction inst) void Interpreter::lfd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -74,13 +67,6 @@ void Interpreter::lfd(UGeckoInstruction inst) void Interpreter::lfdu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -93,13 +79,6 @@ void Interpreter::lfdu(UGeckoInstruction inst) void Interpreter::lfdux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -112,13 +91,6 @@ void Interpreter::lfdux(UGeckoInstruction inst) void Interpreter::lfdx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - - if 
((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -128,13 +100,6 @@ void Interpreter::lfdx(UGeckoInstruction inst) void Interpreter::lfs(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -147,13 +112,6 @@ void Interpreter::lfs(UGeckoInstruction inst) void Interpreter::lfsu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -167,13 +125,6 @@ void Interpreter::lfsu(UGeckoInstruction inst) void Interpreter::lfsux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -187,13 +138,6 @@ void Interpreter::lfsux(UGeckoInstruction inst) void Interpreter::lfsx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -252,9 +196,9 @@ void Interpreter::lmw(UGeckoInstruction inst) { u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0 || MSR.LE) + if (MSR.LE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } 
@@ -283,9 +227,9 @@ void Interpreter::stmw(UGeckoInstruction inst) { u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0 || MSR.LE) + if (MSR.LE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } @@ -344,12 +288,6 @@ void Interpreter::stfd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); } @@ -357,12 +295,6 @@ void Interpreter::stfdu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -374,12 +306,6 @@ void Interpreter::stfs(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); } @@ -387,12 +313,6 @@ void Interpreter::stfsu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -501,7 +421,7 @@ void Interpreter::dcbz(UGeckoInstruction inst) if (!HID0.DCE) { - GenerateAlignmentException(dcbz_addr); + GenerateAlignmentException(dcbz_addr, inst); return; } @@ -525,7 +445,7 @@ void Interpreter::dcbz_l(UGeckoInstruction inst) if (!HID0.DCE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } @@ -545,13 +465,11 @@ void Interpreter::eciwx(UGeckoInstruction inst) return; } 
- if (EA & 3) + const u32 temp = PowerPC::Read_U32(EA, inst); + if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { - GenerateAlignmentException(EA); - return; + rGPR[inst.RD] = temp; } - - rGPR[inst.RD] = PowerPC::Read_U32(EA, inst); } void Interpreter::ecowx(UGeckoInstruction inst) @@ -564,12 +482,6 @@ void Interpreter::ecowx(UGeckoInstruction inst) return; } - if (EA & 3) - { - GenerateAlignmentException(EA); - return; - } - PowerPC::Write_U32(rGPR[inst.RS], EA, inst); } @@ -666,13 +578,14 @@ void Interpreter::lhzx(UGeckoInstruction inst) } // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::lswx(UGeckoInstruction inst) { u32 EA = Helper_Get_EA_X(PowerPC::ppcState, inst); if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -752,12 +665,6 @@ void Interpreter::stfdux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -769,12 +676,6 @@ void Interpreter::stfdx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); } @@ -783,12 +684,6 @@ void Interpreter::stfiwx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address, inst); } @@ -796,12 +691,6 @@ void Interpreter::stfsux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } 
- PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -813,12 +702,6 @@ void Interpreter::stfsx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); } @@ -845,6 +728,7 @@ void Interpreter::sthx(UGeckoInstruction inst) // lswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::lswi(UGeckoInstruction inst) { u32 EA; @@ -855,7 +739,7 @@ void Interpreter::lswi(UGeckoInstruction inst) if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -896,6 +780,7 @@ void Interpreter::lswi(UGeckoInstruction inst) // todo : optimize ? // stswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::stswi(UGeckoInstruction inst) { u32 EA; @@ -906,7 +791,7 @@ void Interpreter::stswi(UGeckoInstruction inst) if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -940,13 +825,14 @@ void Interpreter::stswi(UGeckoInstruction inst) } // TODO: is this right? is it DSI interruptible? +// TODO: Should this be able to cause alignment exceptions? 
void Interpreter::stswx(UGeckoInstruction inst) { u32 EA = Helper_Get_EA_X(PowerPC::ppcState, inst); if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -983,12 +869,6 @@ void Interpreter::lwarx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) @@ -1004,12 +884,6 @@ void Interpreter::stwcxd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - if (PowerPC::ppcState.reserve) { if (address == PowerPC::ppcState.reserve_address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index 44da0199d7e8..1065d9af28b4 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -89,7 +89,7 @@ static std::array primarytable = {60, Interpreter::psq_st, {"psq_st", OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {61, Interpreter::psq_stu, {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - //missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 + //missing: 0, 1, 2, 5, 6, 9, 22, 30, 58, 62 }}; static std::array table4 = diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index d6da2015ac11..71147b08a440 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -332,7 +332,7 @@ void Jit64::dcbz(UGeckoInstruction inst) if (emit_fast_path) { - // Perform lookup to see if we 
can use fast path. + // Perform BAT lookup to see if we can use fast path. MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0])); PUSH(RSCRATCH); SHR(32, R(RSCRATCH), Imm8(PowerPC::BAT_INDEX_SHIFT)); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index c26b00f5ed46..a223927b172c 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -101,8 +101,8 @@ FixupBranch EmuCodeBlock::BATAddressLookup(X64Reg addr, X64Reg tmp, const void* return J_CC(CC_NC, m_far_code.Enabled()); } -FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr, - BitSet32 registers_in_use) +FixupBranch EmuCodeBlock::CheckIfBATSafeAddress(const OpArg& reg_value, X64Reg reg_addr, + BitSet32 registers_in_use) { registers_in_use[reg_addr] = true; if (reg_value.IsSimpleReg()) @@ -117,7 +117,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ if (reg_addr != RSCRATCH_EXTRA) MOV(32, R(RSCRATCH_EXTRA), R(reg_addr)); - // Perform lookup to see if we can use fast path. + // Perform BAT lookup to see if we can use fast path. 
MOV(64, R(RSCRATCH), ImmPtr(&PowerPC::dbat_table[0])); SHR(32, R(RSCRATCH_EXTRA), Imm8(PowerPC::BAT_INDEX_SHIFT)); TEST(32, MComplex(RSCRATCH, RSCRATCH_EXTRA, SCALE_4, 0), Imm32(PowerPC::BAT_PHYSICAL_BIT)); @@ -130,6 +130,13 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ return J_CC(CC_Z, m_far_code.Enabled()); } +FixupBranch EmuCodeBlock::CheckIfAlignmentSafeAddress(X64Reg reg_addr, int access_size, + UGeckoInstruction inst) +{ + TEST(32, R(reg_addr), Imm32(PowerPC::GetAlignmentMask(access_size))); + return J_CC(CC_NZ, m_far_code.Enabled()); +} + void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { @@ -331,12 +338,14 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, s32 offset, UGeckoInstruction inst, BitSet32 registersInUse, bool signExtend, int flags) { - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst); auto& js = m_jit.js; registersInUse[reg_value] = false; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !slowmem && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -388,13 +397,21 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(R(reg_value), reg_addr, registersInUse); + UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend); + if (m_far_code.Enabled()) 
SwitchToFarCode(); else exit = J(true); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // Helps external systems know which instruction triggered the read. @@ -450,7 +467,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc bool signExtend) { // If the address is known to be RAM, just load it directly. - if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) + if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address, accessSize, inst)) { UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend); return; @@ -502,15 +519,17 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, UGeckoInstruction inst, BitSet32 registersInUse, int flags) { - bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); + const bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, false); // set the correct immediate format reg_value = FixImmediate(accessSize, reg_value); auto& js = m_jit.js; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !slowmem && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -558,13 +577,21 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = 
CheckIfBATSafeAddress(reg_value, reg_addr, registersInUse); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(true); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs @@ -661,7 +688,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, m_jit.js.fifoBytesSinceCheck += accessSize >> 3; return false; } - else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) + else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address, accessSize, inst)) { WriteToConstRamAddress(accessSize, arg, address); return false; diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 63e0d6cc3e9c..a544ee6e3b60 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -54,8 +54,10 @@ class EmuCodeBlock : public Gen::X64CodeBlock // Jumps to the returned FixupBranch if lookup fails. 
Gen::FixupBranch BATAddressLookup(Gen::X64Reg addr, Gen::X64Reg tmp, const void* bat_table); - Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, - BitSet32 registers_in_use); + Gen::FixupBranch CheckIfBATSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, + BitSet32 registers_in_use); + Gen::FixupBranch CheckIfAlignmentSafeAddress(Gen::X64Reg reg_addr, int access_size, + UGeckoInstruction inst); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 5c5f8e54d9dd..3e2cb4be2253 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -58,8 +58,21 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fast bool in_far_code = false; const u8* fastmem_start = GetCodePtr(); + FixupBranch slowmem_fixup; + bool check_alignment = fastmem && do_farcode && jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst); + if (fastmem) { + if (check_alignment) + { + const u32 mask = PowerPC::GetAlignmentMask(BackPatchInfo::GetFlagSize(flags)); + TST(addr, LogicalImm(mask, 32)); + FixupBranch fast = B(CCFlags::CC_EQ); + slowmem_fixup = BL(); + SetJumpTarget(fast); + } + if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT) { if (flags & BackPatchInfo::FLAG_SIZE_F32) @@ -154,10 +167,17 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fast const u8* handler_loc = handler_loc_iter->second; fastmem_area->slowmem_code = handler_loc; fastmem_area->length = fastmem_end - fastmem_start; + + if (check_alignment) + SetJumpTarget(slowmem_fixup, handler_loc); + return; } 
} + if (check_alignment) + SetJumpTarget(slowmem_fixup); + ABI_PushRegisters(gprs_to_push); m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 8ae4dbc1d724..8f7dd59ab525 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -121,7 +121,8 @@ void JitArm64::SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 off if (is_immediate) mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size); - if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { EmitBackpatchRoutine(inst, flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0)); } @@ -256,7 +257,8 @@ void JitArm64::SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 js.fifoBytesSinceCheck += accessSize >> 3; } - else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { MOVI2R(XA, imm_addr); EmitBackpatchRoutine(inst, flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 3050bdc330ab..ee758dd1fea1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -167,7 +167,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(VD)] = 0; - if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (jo.fastmem_arena && is_immediate && + 
PowerPC::IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst)) { EmitBackpatchRoutine(inst, flags, true, false, VD, XA, BitSet32(0), BitSet32(0)); } @@ -356,16 +357,12 @@ void JitArm64::stfXX(UGeckoInstruction inst) regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; + u32 access_size = BackPatchInfo::GetFlagSize(flags); + if (is_immediate) { if (jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) { - int accessSize; - if (flags & BackPatchInfo::FLAG_SIZE_F64) - accessSize = 64; - else - accessSize = 32; - LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (flags & BackPatchInfo::FLAG_SIZE_F64) @@ -373,11 +370,12 @@ void JitArm64::stfXX(UGeckoInstruction inst) else if (flags & BackPatchInfo::FLAG_SIZE_F32) m_float_emit.REV32(8, ARM64Reg::D0, V0); - m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0, - ARM64Reg::X0, accessSize >> 3); + m_float_emit.STR(access_size, IndexType::Post, + access_size == 64 ? 
ARM64Reg::Q0 : ARM64Reg::D0, ARM64Reg::X0, + access_size >> 3); STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); - js.fifoBytesSinceCheck += accessSize >> 3; + js.fifoBytesSinceCheck += access_size >> 3; if (update) { @@ -386,7 +384,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOVI2R(gpr.R(a), imm_addr); } } - else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { EmitBackpatchRoutine(inst, flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index c42fce554ec8..a1a1e96153c5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -45,5 +45,6 @@ void JitBase::UpdateMemoryOptions() { bool any_watchpoints = PowerPC::memchecks.HasAny(); jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints); - jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; + jo.alignment_exceptions = SConfig::GetInstance().bAlignmentExceptions; + jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints || jo.alignment_exceptions; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 18784ff095b2..43949daf354a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -62,6 +62,7 @@ class JitBase : public CPUCoreBase bool accurateSinglePrecision; bool fastmem; bool fastmem_arena; + bool alignment_exceptions; bool memcheck; bool profile_blocks; }; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index d9dea7b48206..aba0b48a915a 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -3,6 +3,7 @@ #include "Core/PowerPC/MMU.h" +#include #include #include 
#include @@ -20,6 +21,7 @@ #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" @@ -190,12 +192,18 @@ static T ReadFromHardware(u32 em_address, UGeckoInstruction inst) GenerateDSIException(em_address, false); return 0; } + + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, translated_addr.wi)) + { + GenerateAlignmentException(em_address, inst); + return 0; + } + if ((em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) { // This could be unaligned down to the byte level... hopefully this is rare, so doing it this // way isn't too terrible. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1); auto addr_next_page = TranslateAddress(em_address_next_page); if (!addr_next_page.Success()) @@ -216,6 +224,15 @@ static T ReadFromHardware(u32 em_address, UGeckoInstruction inst) } em_address = translated_addr.address; } + else + { + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, false)) + { + GenerateAlignmentException(em_address, inst); + return 0; + } + } // TODO: Make sure these are safe for unaligned addresses. @@ -277,8 +294,6 @@ static void WriteToHardware(u32 em_address, const u32 data, const u32 size, UGec if (em_address_start_page != em_address_end_page) { // The write crosses a page boundary. Break it up into two writes. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! 
const u32 first_half_size = em_address_end_page - em_address; const u32 second_half_size = size - first_half_size; WriteToHardware( @@ -302,6 +317,13 @@ static void WriteToHardware(u32 em_address, const u32 data, const u32 size, UGec wi = translated_addr.wi; } + if (flag == XCheckTLBFlag::Write && + AccessCausesAlignmentException(em_address, size << 3, inst, wi)) + { + GenerateAlignmentException(em_address, inst); + return; + } + // Check for a gather pipe write. // Note that we must mask the address to correctly emulate certain games; // Pac-Man World 3 in particular is affected by this. @@ -867,7 +889,7 @@ TryReadResult HostTryReadString(u32 address, size_t size, Requested return TryReadResult(c.translated, std::move(s)); } -bool IsOptimizableRAMAddress(const u32 address) +bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst) { if (PowerPC::memchecks.HasAny()) return false; @@ -875,8 +897,12 @@ bool IsOptimizableRAMAddress(const u32 address) if (!MSR.DR) return false; - // TODO: This API needs to take an access size - // + if ((address & GetAlignmentMask(access_size)) != 0 && + AccessCausesAlignmentExceptionIfMisaligned(inst)) + { + return false; + } + // We store whether an access can be optimized to an unchecked access // in dbat_table. u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT]; @@ -1072,7 +1098,7 @@ u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) return 0; // Check whether the address is an aligned address of an MMIO register. 
- const bool aligned = (address & ((access_size >> 3) - 1)) == 0; + const bool aligned = (address & GetAlignmentMask(access_size)) == 0; if (!aligned || !MMIO::IsMMIOAddress(address)) return 0; @@ -1608,4 +1634,68 @@ std::optional GetTranslatedAddress(u32 address) return std::optional(result.address); } +static bool IsDcbz(UGeckoInstruction inst) +{ + // dcbz, dcbz_l + return inst.SUBOP10 == 1014 && (inst.OPCD == 31 || inst.OPCD == 13); +} + +static bool IsFloat(UGeckoInstruction inst) +{ + // Floating loadstore, paired loadstore (excluding dcbz_l) + return (inst.OPCD >= 48 && inst.OPCD < 62) || (inst.OPCD == 4 && inst.SUBOP10 != 1014); + + // TODO: "In 750CL, the paired-single quantization load or store generates an alignment exception + // if the operand is not word-aligned when the corresponding GQRn[LD_TYPE] or GQRn[ST_TYPE] are + // type 0 and does not generate an alignment exception when the corresponding GQRn[LD_TYPE] or + // GQRn[ST_TYPE] are 4, 5, 6 or 7." Right now we treat it as always being type 0 +} + +static bool IsMultiword(UGeckoInstruction inst) +{ + // lmw, stmw + if (inst.OPCD == 46 || inst.OPCD == 47) + return true; + + if (inst.OPCD != 31) + return false; + + // lswx, lswi, stswx, stswi + return inst.SUBOP10 == 533 || inst.SUBOP10 == 597 || inst.SUBOP10 == 661 || inst.SUBOP10 == 725; +} + +static bool IsLwarxOrStwcx(UGeckoInstruction inst) +{ + // lwarx, stwcx + return inst.OPCD == 31 && (inst.SUBOP10 == 20 || inst.SUBOP10 == 150); +} + +static bool IsEciwxOrEcowx(UGeckoInstruction inst) +{ + // eciwx, ecowx + return inst.OPCD == 31 && (inst.SUBOP10 == 310 || inst.SUBOP10 == 438); +} + +bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst) +{ + return IsDcbz(inst); +} + +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst) +{ + return IsFloat(inst) || IsMultiword(inst) || IsLwarxOrStwcx(inst) || IsEciwxOrEcowx(inst); +} + +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, +
UGeckoInstruction inst, bool wi) +{ + if (wi && AccessCausesAlignmentExceptionIfWi(inst)) + return true; + + if ((effective_address & GetAlignmentMask(access_size)) == 0) + return false; + + return AccessCausesAlignmentExceptionIfMisaligned(inst); +} + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 4c886fcdadba..4c386df19025 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -185,7 +185,7 @@ void IBATUpdated(); // Result changes based on the BAT registers and MSR.DR. Returns whether // it's safe to optimize a read or write to this address to an unguarded // memory access. Does not consider page tables. -bool IsOptimizableRAMAddress(u32 address); +bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst); u32 IsOptimizableMMIOAccess(u32 address, u32 access_size); bool IsOptimizableGatherPipeWrite(u32 address); @@ -225,4 +225,15 @@ inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi } std::optional GetTranslatedAddress(u32 address); + +constexpr u32 GetAlignmentMask(size_t size) +{ + return static_cast(std::min(4, size >> 3) - 1); +} + +bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst); +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst); +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi); + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 1f964b54b7cd..86e58d056647 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -538,8 +538,7 @@ void CheckExceptions() MSR.LE = MSR.ILE; MSR.Hex &= ~0x04EF36; PC = NPC = 0x00000600; - - // TODO crazy amount of DSISR options to check out + // DSISR and DAR regs are changed in GenerateAlignmentException() DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT"); ppcState.Exceptions &= 
~EXCEPTION_ALIGNMENT;