From b5e68062d3d3bdf196d543d1f7de4d5e4e0a6762 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sun, 4 Jul 2021 20:47:04 +0200
Subject: [PATCH] PowerPC: Pass current instruction to Read/Write functions

---
 Source/Core/Common/x64Emitter.h               |  41 +++++
 Source/Core/Core/Boot/Boot_BS2Emu.cpp         |  12 +-
 Source/Core/Core/FifoPlayer/FifoPlayer.cpp    |  12 +-
 Source/Core/Core/HLE/HLE_OS.cpp               |   2 +-
 .../Interpreter/Interpreter_LoadStore.cpp     | 124 ++++++-------
 .../Interpreter_LoadStorePaired.cpp           | 118 ++++++-------
 .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp |  14 +-
 .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp   |   8 +-
 .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp |  57 +++---
 .../Core/PowerPC/Jit64Common/EmuCodeBlock.h   |  13 +-
 .../PowerPC/Jit64Common/Jit64AsmCommon.cpp    |  12 +-
 .../PowerPC/Jit64Common/TrampolineCache.cpp   |   7 +-
 .../Core/PowerPC/Jit64Common/TrampolineInfo.h |   4 +
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  43 +++--
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   |  56 +++---
 .../PowerPC/JitArm64/JitArm64_LoadStore.cpp   |  30 ++--
 .../JitArm64/JitArm64_LoadStoreFloating.cpp   |  11 +-
 .../JitArm64/JitArm64_LoadStorePaired.cpp     |  37 ++--
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  |  98 +++++------
 Source/Core/Core/PowerPC/MMU.cpp              | 163 +++++++++---------
 Source/Core/Core/PowerPC/MMU.h                |  57 +++---
 21 files changed, 517 insertions(+), 402 deletions(-)

diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 7a7e1a695bac..48ef7a8c3727 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -1090,6 +1090,15 @@ class XEmitter
     ABI_CallFunction(func);
   }
 
+  template <typename FunctionPointer>
+  void ABI_CallFunctionPCC(FunctionPointer func, const void* param1, u32 param2, u32 param3)
+  {
+    MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(param1)));
+    MOV(32, R(ABI_PARAM2), Imm32(param2));
+    MOV(32, R(ABI_PARAM3), Imm32(param3));
+    ABI_CallFunction(func);
+  }
+
   template <typename FunctionPointer>
   void ABI_CallFunctionPPC(FunctionPointer func, const void* param1, const void* param2, u32 param3)
   {
@@ -1118,6 +1127,16 @@ class XEmitter
     ABI_CallFunction(func);
   }
 
+  template <typename FunctionPointer>
+  void ABI_CallFunctionPRC(FunctionPointer func, const void* ptr, X64Reg reg1, u32 param3)
+  {
+    if (reg1 != ABI_PARAM2)
+      MOV(64, R(ABI_PARAM2), R(reg1));
+    MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(ptr)));
+    MOV(32, R(ABI_PARAM3), Imm32(param3));
+    ABI_CallFunction(func);
+  }
+
   // Pass two registers as parameters.
template void ABI_CallFunctionRR(FunctionPointer func, X64Reg reg1, X64Reg reg2) @@ -1135,6 +1154,16 @@ class XEmitter ABI_CallFunction(func); } + template + void ABI_CallFunctionPRRC(FunctionPointer func, const void* ptr, X64Reg reg1, X64Reg reg2, + u32 param4) + { + MOVTwo(64, ABI_PARAM2, reg1, 0, ABI_PARAM3, reg2); + MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast(ptr))); + MOV(32, R(ABI_PARAM4), Imm32(param4)); + ABI_CallFunction(func); + } + template void ABI_CallFunctionAC(int bits, FunctionPointer func, const Gen::OpArg& arg1, u32 param2) { @@ -1150,8 +1179,20 @@ class XEmitter { if (!arg2.IsSimpleReg(ABI_PARAM2)) MOV(bits, R(ABI_PARAM2), arg2); + MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast(ptr1))); MOV(32, R(ABI_PARAM3), Imm32(param3)); + ABI_CallFunction(func); + } + + template + void ABI_CallFunctionPACC(int bits, FunctionPointer func, const void* ptr1, + const Gen::OpArg& arg2, u32 param3, u32 param4) + { + if (!arg2.IsSimpleReg(ABI_PARAM2)) + MOV(bits, R(ABI_PARAM2), arg2); MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast(ptr1))); + MOV(32, R(ABI_PARAM3), Imm32(param3)); + MOV(32, R(ABI_PARAM4), Imm32(param4)); ABI_CallFunction(func); } diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index 1e1f9d993b35..ba2481fa6b85 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -166,9 +166,9 @@ bool CBoot::RunApploader(Core::System& system, const Core::CPUThreadGuard& guard ppc_state.gpr[4] = iAppLoaderFuncAddr + 4; ppc_state.gpr[5] = iAppLoaderFuncAddr + 8; RunFunction(system, *entry); - const u32 iAppLoaderInit = mmu.Read_U32(iAppLoaderFuncAddr + 0); - const u32 iAppLoaderMain = mmu.Read_U32(iAppLoaderFuncAddr + 4); - const u32 iAppLoaderClose = mmu.Read_U32(iAppLoaderFuncAddr + 8); + const u32 iAppLoaderInit = mmu.Read_U32(iAppLoaderFuncAddr + 0, UGeckoInstruction{}); + const u32 iAppLoaderMain = mmu.Read_U32(iAppLoaderFuncAddr + 4, UGeckoInstruction{}); + const u32 iAppLoaderClose = mmu.Read_U32(iAppLoaderFuncAddr + 8, UGeckoInstruction{}); // iAppLoaderInit DEBUG_LOG_FMT(BOOT, "Call iAppLoaderInit"); @@ -194,9 +194,9 @@ bool CBoot::RunApploader(Core::System& system, const Core::CPUThreadGuard& guard // iAppLoaderMain returns 0 when there are no more sections to copy. while (ppc_state.gpr[3] != 0x00) { - const u32 ram_address = mmu.Read_U32(0x81300004); - const u32 length = mmu.Read_U32(0x81300008); - const u32 dvd_offset = mmu.Read_U32(0x8130000c) << (is_wii ? 2 : 0); + const u32 ram_address = mmu.Read_U32(0x81300004, UGeckoInstruction{}); + const u32 length = mmu.Read_U32(0x81300008, UGeckoInstruction{}); + const u32 dvd_offset = mmu.Read_U32(0x8130000c, UGeckoInstruction{}) << (is_wii ? 
2 : 0); INFO_LOG_FMT(BOOT, "DVDRead: offset: {:08x} memOffset: {:08x} length: {}", dvd_offset, ram_address, length); diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index e0359905046c..3c1999969ec6 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -702,12 +702,12 @@ void FifoPlayer::LoadTextureMemory() void FifoPlayer::WriteCP(u32 address, u16 value) { - m_system.GetMMU().Write_U16(value, 0xCC000000 | address); + m_system.GetMMU().Write_U16(value, 0xCC000000 | address, UGeckoInstruction{}); } void FifoPlayer::WritePI(u32 address, u32 value) { - m_system.GetMMU().Write_U32(value, 0xCC003000 | address); + m_system.GetMMU().Write_U32(value, 0xCC003000 | address, UGeckoInstruction{}); } void FifoPlayer::FlushWGP() @@ -805,14 +805,14 @@ bool FifoPlayer::ShouldLoadXF(u8 reg) bool FifoPlayer::IsIdleSet() const { - CommandProcessor::UCPStatusReg status = - m_system.GetMMU().Read_U16(0xCC000000 | CommandProcessor::STATUS_REGISTER); + CommandProcessor::UCPStatusReg status = m_system.GetMMU().Read_U16( + 0xCC000000 | CommandProcessor::STATUS_REGISTER, UGeckoInstruction{}); return status.CommandIdle; } bool FifoPlayer::IsHighWatermarkSet() const { - CommandProcessor::UCPStatusReg status = - m_system.GetMMU().Read_U16(0xCC000000 | CommandProcessor::STATUS_REGISTER); + CommandProcessor::UCPStatusReg status = m_system.GetMMU().Read_U16( + 0xCC000000 | CommandProcessor::STATUS_REGISTER, UGeckoInstruction{}); return status.OverflowHiWatermark; } diff --git a/Source/Core/Core/HLE/HLE_OS.cpp b/Source/Core/Core/HLE/HLE_OS.cpp index 084bd0ee2517..aced305c1830 100644 --- a/Source/Core/Core/HLE/HLE_OS.cpp +++ b/Source/Core/Core/HLE/HLE_OS.cpp @@ -117,7 +117,7 @@ void HLE_write_console(const Core::CPUThreadGuard& guard) std::string report_message = GetStringVA(system, guard, 4); if (PowerPC::MMU::HostIsRAMAddress(guard, ppc_state.gpr[5])) { - const u32 size = system.GetMMU().Read_U32(ppc_state.gpr[5]); + const u32 size = system.GetMMU().HostRead_U32(guard, ppc_state.gpr[5]); if (size > report_message.size()) WARN_LOG_FMT(OSREPORT_HLE, "__write_console uses an invalid size of {:#010x}", size); else if (size == 0) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 23bc4434d35e..62e808b558c9 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -41,7 +41,7 @@ static u32 Helper_Get_EA_UX(const PowerPC::PowerPCState& ppcs, const UGeckoInstr void Interpreter::lbz(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = interpreter.m_mmu.Read_U8(Helper_Get_EA(ppc_state, inst)); + const u32 temp = interpreter.m_mmu.Read_U8(Helper_Get_EA(ppc_state, inst), inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) ppc_state.gpr[inst.RD] = temp; @@ -51,7 +51,7 @@ void Interpreter::lbzu(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U8(address); + const u32 temp = interpreter.m_mmu.Read_U8(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -71,7 +71,7 @@ void Interpreter::lfd(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u64 temp = interpreter.m_mmu.Read_U64(address); + const u64 
temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) ppc_state.ps[inst.FD].SetPS0(temp); @@ -88,7 +88,7 @@ void Interpreter::lfdu(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u64 temp = interpreter.m_mmu.Read_U64(address); + const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -108,7 +108,7 @@ void Interpreter::lfdux(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u64 temp = interpreter.m_mmu.Read_U64(address); + const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -128,7 +128,7 @@ void Interpreter::lfdx(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u64 temp = interpreter.m_mmu.Read_U64(address); + const u64 temp = interpreter.m_mmu.Read_U64(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) ppc_state.ps[inst.FD].SetPS0(temp); @@ -145,7 +145,7 @@ void Interpreter::lfs(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -165,7 +165,7 @@ void Interpreter::lfsu(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -186,7 +186,7 @@ void Interpreter::lfsux(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -207,7 +207,7 @@ void Interpreter::lfsx(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -219,7 +219,7 @@ void Interpreter::lfsx(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lha(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = u32(s32(s16(interpreter.m_mmu.Read_U16(Helper_Get_EA(ppc_state, inst))))); + const u32 temp = u32(s32(s16(interpreter.m_mmu.Read_U16(Helper_Get_EA(ppc_state, inst), inst)))); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -231,7 +231,7 @@ void Interpreter::lhau(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - const u32 temp = u32(s32(s16(interpreter.m_mmu.Read_U16(address)))); + const u32 temp = u32(s32(s16(interpreter.m_mmu.Read_U16(address, inst)))); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -243,7 +243,7 @@ void Interpreter::lhau(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lhz(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = interpreter.m_mmu.Read_U16(Helper_Get_EA(ppc_state, inst)); + const u32 temp = interpreter.m_mmu.Read_U16(Helper_Get_EA(ppc_state, inst), inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -255,7 +255,7 @@ void Interpreter::lhzu(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const 
u32 address = Helper_Get_EA_U(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U16(address); + const u32 temp = interpreter.m_mmu.Read_U16(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -278,7 +278,7 @@ void Interpreter::lmw(Interpreter& interpreter, UGeckoInstruction inst) for (u32 i = inst.RD; i <= 31; i++, address += 4) { - const u32 temp_reg = interpreter.m_mmu.Read_U32(address); + const u32 temp_reg = interpreter.m_mmu.Read_U32(address, inst); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { @@ -310,7 +310,7 @@ void Interpreter::stmw(Interpreter& interpreter, UGeckoInstruction inst) for (u32 i = inst.RS; i <= 31; i++, address += 4) { - interpreter.m_mmu.Write_U32(ppc_state.gpr[i], address); + interpreter.m_mmu.Write_U32(ppc_state.gpr[i], address, inst); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { PanicAlertFmt("DSI exception in stmw"); @@ -324,7 +324,7 @@ void Interpreter::lwz(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -336,7 +336,7 @@ void Interpreter::lwzu(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -348,7 +348,7 @@ void Interpreter::lwzu(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::stb(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], Helper_Get_EA(ppc_state, inst)); + interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], Helper_Get_EA(ppc_state, inst), inst); } void Interpreter::stbu(Interpreter& interpreter, UGeckoInstruction inst) @@ -356,7 +356,7 @@ void Interpreter::stbu(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -374,7 +374,7 @@ void Interpreter::stfd(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address); + interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); } void Interpreter::stfdu(Interpreter& interpreter, UGeckoInstruction inst) @@ -388,7 +388,7 @@ void Interpreter::stfdu(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address); + interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -406,7 +406,7 @@ void Interpreter::stfs(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address); + interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); } void Interpreter::stfsu(Interpreter& interpreter, 
UGeckoInstruction inst) @@ -420,7 +420,7 @@ void Interpreter::stfsu(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address); + interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -430,7 +430,7 @@ void Interpreter::stfsu(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::sth(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], Helper_Get_EA(ppc_state, inst)); + interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], Helper_Get_EA(ppc_state, inst), inst); } void Interpreter::sthu(Interpreter& interpreter, UGeckoInstruction inst) @@ -438,7 +438,7 @@ void Interpreter::sthu(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -448,7 +448,7 @@ void Interpreter::sthu(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::stw(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], Helper_Get_EA(ppc_state, inst)); + interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], Helper_Get_EA(ppc_state, inst), inst); } void Interpreter::stwu(Interpreter& interpreter, UGeckoInstruction inst) @@ -456,7 +456,7 @@ void Interpreter::stwu(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_U(ppc_state, inst); - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -556,7 +556,7 @@ void Interpreter::dcbz(Interpreter& interpreter, UGeckoInstruction inst) } } - interpreter.m_mmu.ClearDCacheLine(dcbz_addr & (~31)); + interpreter.m_mmu.ClearDCacheLine(dcbz_addr & (~31), inst); } void Interpreter::dcbz_l(Interpreter& interpreter, UGeckoInstruction inst) @@ -576,7 +576,7 @@ void Interpreter::dcbz_l(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.ClearDCacheLine(address & (~31)); + interpreter.m_mmu.ClearDCacheLine(address & (~31), inst); } // eciwx/ecowx technically should access the specified device @@ -598,7 +598,7 @@ void Interpreter::eciwx(Interpreter& interpreter, UGeckoInstruction inst) return; } - ppc_state.gpr[inst.RD] = interpreter.m_mmu.Read_U32(EA); + ppc_state.gpr[inst.RD] = interpreter.m_mmu.Read_U32(EA, inst); } void Interpreter::ecowx(Interpreter& interpreter, UGeckoInstruction inst) @@ -618,7 +618,7 @@ void Interpreter::ecowx(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], EA); + interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], EA, inst); } void Interpreter::eieio(Interpreter& interpreter, UGeckoInstruction inst) @@ -641,7 +641,7 @@ void Interpreter::lbzux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - const 
u32 temp = interpreter.m_mmu.Read_U8(address); + const u32 temp = interpreter.m_mmu.Read_U8(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -653,7 +653,7 @@ void Interpreter::lbzux(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lbzx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = interpreter.m_mmu.Read_U8(Helper_Get_EA_X(ppc_state, inst)); + const u32 temp = interpreter.m_mmu.Read_U8(Helper_Get_EA_X(ppc_state, inst), inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -665,7 +665,7 @@ void Interpreter::lhaux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - const s32 temp = s32{s16(interpreter.m_mmu.Read_U16(address))}; + const s32 temp = s32{s16(interpreter.m_mmu.Read_U16(address, inst))}; if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -677,7 +677,7 @@ void Interpreter::lhaux(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lhax(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const s32 temp = s32{s16(interpreter.m_mmu.Read_U16(Helper_Get_EA_X(ppc_state, inst)))}; + const s32 temp = s32{s16(interpreter.m_mmu.Read_U16(Helper_Get_EA_X(ppc_state, inst), inst))}; if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -688,7 +688,8 @@ void Interpreter::lhax(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lhbrx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = Common::swap16(interpreter.m_mmu.Read_U16(Helper_Get_EA_X(ppc_state, inst))); + const u32 temp = + Common::swap16(interpreter.m_mmu.Read_U16(Helper_Get_EA_X(ppc_state, inst), inst)); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -700,7 +701,7 @@ void Interpreter::lhzux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U16(address); + const u32 temp = interpreter.m_mmu.Read_U16(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -712,7 +713,7 @@ void Interpreter::lhzux(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lhzx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = interpreter.m_mmu.Read_U16(Helper_Get_EA_X(ppc_state, inst)); + const u32 temp = interpreter.m_mmu.Read_U16(Helper_Get_EA_X(ppc_state, inst), inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -741,7 +742,7 @@ void Interpreter::lswx(Interpreter& interpreter, UGeckoInstruction inst) if ((n & 0b11) == 0) ppc_state.gpr[reg] = 0; - const u32 temp_value = interpreter.m_mmu.Read_U8(EA) << (24 - offset); + const u32 temp_value = interpreter.m_mmu.Read_U8(EA, inst) << (24 - offset); // Not64 (Homebrew N64 Emulator for Wii) triggers the following case. 
if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { @@ -757,7 +758,8 @@ void Interpreter::lswx(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::lwbrx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - const u32 temp = Common::swap32(interpreter.m_mmu.Read_U32(Helper_Get_EA_X(ppc_state, inst))); + const u32 temp = + Common::swap32(interpreter.m_mmu.Read_U32(Helper_Get_EA_X(ppc_state, inst), inst)); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -769,7 +771,7 @@ void Interpreter::lwzux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -782,7 +784,7 @@ void Interpreter::lwzx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -795,7 +797,7 @@ void Interpreter::stbux(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -805,7 +807,7 @@ void Interpreter::stbux(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::stbx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], Helper_Get_EA_X(ppc_state, inst)); + interpreter.m_mmu.Write_U8(ppc_state.gpr[inst.RS], Helper_Get_EA_X(ppc_state, inst), inst); } void Interpreter::stfdux(Interpreter& interpreter, UGeckoInstruction inst) @@ -819,7 +821,7 @@ void Interpreter::stfdux(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address); + interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -837,7 +839,7 @@ void Interpreter::stfdx(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address); + interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst); } // Stores Floating points into Integers indeXed @@ -852,7 +854,7 @@ void Interpreter::stfiwx(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U32(ppc_state.ps[inst.FS].PS0AsU32(), address); + interpreter.m_mmu.Write_U32(ppc_state.ps[inst.FS].PS0AsU32(), address, inst); } void Interpreter::stfsux(Interpreter& interpreter, UGeckoInstruction inst) @@ -866,7 +868,7 @@ void Interpreter::stfsux(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address); + interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -884,13 +886,13 @@ 
void Interpreter::stfsx(Interpreter& interpreter, UGeckoInstruction inst) return; } - interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address); + interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst); } void Interpreter::sthbrx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - interpreter.m_mmu.Write_U16_Swap(ppc_state.gpr[inst.RS], Helper_Get_EA_X(ppc_state, inst)); + interpreter.m_mmu.Write_U16_Swap(ppc_state.gpr[inst.RS], Helper_Get_EA_X(ppc_state, inst), inst); } void Interpreter::sthux(Interpreter& interpreter, UGeckoInstruction inst) @@ -898,7 +900,7 @@ void Interpreter::sthux(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -908,7 +910,7 @@ void Interpreter::sthux(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::sthx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], Helper_Get_EA_X(ppc_state, inst)); + interpreter.m_mmu.Write_U16(ppc_state.gpr[inst.RS], Helper_Get_EA_X(ppc_state, inst), inst); } // lswi - bizarro string instruction @@ -941,7 +943,7 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst) ppc_state.gpr[r] = 0; } - const u32 temp_value = interpreter.m_mmu.Read_U8(EA) << (24 - i); + const u32 temp_value = interpreter.m_mmu.Read_U8(EA, inst) << (24 - i); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { PanicAlertFmt("DSI exception in lsw."); @@ -987,7 +989,7 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) r++; r &= 31; } - interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA); + interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA, inst); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { return; @@ -1019,7 +1021,7 @@ void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst) while (n > 0) { - interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA); + interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA, inst); EA++; n--; @@ -1037,7 +1039,7 @@ void Interpreter::stwbrx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - interpreter.m_mmu.Write_U32_Swap(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U32_Swap(ppc_state.gpr[inst.RS], address, inst); } // The following two instructions are for SMP communications. 
On a single @@ -1054,7 +1056,7 @@ void Interpreter::lwarx(Interpreter& interpreter, UGeckoInstruction inst) return; } - const u32 temp = interpreter.m_mmu.Read_U32(address); + const u32 temp = interpreter.m_mmu.Read_U32(address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -1080,7 +1082,7 @@ void Interpreter::stwcxd(Interpreter& interpreter, UGeckoInstruction inst) { if (address == ppc_state.reserve_address) { - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.reserve = false; @@ -1098,7 +1100,7 @@ void Interpreter::stwux(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_UX(ppc_state, inst); - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address, inst); if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION)) { ppc_state.gpr[inst.RA] = address; @@ -1110,7 +1112,7 @@ void Interpreter::stwx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 address = Helper_Get_EA_X(ppc_state, inst); - interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address); + interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], address, inst); } void Interpreter::sync(Interpreter& interpreter, UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index 7cc484684ce8..b70e2aa0608f 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -69,111 +69,112 @@ SType ScaleAndClamp(double ps, u32 st_scale) } template -static T ReadUnpaired(PowerPC::MMU& mmu, u32 addr); +static T ReadUnpaired(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst); template <> -u8 ReadUnpaired(PowerPC::MMU& mmu, u32 addr) +u8 ReadUnpaired(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst) { - return mmu.Read_U8(addr); + return mmu.Read_U8(addr, inst); } template <> -u16 ReadUnpaired(PowerPC::MMU& mmu, u32 addr) +u16 ReadUnpaired(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst) { - return mmu.Read_U16(addr); + return mmu.Read_U16(addr, inst); } template <> -u32 ReadUnpaired(PowerPC::MMU& mmu, u32 addr) +u32 ReadUnpaired(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst) { - return mmu.Read_U32(addr); + return mmu.Read_U32(addr, inst); } template -static std::pair ReadPair(PowerPC::MMU& mmu, u32 addr); +static std::pair ReadPair(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst); template <> -std::pair ReadPair(PowerPC::MMU& mmu, u32 addr) +std::pair ReadPair(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst) { - const u16 val = mmu.Read_U16(addr); + const u16 val = mmu.Read_U16(addr, inst); return {u8(val >> 8), u8(val)}; } template <> -std::pair ReadPair(PowerPC::MMU& mmu, u32 addr) +std::pair ReadPair(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst) { - const u32 val = mmu.Read_U32(addr); + const u32 val = mmu.Read_U32(addr, inst); return {u16(val >> 16), u16(val)}; } template <> -std::pair ReadPair(PowerPC::MMU& mmu, u32 addr) +std::pair ReadPair(PowerPC::MMU& mmu, u32 addr, UGeckoInstruction inst) { - const u64 val = mmu.Read_U64(addr); + const u64 val = mmu.Read_U64(addr, inst); return {u32(val >> 32), u32(val)}; } template -static void 
WriteUnpaired(PowerPC::MMU& mmu, T val, u32 addr); +static void WriteUnpaired(PowerPC::MMU& mmu, T val, u32 addr, UGeckoInstruction inst); template <> -void WriteUnpaired(PowerPC::MMU& mmu, u8 val, u32 addr) +void WriteUnpaired(PowerPC::MMU& mmu, u8 val, u32 addr, UGeckoInstruction inst) { - mmu.Write_U8(val, addr); + mmu.Write_U8(val, addr, inst); } template <> -void WriteUnpaired(PowerPC::MMU& mmu, u16 val, u32 addr) +void WriteUnpaired(PowerPC::MMU& mmu, u16 val, u32 addr, UGeckoInstruction inst) { - mmu.Write_U16(val, addr); + mmu.Write_U16(val, addr, inst); } template <> -void WriteUnpaired(PowerPC::MMU& mmu, u32 val, u32 addr) +void WriteUnpaired(PowerPC::MMU& mmu, u32 val, u32 addr, UGeckoInstruction inst) { - mmu.Write_U32(val, addr); + mmu.Write_U32(val, addr, inst); } template -static void WritePair(PowerPC::MMU& mmu, T val1, T val2, u32 addr); +static void WritePair(PowerPC::MMU& mmu, T val1, T val2, u32 addr, UGeckoInstruction inst); template <> -void WritePair(PowerPC::MMU& mmu, u8 val1, u8 val2, u32 addr) +void WritePair(PowerPC::MMU& mmu, u8 val1, u8 val2, u32 addr, UGeckoInstruction inst) { - mmu.Write_U16((u16{val1} << 8) | u16{val2}, addr); + mmu.Write_U16((u16{val1} << 8) | u16{val2}, addr, inst); } template <> -void WritePair(PowerPC::MMU& mmu, u16 val1, u16 val2, u32 addr) +void WritePair(PowerPC::MMU& mmu, u16 val1, u16 val2, u32 addr, UGeckoInstruction inst) { - mmu.Write_U32((u32{val1} << 16) | u32{val2}, addr); + mmu.Write_U32((u32{val1} << 16) | u32{val2}, addr, inst); } template <> -void WritePair(PowerPC::MMU& mmu, u32 val1, u32 val2, u32 addr) +void WritePair(PowerPC::MMU& mmu, u32 val1, u32 val2, u32 addr, UGeckoInstruction inst) { - mmu.Write_U64((u64{val1} << 32) | u64{val2}, addr); + mmu.Write_U64((u64{val1} << 32) | u64{val2}, addr, inst); } template -void QuantizeAndStore(PowerPC::MMU& mmu, double ps0, double ps1, u32 addr, u32 instW, u32 st_scale) +void QuantizeAndStore(PowerPC::MMU& mmu, UGeckoInstruction inst, double ps0, double ps1, u32 addr, + u32 instW, u32 st_scale) { using U = std::make_unsigned_t; const U conv_ps0 = U(ScaleAndClamp(ps0, st_scale)); if (instW) { - WriteUnpaired(mmu, conv_ps0, addr); + WriteUnpaired(mmu, conv_ps0, addr, inst); } else { const U conv_ps1 = U(ScaleAndClamp(ps1, st_scale)); - WritePair(mmu, conv_ps0, conv_ps1, addr); + WritePair(mmu, conv_ps0, conv_ps1, addr, inst); } } -static void Helper_Quantize(PowerPC::MMU& mmu, const PowerPC::PowerPCState* ppcs, u32 addr, - u32 instI, u32 instRS, u32 instW) +static void Helper_Quantize(PowerPC::MMU& mmu, const PowerPC::PowerPCState* ppcs, + UGeckoInstruction inst, u32 addr, u32 instI, u32 instRS, u32 instW) { const UGQR gqr(ppcs->spr[SPR_GQR0 + instI]); const EQuantizeType st_type = gqr.st_type; @@ -191,32 +192,32 @@ static void Helper_Quantize(PowerPC::MMU& mmu, const PowerPC::PowerPCState* ppcs if (instW != 0) { - WriteUnpaired(mmu, conv_ps0, addr); + WriteUnpaired(mmu, conv_ps0, addr, inst); } else { const u64 integral_ps1 = Common::BitCast(ps1); const u32 conv_ps1 = ConvertToSingleFTZ(integral_ps1); - WritePair(mmu, conv_ps0, conv_ps1, addr); + WritePair(mmu, conv_ps0, conv_ps1, addr, inst); } break; } case QUANTIZE_U8: - QuantizeAndStore(mmu, ps0, ps1, addr, instW, st_scale); + QuantizeAndStore(mmu, inst, ps0, ps1, addr, instW, st_scale); break; case QUANTIZE_U16: - QuantizeAndStore(mmu, ps0, ps1, addr, instW, st_scale); + QuantizeAndStore(mmu, inst, ps0, ps1, addr, instW, st_scale); break; case QUANTIZE_S8: - QuantizeAndStore(mmu, ps0, ps1, addr, instW, st_scale); + 
QuantizeAndStore(mmu, inst, ps0, ps1, addr, instW, st_scale); break; case QUANTIZE_S16: - QuantizeAndStore(mmu, ps0, ps1, addr, instW, st_scale); + QuantizeAndStore(mmu, inst, ps0, ps1, addr, instW, st_scale); break; case QUANTIZE_INVALID1: @@ -228,20 +229,21 @@ static void Helper_Quantize(PowerPC::MMU& mmu, const PowerPC::PowerPCState* ppcs } template -std::pair LoadAndDequantize(PowerPC::MMU& mmu, u32 addr, u32 instW, u32 ld_scale) +std::pair LoadAndDequantize(PowerPC::MMU& mmu, UGeckoInstruction inst, u32 addr, + u32 instW, u32 ld_scale) { using U = std::make_unsigned_t; float ps0, ps1; if (instW != 0) { - const U value = ReadUnpaired(mmu, addr); + const U value = ReadUnpaired(mmu, addr, inst); ps0 = float(T(value)) * m_dequantizeTable[ld_scale]; ps1 = 1.0f; } else { - const auto [first, second] = ReadPair(mmu, addr); + const auto [first, second] = ReadPair(mmu, addr, inst); ps0 = float(T(first)) * m_dequantizeTable[ld_scale]; ps1 = float(T(second)) * m_dequantizeTable[ld_scale]; } @@ -249,8 +251,8 @@ std::pair LoadAndDequantize(PowerPC::MMU& mmu, u32 addr, u32 ins return {static_cast(ps0), static_cast(ps1)}; } -static void Helper_Dequantize(PowerPC::MMU& mmu, PowerPC::PowerPCState* ppcs, u32 addr, u32 instI, - u32 instRD, u32 instW) +static void Helper_Dequantize(PowerPC::MMU& mmu, PowerPC::PowerPCState* ppcs, + UGeckoInstruction inst, u32 addr, u32 instI, u32 instRD, u32 instW) { const UGQR gqr(ppcs->spr[SPR_GQR0 + instI]); const EQuantizeType ld_type = gqr.ld_type; @@ -264,32 +266,32 @@ static void Helper_Dequantize(PowerPC::MMU& mmu, PowerPC::PowerPCState* ppcs, u3 case QUANTIZE_FLOAT: if (instW != 0) { - const u32 value = ReadUnpaired(mmu, addr); + const u32 value = ReadUnpaired(mmu, addr, inst); ps0 = Common::BitCast(ConvertToDouble(value)); ps1 = 1.0; } else { - const auto [first, second] = ReadPair(mmu, addr); + const auto [first, second] = ReadPair(mmu, addr, inst); ps0 = Common::BitCast(ConvertToDouble(first)); ps1 = Common::BitCast(ConvertToDouble(second)); } break; case QUANTIZE_U8: - std::tie(ps0, ps1) = LoadAndDequantize(mmu, addr, instW, ld_scale); + std::tie(ps0, ps1) = LoadAndDequantize(mmu, inst, addr, instW, ld_scale); break; case QUANTIZE_U16: - std::tie(ps0, ps1) = LoadAndDequantize(mmu, addr, instW, ld_scale); + std::tie(ps0, ps1) = LoadAndDequantize(mmu, inst, addr, instW, ld_scale); break; case QUANTIZE_S8: - std::tie(ps0, ps1) = LoadAndDequantize(mmu, addr, instW, ld_scale); + std::tie(ps0, ps1) = LoadAndDequantize(mmu, inst, addr, instW, ld_scale); break; case QUANTIZE_S16: - std::tie(ps0, ps1) = LoadAndDequantize(mmu, addr, instW, ld_scale); + std::tie(ps0, ps1) = LoadAndDequantize(mmu, inst, addr, instW, ld_scale); break; case QUANTIZE_INVALID1: @@ -319,7 +321,7 @@ void Interpreter::psq_l(Interpreter& interpreter, UGeckoInstruction inst) } const u32 EA = inst.RA ? 
(ppc_state.gpr[inst.RA] + u32(inst.SIMM_12)) : u32(inst.SIMM_12); - Helper_Dequantize(interpreter.m_mmu, &ppc_state, EA, inst.I, inst.RD, inst.W); + Helper_Dequantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.I, inst.RD, inst.W); } void Interpreter::psq_lu(Interpreter& interpreter, UGeckoInstruction inst) @@ -332,7 +334,7 @@ void Interpreter::psq_lu(Interpreter& interpreter, UGeckoInstruction inst) } const u32 EA = ppc_state.gpr[inst.RA] + u32(inst.SIMM_12); - Helper_Dequantize(interpreter.m_mmu, &ppc_state, EA, inst.I, inst.RD, inst.W); + Helper_Dequantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.I, inst.RD, inst.W); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { @@ -352,7 +354,7 @@ void Interpreter::psq_st(Interpreter& interpreter, UGeckoInstruction inst) } const u32 EA = inst.RA ? (ppc_state.gpr[inst.RA] + u32(inst.SIMM_12)) : u32(inst.SIMM_12); - Helper_Quantize(interpreter.m_mmu, &ppc_state, EA, inst.I, inst.RS, inst.W); + Helper_Quantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.I, inst.RS, inst.W); } void Interpreter::psq_stu(Interpreter& interpreter, UGeckoInstruction inst) @@ -365,7 +367,7 @@ void Interpreter::psq_stu(Interpreter& interpreter, UGeckoInstruction inst) } const u32 EA = ppc_state.gpr[inst.RA] + u32(inst.SIMM_12); - Helper_Quantize(interpreter.m_mmu, &ppc_state, EA, inst.I, inst.RS, inst.W); + Helper_Quantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.I, inst.RS, inst.W); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { @@ -380,7 +382,7 @@ void Interpreter::psq_lx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 EA = inst.RA ? (ppc_state.gpr[inst.RA] + ppc_state.gpr[inst.RB]) : ppc_state.gpr[inst.RB]; - Helper_Dequantize(interpreter.m_mmu, &ppc_state, EA, inst.Ix, inst.RD, inst.Wx); + Helper_Dequantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.Ix, inst.RD, inst.Wx); } void Interpreter::psq_stx(Interpreter& interpreter, UGeckoInstruction inst) @@ -388,14 +390,14 @@ void Interpreter::psq_stx(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const u32 EA = inst.RA ? 
(ppc_state.gpr[inst.RA] + ppc_state.gpr[inst.RB]) : ppc_state.gpr[inst.RB]; - Helper_Quantize(interpreter.m_mmu, &ppc_state, EA, inst.Ix, inst.RS, inst.Wx); + Helper_Quantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.Ix, inst.RS, inst.Wx); } void Interpreter::psq_lux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 EA = ppc_state.gpr[inst.RA] + ppc_state.gpr[inst.RB]; - Helper_Dequantize(interpreter.m_mmu, &ppc_state, EA, inst.Ix, inst.RD, inst.Wx); + Helper_Dequantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.Ix, inst.RD, inst.Wx); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { @@ -409,7 +411,7 @@ void Interpreter::psq_stux(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const u32 EA = ppc_state.gpr[inst.RA] + ppc_state.gpr[inst.RB]; - Helper_Quantize(interpreter.m_mmu, &ppc_state, EA, inst.Ix, inst.RS, inst.Wx); + Helper_Quantize(interpreter.m_mmu, &ppc_state, inst, EA, inst.Ix, inst.RS, inst.Wx); if ((ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION) != 0) { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 6a1dc651419c..1a8cb2bdf504 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -218,7 +218,7 @@ void Jit64::lXXx(UGeckoInstruction inst) if (update && storeAddress) registersInUse[RSCRATCH2] = true; - SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, registersInUse, signExtend); + SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, inst, registersInUse, signExtend); if (update && storeAddress) MOV(32, Ra, opAddress); @@ -445,7 +445,7 @@ void Jit64::dcbz(UGeckoInstruction inst) MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); - ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH); + ABI_CallFunctionPRC(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH, inst.hex); ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (emit_fast_path) @@ -496,7 +496,7 @@ void Jit64::stX(UGeckoInstruction inst) const bool exception = [&] { RCOpArg Rs = gpr.Use(s, RCMode::Read); RegCache::Realize(Rs); - return WriteToConstAddress(accessSize, Rs, addr, CallerSavedRegistersInUse()); + return WriteToConstAddress(accessSize, Rs, addr, inst, CallerSavedRegistersInUse()); }(); if (update) { @@ -529,7 +529,7 @@ void Jit64::stX(UGeckoInstruction inst) reg_value = gpr.BindOrImm(s, RCMode::Read); } RegCache::Realize(Ra, reg_value); - SafeWriteRegToReg(reg_value, Ra, accessSize, offset, CallerSavedRegistersInUse(), + SafeWriteRegToReg(reg_value, Ra, accessSize, offset, inst, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); if (update) @@ -584,7 +584,7 @@ void Jit64::stXx(UGeckoInstruction inst) BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update) registersInUse[RSCRATCH2] = true; - SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, registersInUse, + SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, inst, registersInUse, byte_reverse ? 
SAFE_LOADSTORE_NO_SWAP : 0); if (update) @@ -607,7 +607,7 @@ void Jit64::lmw(UGeckoInstruction inst) } for (int i = d; i < 32; i++) { - SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4, + SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4, inst, CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false); RCOpArg Ri = gpr.Bind(i, RCMode::Write); RegCache::Realize(Ri); @@ -638,7 +638,7 @@ void Jit64::stmw(UGeckoInstruction inst) MOV(32, R(RSCRATCH2), Ri); Ri = RCOpArg::R(RSCRATCH2); } - SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16, + SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16, inst, CallerSavedRegistersInUse()); } } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 7ac34f5d6ee6..e904e56860f8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -67,7 +67,7 @@ void Jit64::lfXXX(UGeckoInstruction inst) BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && jo.memcheck) registersInUse[RSCRATCH2] = true; - SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false); + SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, inst, registersInUse, false); if (single) { @@ -134,7 +134,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) { u32 addr = (a ? gpr.Imm32(a) : 0) + imm; bool exception = - WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse()); + WriteToConstAddress(accessSize, R(RSCRATCH), addr, inst, CallerSavedRegistersInUse()); if (update) { @@ -180,7 +180,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) if (update) registersInUse[RSCRATCH2] = true; - SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, inst, registersInUse); if (update) MOV(32, Ra, R(RSCRATCH2)); @@ -207,5 +207,5 @@ void Jit64::stfiwx(UGeckoInstruction inst) MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg()); else MOV(32, R(RSCRATCH), Rs); - SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse()); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, inst, CallerSavedRegistersInUse()); } diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index e1b720dace92..f15e5e4462fc 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -317,7 +317,8 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, } void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, - s32 offset, BitSet32 registersInUse, bool signExtend, int flags) + s32 offset, UGeckoInstruction inst, BitSet32 registersInUse, + bool signExtend, int flags) { bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0; @@ -332,6 +333,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov); TrampolineInfo& info = m_back_patch_info[mov.address]; info.pc = js.compilerPC; + info.inst = inst; info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? 
mov.nonAtomicSwapStoreSrc : INVALID_REG; info.start = backpatchStart; info.read = true; @@ -357,7 +359,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, if (opAddress.IsImm()) { u32 address = opAddress.Imm32() + offset; - SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend); + SafeLoadToRegImmediate(reg_value, address, accessSize, inst, registersInUse, signExtend); return; } @@ -401,16 +403,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, switch (accessSize) { case 64: - ABI_CallFunctionPR(PowerPC::ReadU64FromJit, &m_jit.m_mmu, reg_addr); + ABI_CallFunctionPRC(PowerPC::ReadU64FromJit, &m_jit.m_mmu, reg_addr, inst.hex); break; case 32: - ABI_CallFunctionPR(PowerPC::ReadU32FromJit, &m_jit.m_mmu, reg_addr); + ABI_CallFunctionPRC(PowerPC::ReadU32FromJit, &m_jit.m_mmu, reg_addr, inst.hex); break; case 16: - ABI_CallFunctionPR(PowerPC::ReadU16FromJit, &m_jit.m_mmu, reg_addr); + ABI_CallFunctionPRC(PowerPC::ReadU16FromJit, &m_jit.m_mmu, reg_addr, inst.hex); break; case 8: - ABI_CallFunctionPR(PowerPC::ReadU8FromJit, &m_jit.m_mmu, reg_addr); + ABI_CallFunctionPRC(PowerPC::ReadU8FromJit, &m_jit.m_mmu, reg_addr, inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); @@ -438,7 +440,8 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, } void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize, - BitSet32 registersInUse, bool signExtend) + UGeckoInstruction inst, BitSet32 registersInUse, + bool signExtend) { // If the address is known to be RAM, just load it directly. if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize)) @@ -465,16 +468,16 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc switch (accessSize) { case 64: - ABI_CallFunctionPC(PowerPC::ReadU64FromJit, &m_jit.m_mmu, address); + ABI_CallFunctionPCC(PowerPC::ReadU64FromJit, &m_jit.m_mmu, address, inst.hex); break; case 32: - ABI_CallFunctionPC(PowerPC::ReadU32FromJit, &m_jit.m_mmu, address); + ABI_CallFunctionPCC(PowerPC::ReadU32FromJit, &m_jit.m_mmu, address, inst.hex); break; case 16: - ABI_CallFunctionPC(PowerPC::ReadU16FromJit, &m_jit.m_mmu, address); + ABI_CallFunctionPCC(PowerPC::ReadU16FromJit, &m_jit.m_mmu, address, inst.hex); break; case 8: - ABI_CallFunctionPC(PowerPC::ReadU8FromJit, &m_jit.m_mmu, address); + ABI_CallFunctionPCC(PowerPC::ReadU8FromJit, &m_jit.m_mmu, address, inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, 0); @@ -492,7 +495,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc } void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, - BitSet32 registersInUse, int flags) + UGeckoInstruction inst, BitSet32 registersInUse, int flags) { bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0; @@ -509,6 +512,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov); TrampolineInfo& info = m_back_patch_info[mov.address]; info.pc = js.compilerPC; + info.inst = inst; info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? 
mov.nonAtomicSwapStoreSrc : INVALID_REG; info.start = backpatchStart; info.read = false; @@ -588,19 +592,19 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces switch (accessSize) { case 64: - ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit : PowerPC::WriteU64SwapFromJit, - &m_jit.m_mmu, reg, reg_addr); + ABI_CallFunctionPRRC(swap ? PowerPC::WriteU64FromJit : PowerPC::WriteU64SwapFromJit, + &m_jit.m_mmu, reg, reg_addr, inst.hex); break; case 32: - ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit : PowerPC::WriteU32SwapFromJit, - &m_jit.m_mmu, reg, reg_addr); + ABI_CallFunctionPRRC(swap ? PowerPC::WriteU32FromJit : PowerPC::WriteU32SwapFromJit, + &m_jit.m_mmu, reg, reg_addr, inst.hex); break; case 16: - ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit : PowerPC::WriteU16SwapFromJit, - &m_jit.m_mmu, reg, reg_addr); + ABI_CallFunctionPRRC(swap ? PowerPC::WriteU16FromJit : PowerPC::WriteU16SwapFromJit, + &m_jit.m_mmu, reg, reg_addr, inst.hex); break; case 8: - ABI_CallFunctionPRR(PowerPC::WriteU8FromJit, &m_jit.m_mmu, reg, reg_addr); + ABI_CallFunctionPRRC(PowerPC::WriteU8FromJit, &m_jit.m_mmu, reg, reg_addr, inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); @@ -619,9 +623,10 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces } void EmuCodeBlock::SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, - s32 offset, BitSet32 registersInUse, int flags) + s32 offset, UGeckoInstruction inst, BitSet32 registersInUse, + int flags) { - SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags); + SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, inst, registersInUse, flags); } bool EmuCodeBlock::WriteClobbersRegValue(int accessSize, bool swap) @@ -630,7 +635,7 @@ bool EmuCodeBlock::WriteClobbersRegValue(int accessSize, bool swap) } bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, - BitSet32 registersInUse) + UGeckoInstruction inst, BitSet32 registersInUse) { arg = FixImmediate(accessSize, arg); @@ -670,16 +675,16 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, switch (accessSize) { case 64: - ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit, &m_jit.m_mmu, arg, address); + ABI_CallFunctionPACC(64, PowerPC::WriteU64FromJit, &m_jit.m_mmu, arg, address, inst.hex); break; case 32: - ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit, &m_jit.m_mmu, arg, address); + ABI_CallFunctionPACC(32, PowerPC::WriteU32FromJit, &m_jit.m_mmu, arg, address, inst.hex); break; case 16: - ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit, &m_jit.m_mmu, arg, address); + ABI_CallFunctionPACC(16, PowerPC::WriteU16FromJit, &m_jit.m_mmu, arg, address, inst.hex); break; case 8: - ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit, &m_jit.m_mmu, arg, address); + ABI_CallFunctionPACC(8, PowerPC::WriteU8FromJit, &m_jit.m_mmu, arg, address, inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, 0); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index ba9433134f26..1a3e8ae3d96d 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -9,6 +9,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Jit64Common/ConstantPool.h" #include "Core/PowerPC/Jit64Common/FarCodeCache.h" #include 
"Core/PowerPC/Jit64Common/TrampolineInfo.h" @@ -84,23 +85,25 @@ class EmuCodeBlock : public Gen::X64CodeBlock }; void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset, - BitSet32 registersInUse, bool signExtend, int flags = 0); + UGeckoInstruction inst, BitSet32 registersInUse, bool signExtend, + int flags = 0); void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize, - BitSet32 registersInUse, bool signExtend); + UGeckoInstruction inst, BitSet32 registersInUse, bool signExtend); // Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves // reg_value if the load fails and js.memcheck is enabled. // Works with immediate inputs and simple registers only. void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, - BitSet32 registersInUse, int flags = 0); + UGeckoInstruction inst, BitSet32 registersInUse, int flags = 0); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, - BitSet32 registersInUse, int flags = 0); + UGeckoInstruction inst, BitSet32 registersInUse, int flags = 0); // applies to safe and unsafe WriteRegToReg bool WriteClobbersRegValue(int accessSize, bool swap); // returns true if an exception could have been caused - bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse); + bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, UGeckoInstruction inst, + BitSet32 registersInUse); void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true); void JitGetAndClearCAOV(bool oe); diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 94e7f99423e1..2fa2f2d32abf 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -338,6 +338,9 @@ alignas(16) static const float m_m128 = -128.0f; // Sizes of the various quantized store types constexpr std::array sizes{{32, 0, 0, 0, 8, 16, 8, 16}}; +// TODO: Use the actual instruction being emulated (needed for alignment exception emulation) +static const UGeckoInstruction ps_placeholder_instruction = 0; + void CommonAsmRoutines::GenQuantizedStores() { // Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_stXX). @@ -540,7 +543,8 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, if (!single) flags |= SAFE_LOADSTORE_NO_SWAP; - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, size, 0, QUANTIZED_REGS_TO_SAVE, flags); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, size, 0, ps_placeholder_instruction, + QUANTIZED_REGS_TO_SAVE, flags); } void QuantizedMemoryRoutines::GenQuantizedStoreFloat(bool single, bool isInline) @@ -595,7 +599,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON | SAFE_LOADSTORE_NO_UPDATE_PC; - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags); + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, ps_placeholder_instruction, regsToSave, + extend, flags); if (!single && (type == QUANTIZE_U8 || type == QUANTIZE_S8)) { // TODO: Support not swapping in safeLoadToReg to avoid bswapping twice @@ -703,7 +708,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) int flags = isInline ? 
0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON | SAFE_LOADSTORE_NO_UPDATE_PC; - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags); + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, ps_placeholder_instruction, regsToSave, + extend, flags); if (single) { diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp index 186868f7a8db..02da8cf7f608 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp @@ -43,8 +43,9 @@ const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info) const u8* trampoline = GetCodePtr(); - SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse, - info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); + SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.inst, + info.registersInUse, info.signExtend, + info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); JMP(info.start + info.len, Jump::Near); @@ -62,7 +63,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info) // Don't treat FIFO writes specially for now because they require a burst // check anyway. - SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset, + SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset, info.inst, info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); JMP(info.start + info.len, Jump::Near); diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h index bdf1a1badd26..1b8a66b659a4 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h @@ -6,6 +6,7 @@ #include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Gekko.h" // Stores information we need to batch-patch a MOV with a call to the slow read/write path after // it faults. There will be 10s of thousands of these structs live, so be wary of making this too @@ -21,6 +22,9 @@ struct TrampolineInfo final // The PPC PC for the current load/store block u32 pc = 0; + // The instruction which is being emulated + UGeckoInstruction inst; + // Saved because we need these to make the ABI call in the trampoline BitSet32 registersInUse{}; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 02fc3ca3533c..98e7fc760631 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -12,6 +12,7 @@ #include "Common/Arm64Emitter.h" #include "Core/PowerPC/CPUCoreBase.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" @@ -243,36 +244,42 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA // // Registers used: // - // addr scratch - // Store: X2 X1 - // Load: X1 - // Zero 256: X1 X30 - // Store float: X2 Q0 - // Load float: X1 + // addr inst scratch + // Store: X2 X3 X1 + // Load: X1 X2 + // Zero 256: X1 X2 X30 + // Store float: X2 X3 Q0 + // Load float: X1 X2 // // If mode == AlwaysFastAccess, the addr argument can be any register. // Otherwise it must be the register listed in the table above. 
// + // If emitting_routine, the PowerPC instruction being executed must be present in the inst + // register listed in the table above. If not, the inst argument to this function is used instead. + // // Additional scratch registers are used in the following situations: // - // emitting_routine && mode == Auto: X0 - // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3 - // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3 - // mode != AlwaysSlowAccess && !jo.fastmem: X0 - // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck && - // (flags & BackPatchInfo::FLAG_LOAD): X0 - // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 - // !emitting_routine && mode == Auto && jo.fastmem: X30 + // emitting_routine && mode == Auto: X0 + // emitting_routine && mode == Auto && !(flags & FLAG_STORE): X3 + // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem && !(flags & FLAG_STORE): X3 + // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem && (flags & FLAG_STORE): X4 + // mode != AlwaysSlowAccess && !jo.fastmem: X0 + // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck && (flags & FLAG_LOAD): X0 + // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 + // !emitting_routine && mode == Auto && jo.fastmem: X30 // // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push // may be clobbered if mode != AlwaysFastAccess. - void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS, - Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), + void EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, MemAccessMode mode, + Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr, + BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); // Loadstore routines - void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); - void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, bool update); + void SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 offsetReg, u32 flags, + s32 offset, bool update); + void SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 regOffset, u32 flags, + s32 offset, bool update); // If lookup succeeds, writes upper 15 bits of physical address to addr_out. If not, // jumps to the returned FixupBranch. Clobbers tmp and the 17 lower bits of addr_out. 
Arm64Gen::FixupBranch BATAddressLookup(Arm64Gen::ARM64Reg addr_out, Arm64Gen::ARM64Reg addr_in, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 827fb58a13d8..3fefeccf46a0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -53,9 +53,9 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx) ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory); } -void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr, - BitSet32 gprs_to_push, BitSet32 fprs_to_push, - bool emitting_routine) +void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, MemAccessMode mode, + ARM64Reg RS, ARM64Reg addr, BitSet32 gprs_to_push, + BitSet32 fprs_to_push, bool emitting_routine) { const u32 access_size = BackPatchInfo::GetFlagSize(flags); @@ -76,7 +76,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (!jo.fastmem) { - const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30; + ARM64Reg temp = ARM64Reg::W30; + if (emitting_routine) + temp = (flags & BackPatchInfo::FLAG_STORE) ? ARM64Reg::W4 : ARM64Reg::W3; memory_base = EncodeRegTo64(temp); memory_offset = ARM64Reg::W0; @@ -222,42 +224,48 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, src_reg = dst_reg; } + const auto call_func = [&](auto f) { + if (emitting_routine) + ABI_CallFunction(f, &m_mmu, src_reg, ARM64Reg::W2, ARM64Reg::W3); + else + ABI_CallFunction(f, &m_mmu, src_reg, ARM64Reg::W2, inst.hex); + }; + const bool reverse = (flags & BackPatchInfo::FLAG_REVERSE) != 0; if (access_size == 64) - { - ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit, - &m_mmu, src_reg, ARM64Reg::W2); - } + call_func(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit); else if (access_size == 32) - { - ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit, - &m_mmu, src_reg, ARM64Reg::W2); - } + call_func(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit); else if (access_size == 16) - { - ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit, - &m_mmu, src_reg, ARM64Reg::W2); - } + call_func(reverse ? 
&PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit); else - { - ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2); - } + call_func(&PowerPC::WriteU8FromJit); } else if (flags & BackPatchInfo::FLAG_ZERO_256) { - ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1); + if (emitting_routine) + ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1, inst.hex); + else + ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1, ARM64Reg::W2); } else { + const auto call_func = [&](auto f) { + if (emitting_routine) + ABI_CallFunction(f, &m_mmu, ARM64Reg::W1, ARM64Reg::W2); + else + ABI_CallFunction(f, &m_mmu, ARM64Reg::W1, inst.hex); + }; + if (access_size == 64) - ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1); + call_func(&PowerPC::ReadU64FromJit); else if (access_size == 32) - ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1); + call_func(&PowerPC::ReadU32FromJit); else if (access_size == 16) - ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1); + call_func(&PowerPC::ReadU16FromJit); else - ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1); + call_func(&PowerPC::ReadU8FromJit); } m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 49e4d50617ee..8e6b6e33ab65 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -26,7 +26,8 @@ using namespace Arm64Gen; -void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update) +void JitArm64::SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 offsetReg, u32 flags, + s32 offset, bool update) { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); @@ -139,7 +140,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use, + EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use, fprs_in_use); } else if (mmio_address) @@ -154,7 +155,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use); } gpr.BindToRegister(dest, false, true); @@ -172,8 +173,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o gpr.Unlock(ARM64Reg::W0); } -void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, - bool update) +void JitArm64::SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 regOffset, + u32 flags, s32 offset, bool update) { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); @@ -311,7 +312,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use); + 
EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, + fprs_in_use); } else if (mmio_address) { @@ -326,7 +328,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use); } if (update && !early_update) @@ -444,7 +446,7 @@ void JitArm64::lXX(UGeckoInstruction inst) break; } - SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); + SafeLoadToReg(inst, d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); } void JitArm64::stX(UGeckoInstruction inst) @@ -509,7 +511,7 @@ void JitArm64::stX(UGeckoInstruction inst) break; } - SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset, update); + SafeStoreFromReg(inst, update ? a : (a ? a : -1), s, regOffset, flags, offset, update); } void JitArm64::lmw(UGeckoInstruction inst) @@ -595,8 +597,8 @@ void JitArm64::lmw(UGeckoInstruction inst) if (!jo.memcheck) regs_in_use[DecodeReg(dest_reg)] = 0; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), + regs_in_use, fprs_in_use); gpr.BindToRegister(i, false, true); ASSERT(dest_reg == gpr.R(i)); @@ -711,8 +713,8 @@ void JitArm64::stmw(UGeckoInstruction inst) if (!jo.fastmem) regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), + regs_in_use, fprs_in_use); // To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores // after this instruction, flush registers that would be flushed after this instruction anyway. 
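Why thread the instruction all the way into these helpers? The TODO attached to ps_placeholder_instruction in Jit64AsmCommon.cpp above names the intended consumer: alignment exception emulation. In the hunks shown here the value is only passed through, never inspected. As a sketch of the kind of check a future consumer could perform (the helper below is hypothetical and not part of this patch):

#include "Common/CommonTypes.h"
#include "Core/PowerPC/Gekko.h"

// Hypothetical sketch, not part of this patch: lwarx (opcode 31/20) and
// stwcx. (opcode 31/150) require a word-aligned effective address on the
// emulated CPU. Deciding that requires looking at the faulting instruction,
// which is exactly what the new UGeckoInstruction parameter makes available.
static bool WouldTakeAlignmentException(UGeckoInstruction inst, u32 effective_address)
{
  const bool needs_word_alignment =
      inst.OPCD == 31 && (inst.SUBOP10 == 20 || inst.SUBOP10 == 150);
  return needs_word_alignment && (effective_address & 0x3) != 0;
}

Nothing in this change depends on such a check existing; the point is only that the faulting instruction now reaches the place where such an exception would have to be raised.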
@@ -1020,7 +1022,7 @@ void JitArm64::dcbz(UGeckoInstruction inst) if (!jo.fastmem) gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; - EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, + EmitBackpatchRoutine(inst, BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); if (using_dcbz_hack) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 0e471e355b20..6e148df86913 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -176,11 +176,12 @@ void JitArm64::lfXX(UGeckoInstruction inst) if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) { - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, + fprs_in_use); } else { - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use); } const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); @@ -402,20 +403,20 @@ void JitArm64::stfXX(UGeckoInstruction inst) else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use, + EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use, fprs_in_use); } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use, + EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use, fprs_in_use); } } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use); } if (update && !early_update) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 5f26d823f1ba..4c2cb2fe948f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -29,7 +29,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) // X30 is LR // X0 is a temporary // X1 is the address - // X2 is the scale + // X2 is the instruction + // X3 is a temporary + // X4 is the scale // Q0 is the return register // Q1 is a temporary const s32 offset = inst.SIMM_12; @@ -38,11 +40,11 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? 
inst.Wx : inst.W; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); + gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) { - gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); + gpr.Lock(ARM64Reg::W0, ARM64Reg::W3, ARM64Reg::W4); fpr.Lock(ARM64Reg::Q1); } else if (jo.memcheck || !jo.fastmem) @@ -52,7 +54,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) constexpr ARM64Reg type_reg = ARM64Reg::W0; constexpr ARM64Reg addr_reg = ARM64Reg::W1; - constexpr ARM64Reg scale_reg = ARM64Reg::W2; + constexpr ARM64Reg inst_reg = ARM64Reg::W2; + constexpr ARM64Reg scale_reg = ARM64Reg::W4; ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false); if (inst.RA || update) // Always uses the register on update @@ -87,6 +90,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; if (jo.memcheck || !jo.fastmem) gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; + gprs_in_use[DecodeReg(ARM64Reg::W2)] = false; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; if (!jo.memcheck) fprs_in_use[DecodeReg(VS)] = 0; @@ -95,7 +99,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, fprs_in_use); } else @@ -111,6 +115,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) MOVP2R(ARM64Reg::X30, w ? single_load_quantized : paired_load_quantized); LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true)); + + MOVI2R(inst_reg, inst.hex); + BLR(EncodeRegTo64(type_reg)); WriteConditionalExceptionExit(ANY_LOADSTORE_EXCEPTION, ARM64Reg::W30, ARM64Reg::Q1); @@ -133,11 +140,11 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) MOV(gpr.R(inst.RA), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); + gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) { - gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W3, ARM64Reg::W4); fpr.Unlock(ARM64Reg::Q1); } else if (jo.memcheck || !jo.fastmem) @@ -159,6 +166,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) // X0 is a temporary // X1 is the scale // X2 is the address + // X3 is the instruction + // X4 is a temporary if jo.fastmem is false // Q0 is the store register const s32 offset = inst.SIMM_12; @@ -207,12 +216,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); if (!js.assumeNoPairedQuantize || !jo.fastmem) gpr.Lock(ARM64Reg::W0); - if (!js.assumeNoPairedQuantize && !jo.fastmem) + if (!js.assumeNoPairedQuantize) gpr.Lock(ARM64Reg::W3); + if (!js.assumeNoPairedQuantize && !jo.fastmem) + gpr.Lock(ARM64Reg::W4); constexpr ARM64Reg type_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W2; + constexpr ARM64Reg inst_reg = ARM64Reg::W3; if (inst.RA || update) // Always uses the register on update { @@ -252,7 +264,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, + EmitBackpatchRoutine(inst, flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, fprs_in_use); } else @@ -268,6 +280,9 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) MOVP2R(ARM64Reg::X30, w ? 
single_store_quantized : paired_store_quantized); LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true)); + + MOVI2R(inst_reg, inst.hex); + BLR(EncodeRegTo64(type_reg)); WriteConditionalExceptionExit(ANY_LOADSTORE_EXCEPTION, ARM64Reg::W30, ARM64Reg::Q1); @@ -286,8 +301,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize || !jo.fastmem) gpr.Unlock(ARM64Reg::W0); - if (!js.assumeNoPairedQuantize && !jo.fastmem) + if (!js.assumeNoPairedQuantize) gpr.Unlock(ARM64Reg::W3); + if (!js.assumeNoPairedQuantize && !jo.fastmem) + gpr.Unlock(ARM64Reg::W4); if (!js.assumeNoPairedQuantize) fpr.Unlock(ARM64Reg::Q1); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index cd93fccebf93..58abbb5f3caa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -516,15 +516,16 @@ void JitArm64::GenerateQuantizedLoads() { // X0 is a temporary // X1 is the address - // X2 is the scale + // X2 is the instruction // X3 is a temporary (used in EmitBackpatchRoutine) + // X4 is the scale // X30 is LR - // Q0 is the return + // Q0 is the return value // Q1 is a temporary ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg addr_reg = ARM64Reg::X1; - ARM64Reg scale_reg = ARM64Reg::X2; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3}; + ARM64Reg scale_reg = ARM64Reg::X4; + BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2, 3}; if (!jo.memcheck) gprs_to_push &= ~BitSet32{1}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; @@ -538,7 +539,7 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); RET(ARM64Reg::X30); @@ -548,15 +549,15 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - const s32 load_offset = MOVPage2R(ARM64Reg::X0, &m_dequantizeTableS); - ADD(scale_reg, ARM64Reg::X0, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS); + ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -566,8 +567,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, 
+ gprs_to_push, fprs_to_push, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -584,8 +585,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -601,8 +602,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -619,7 +620,7 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); RET(ARM64Reg::X30); @@ -629,8 +630,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -647,8 +648,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -665,8 +666,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -682,8 +683,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, 
fprs_to_push, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -727,18 +728,19 @@ void JitArm64::GenerateQuantizedStores() // X0 is a temporary // X1 is the scale // X2 is the address - // X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine) + // X3 is the instruction + // X4 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine) // X30 is LR - // Q0 is the register + // Q0 is the value to store // Q1 is a temporary ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg scale_reg = ARM64Reg::X1; ARM64Reg addr_reg = ARM64Reg::X2; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1}; + BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1, 3}; if (!jo.memcheck) gprs_to_push &= ~BitSet32{2}; if (!jo.fastmem) - gprs_to_push &= ~BitSet32{3}; + gprs_to_push &= ~BitSet32{4}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; ARM64FloatEmitter float_emit(this); @@ -750,8 +752,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -769,8 +771,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -788,8 +790,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -806,8 +808,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -824,8 +826,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -835,8 +837,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, 
true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -854,8 +856,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -873,8 +875,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -891,8 +893,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } @@ -909,8 +911,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(UGeckoInstruction{}, flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, + gprs_to_push, fprs_to_push, true); RET(ARM64Reg::X30); } diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index c868fdb1e32e..3fb5d39491d4 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -44,6 +44,7 @@ #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/GDBStub.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" @@ -146,7 +147,7 @@ static void EFB_Write(u32 data, u32 addr) } template -T MMU::ReadFromHardware(const u32 effective_address) +T MMU::ReadFromHardware(const u32 effective_address, const UGeckoInstruction inst) { // ReadFromHardware is currently used with XCheckTLBFlag::OpcodeNoException by host instruction // functions. 
Actual instruction decoding (which can raise exceptions and uses icache) is handled @@ -165,7 +166,7 @@ T MMU::ReadFromHardware(const u32 effective_address) u64 var = 0; for (u32 i = 0; i < sizeof(T); ++i) { - var = (var << 8) | ReadFromHardware(effective_address + i); + var = (var << 8) | ReadFromHardware(effective_address + i, inst); } return static_cast(var); } @@ -274,7 +275,8 @@ T MMU::ReadFromHardware(const u32 effective_address) } template -void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32 size) +void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32 size, + const UGeckoInstruction inst) { static_assert(flag == XCheckTLBFlag::NoException || flag == XCheckTLBFlag::Write); @@ -290,8 +292,8 @@ void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32 const u32 first_half_size = effective_end_page - effective_address; const u32 second_half_size = size - first_half_size; WriteToHardware(effective_address, std::rotr(data, second_half_size * 8), - first_half_size); - WriteToHardware(effective_end_page, data, second_half_size); + first_half_size, inst); + WriteToHardware(effective_end_page, data, second_half_size, inst); return; } @@ -413,8 +415,8 @@ void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32 const u32 end_addr = Common::AlignUp(physical_address + size, 8); for (u32 addr = start_addr; addr != end_addr; addr += 8) { - WriteToHardware(addr, rotated_data, 4); - WriteToHardware(addr + 4, rotated_data, 4); + WriteToHardware(addr, rotated_data, 4, inst); + WriteToHardware(addr + 4, rotated_data, 4, inst); } return; @@ -520,7 +522,7 @@ TryReadInstResult MMU::TryReadInstruction(u32 address) u32 MMU::HostRead_Instruction(const Core::CPUThreadGuard& guard, const u32 address) { return guard.GetSystem().GetMMU().ReadFromHardware( - address); + address, UGeckoInstruction{}); } std::optional> MMU::HostTryReadInstruction(const Core::CPUThreadGuard& guard, @@ -535,19 +537,22 @@ std::optional> MMU::HostTryReadInstruction(const Core::CPUThread { case RequestedAddressSpace::Effective: { - const u32 value = mmu.ReadFromHardware(address); + const u32 value = + mmu.ReadFromHardware(address, UGeckoInstruction{}); return ReadResult(!!mmu.m_ppc_state.msr.IR, value); } case RequestedAddressSpace::Physical: { - const u32 value = mmu.ReadFromHardware(address); + const u32 value = mmu.ReadFromHardware( + address, UGeckoInstruction{}); return ReadResult(false, value); } case RequestedAddressSpace::Virtual: { if (!mmu.m_ppc_state.msr.IR) return std::nullopt; - const u32 value = mmu.ReadFromHardware(address); + const u32 value = + mmu.ReadFromHardware(address, UGeckoInstruction{}); return ReadResult(true, value); } } @@ -591,30 +596,30 @@ void MMU::Memcheck(u32 address, u64 var, bool write, size_t size) m_ppc_state.Exceptions |= EXCEPTION_FAKE_MEMCHECK_HIT; } -u8 MMU::Read_U8(const u32 address) +u8 MMU::Read_U8(const u32 address, const UGeckoInstruction inst) { - u8 var = ReadFromHardware(address); + u8 var = ReadFromHardware(address, inst); Memcheck(address, var, false, 1); return var; } -u16 MMU::Read_U16(const u32 address) +u16 MMU::Read_U16(const u32 address, const UGeckoInstruction inst) { - u16 var = ReadFromHardware(address); + u16 var = ReadFromHardware(address, inst); Memcheck(address, var, false, 2); return var; } -u32 MMU::Read_U32(const u32 address) +u32 MMU::Read_U32(const u32 address, const UGeckoInstruction inst) { - u32 var = ReadFromHardware(address); + u32 var = ReadFromHardware(address, 
inst); Memcheck(address, var, false, 4); return var; } -u64 MMU::Read_U64(const u32 address) +u64 MMU::Read_U64(const u32 address, const UGeckoInstruction inst) { - u64 var = ReadFromHardware(address); + u64 var = ReadFromHardware(address, inst); Memcheck(address, var, false, 8); return var; } @@ -631,19 +636,20 @@ std::optional> MMU::HostTryReadUX(const Core::CPUThreadGuard& guar { case RequestedAddressSpace::Effective: { - T value = mmu.ReadFromHardware(address); + T value = mmu.ReadFromHardware(address, UGeckoInstruction{}); return ReadResult(!!mmu.m_ppc_state.msr.DR, std::move(value)); } case RequestedAddressSpace::Physical: { - T value = mmu.ReadFromHardware(address); + T value = + mmu.ReadFromHardware(address, UGeckoInstruction{}); return ReadResult(false, std::move(value)); } case RequestedAddressSpace::Virtual: { if (!mmu.m_ppc_state.msr.DR) return std::nullopt; - T value = mmu.ReadFromHardware(address); + T value = mmu.ReadFromHardware(address, UGeckoInstruction{}); return ReadResult(true, std::move(value)); } } @@ -694,65 +700,65 @@ std::optional> MMU::HostTryReadF64(const Core::CPUThreadGuard return ReadResult(result->translated, Common::BitCast(result->value)); } -void MMU::Write_U8(const u32 var, const u32 address) +void MMU::Write_U8(const u32 var, const u32 address, const UGeckoInstruction inst) { Memcheck(address, var, true, 1); - WriteToHardware(address, var, 1); + WriteToHardware(address, var, 1, inst); } -void MMU::Write_U16(const u32 var, const u32 address) +void MMU::Write_U16(const u32 var, const u32 address, const UGeckoInstruction inst) { Memcheck(address, var, true, 2); - WriteToHardware(address, var, 2); + WriteToHardware(address, var, 2, inst); } -void MMU::Write_U16_Swap(const u32 var, const u32 address) +void MMU::Write_U16_Swap(const u32 var, const u32 address, const UGeckoInstruction inst) { - Write_U16((var & 0xFFFF0000) | Common::swap16(static_cast(var)), address); + Write_U16((var & 0xFFFF0000) | Common::swap16(static_cast(var)), address, inst); } -void MMU::Write_U32(const u32 var, const u32 address) +void MMU::Write_U32(const u32 var, const u32 address, const UGeckoInstruction inst) { Memcheck(address, var, true, 4); - WriteToHardware(address, var, 4); + WriteToHardware(address, var, 4, inst); } -void MMU::Write_U32_Swap(const u32 var, const u32 address) +void MMU::Write_U32_Swap(const u32 var, const u32 address, const UGeckoInstruction inst) { - Write_U32(Common::swap32(var), address); + Write_U32(Common::swap32(var), address, inst); } -void MMU::Write_U64(const u64 var, const u32 address) +void MMU::Write_U64(const u64 var, const u32 address, const UGeckoInstruction inst) { Memcheck(address, var, true, 8); - WriteToHardware(address, static_cast(var >> 32), 4); - WriteToHardware(address + sizeof(u32), static_cast(var), 4); + WriteToHardware(address, static_cast(var >> 32), 4, inst); + WriteToHardware(address + sizeof(u32), static_cast(var), 4, inst); } -void MMU::Write_U64_Swap(const u64 var, const u32 address) +void MMU::Write_U64_Swap(const u64 var, const u32 address, const UGeckoInstruction inst) { - Write_U64(Common::swap64(var), address); + Write_U64(Common::swap64(var), address, inst); } u8 MMU::HostRead_U8(const Core::CPUThreadGuard& guard, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - return mmu.ReadFromHardware(address); + return mmu.ReadFromHardware(address, UGeckoInstruction{}); } u16 MMU::HostRead_U16(const Core::CPUThreadGuard& guard, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - return 
mmu.ReadFromHardware(address); + return mmu.ReadFromHardware(address, UGeckoInstruction{}); } u32 MMU::HostRead_U32(const Core::CPUThreadGuard& guard, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - return mmu.ReadFromHardware(address); + return mmu.ReadFromHardware(address, UGeckoInstruction{}); } u64 MMU::HostRead_U64(const Core::CPUThreadGuard& guard, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - return mmu.ReadFromHardware(address); + return mmu.ReadFromHardware(address, UGeckoInstruction{}); } float MMU::HostRead_F32(const Core::CPUThreadGuard& guard, const u32 address) @@ -772,26 +778,28 @@ double MMU::HostRead_F64(const Core::CPUThreadGuard& guard, const u32 address) void MMU::HostWrite_U8(const Core::CPUThreadGuard& guard, const u32 var, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - mmu.WriteToHardware(address, var, 1); + mmu.WriteToHardware(address, var, 1, UGeckoInstruction{}); } void MMU::HostWrite_U16(const Core::CPUThreadGuard& guard, const u32 var, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - mmu.WriteToHardware(address, var, 2); + mmu.WriteToHardware(address, var, 2, UGeckoInstruction{}); } void MMU::HostWrite_U32(const Core::CPUThreadGuard& guard, const u32 var, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - mmu.WriteToHardware(address, var, 4); + mmu.WriteToHardware(address, var, 4, UGeckoInstruction{}); } void MMU::HostWrite_U64(const Core::CPUThreadGuard& guard, const u64 var, const u32 address) { auto& mmu = guard.GetSystem().GetMMU(); - mmu.WriteToHardware(address, static_cast(var >> 32), 4); - mmu.WriteToHardware(address + sizeof(u32), static_cast(var), 4); + mmu.WriteToHardware(address, static_cast(var >> 32), 4, + UGeckoInstruction{}); + mmu.WriteToHardware(address + sizeof(u32), static_cast(var), 4, + UGeckoInstruction{}); } void MMU::HostWrite_F32(const Core::CPUThreadGuard& guard, const float var, const u32 address) @@ -819,15 +827,15 @@ std::optional MMU::HostTryWriteUX(const Core::CPUThreadGuard& guard switch (space) { case RequestedAddressSpace::Effective: - mmu.WriteToHardware(address, var, size); + mmu.WriteToHardware(address, var, size, UGeckoInstruction{}); return WriteResult(!!mmu.m_ppc_state.msr.DR); case RequestedAddressSpace::Physical: - mmu.WriteToHardware(address, var, size); + mmu.WriteToHardware(address, var, size, UGeckoInstruction{}); return WriteResult(false); case RequestedAddressSpace::Virtual: if (!mmu.m_ppc_state.msr.DR) return std::nullopt; - mmu.WriteToHardware(address, var, size); + mmu.WriteToHardware(address, var, size, UGeckoInstruction{}); return WriteResult(true); } @@ -1111,7 +1119,7 @@ static bool TranslateBatAddress(const BatTable& bat_table, u32* address, bool* w return true; } -void MMU::ClearDCacheLine(u32 address) +void MMU::ClearDCacheLine(u32 address, UGeckoInstruction inst) { DEBUG_ASSERT((address & 0x1F) == 0); if (m_ppc_state.msr.DR) @@ -1136,7 +1144,7 @@ void MMU::ClearDCacheLine(u32 address) // TODO: This isn't precisely correct for non-RAM regions, but the difference // is unlikely to matter. for (u32 i = 0; i < 32; i += 4) - WriteToHardware(address + i, 0, 4); + WriteToHardware(address + i, 0, 4, inst); } void MMU::StoreDCacheLine(u32 address) @@ -1495,11 +1503,12 @@ MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress add { constexpr XCheckTLBFlag pte_read_flag = IsNoExceptionFlag(flag) ? 
XCheckTLBFlag::NoException : XCheckTLBFlag::Read; - const u32 pteg = ReadFromHardware(pteg_addr); + const u32 pteg = ReadFromHardware(pteg_addr, UGeckoInstruction{}); if (pte1.Hex == pteg) { - UPTE_Hi pte2(ReadFromHardware(pteg_addr + 4)); + UPTE_Hi pte2( + ReadFromHardware(pteg_addr + 4, UGeckoInstruction{})); // set the access bits switch (flag) @@ -1712,52 +1721,52 @@ std::optional MMU::GetTranslatedAddress(u32 address) return std::optional(result.address); } -void ClearDCacheLineFromJit(MMU& mmu, u32 address) +void ClearDCacheLineFromJit(MMU& mmu, u32 address, UGeckoInstruction inst) { - mmu.ClearDCacheLine(address); + mmu.ClearDCacheLine(address, inst); } -u32 ReadU8FromJit(MMU& mmu, u32 address) +u32 ReadU8FromJit(MMU& mmu, u32 address, UGeckoInstruction inst) { - return mmu.Read_U8(address); + return mmu.Read_U8(address, inst); } -u32 ReadU16FromJit(MMU& mmu, u32 address) +u32 ReadU16FromJit(MMU& mmu, u32 address, UGeckoInstruction inst) { - return mmu.Read_U16(address); + return mmu.Read_U16(address, inst); } -u32 ReadU32FromJit(MMU& mmu, u32 address) +u32 ReadU32FromJit(MMU& mmu, u32 address, UGeckoInstruction inst) { - return mmu.Read_U32(address); + return mmu.Read_U32(address, inst); } -u64 ReadU64FromJit(MMU& mmu, u32 address) +u64 ReadU64FromJit(MMU& mmu, u32 address, UGeckoInstruction inst) { - return mmu.Read_U64(address); + return mmu.Read_U64(address, inst); } -void WriteU8FromJit(MMU& mmu, u32 var, u32 address) +void WriteU8FromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U8(var, address); + mmu.Write_U8(var, address, inst); } -void WriteU16FromJit(MMU& mmu, u32 var, u32 address) +void WriteU16FromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U16(var, address); + mmu.Write_U16(var, address, inst); } -void WriteU32FromJit(MMU& mmu, u32 var, u32 address) +void WriteU32FromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U32(var, address); + mmu.Write_U32(var, address, inst); } -void WriteU64FromJit(MMU& mmu, u64 var, u32 address) +void WriteU64FromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U64(var, address); + mmu.Write_U64(var, address, inst); } -void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address) +void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U16_Swap(var, address); + mmu.Write_U16_Swap(var, address, inst); } -void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address) +void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U32_Swap(var, address); + mmu.Write_U32_Swap(var, address, inst); } -void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address) +void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst) { - mmu.Write_U64_Swap(var, address); + mmu.Write_U64_Swap(var, address, inst); } } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 1f9d52f59991..bb2e0ea4e658 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -10,6 +10,7 @@ #include "Common/BitField.h" #include "Common/CommonTypes.h" +#include "Core/PowerPC/Gekko.h" namespace Core { @@ -213,24 +214,26 @@ class MMU u32 Read_Opcode(u32 address); TryReadInstResult TryReadInstruction(u32 address); - u8 Read_U8(u32 address); - u16 Read_U16(u32 address); - u32 Read_U32(u32 address); - u64 Read_U64(u32 address); + u8 Read_U8(u32 address, UGeckoInstruction inst); + u16 Read_U16(u32 address, UGeckoInstruction inst); + 
u32 Read_U32(u32 address, UGeckoInstruction inst);
+  u64 Read_U64(u32 address, UGeckoInstruction inst);
-  void Write_U8(u32 var, u32 address);
-  void Write_U16(u32 var, u32 address);
-  void Write_U32(u32 var, u32 address);
-  void Write_U64(u64 var, u32 address);
+  void Write_U8(u32 var, u32 address, UGeckoInstruction inst);
+  void Write_U16(u32 var, u32 address, UGeckoInstruction inst);
+  void Write_U32(u32 var, u32 address, UGeckoInstruction inst);
+  void Write_U64(u64 var, u32 address, UGeckoInstruction inst);
-  void Write_U16_Swap(u32 var, u32 address);
-  void Write_U32_Swap(u32 var, u32 address);
-  void Write_U64_Swap(u64 var, u32 address);
+  void Write_U16_Swap(u32 var, u32 address, UGeckoInstruction inst);
+  void Write_U32_Swap(u32 var, u32 address, UGeckoInstruction inst);
+  void Write_U64_Swap(u64 var, u32 address, UGeckoInstruction inst);
   void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks);
   void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks);
-  void ClearDCacheLine(u32 address);  // Zeroes 32 bytes; address should be 32-byte-aligned
+  void
+  ClearDCacheLine(u32 address,
+                  UGeckoInstruction inst);  // Zeroes 32 bytes; address should be 32-byte-aligned
   void StoreDCacheLine(u32 address);
   void InvalidateDCacheLine(u32 address);
   void FlushDCacheLine(u32 address);
@@ -306,9 +309,9 @@ class MMU
   void UpdateFakeMMUBat(BatTable& bat_table, u32 start_addr);
   template 
-  T ReadFromHardware(u32 effective_address);
+  T ReadFromHardware(u32 effective_address, UGeckoInstruction inst);
   template 
-  void WriteToHardware(u32 effective_address, u32 data, u32 size);
+  void WriteToHardware(u32 effective_address, u32 data, u32 size, UGeckoInstruction inst);
   template 
   bool IsRAMAddress(u32 address, bool translate);
@@ -328,16 +331,18 @@ class MMU
   BatTable m_dbat_table;
 };
-void ClearDCacheLineFromJit(MMU& mmu, u32 address);
-u32 ReadU8FromJit(MMU& mmu, u32 address);   // Returns zero-extended 32bit value
-u32 ReadU16FromJit(MMU& mmu, u32 address);  // Returns zero-extended 32bit value
-u32 ReadU32FromJit(MMU& mmu, u32 address);
-u64 ReadU64FromJit(MMU& mmu, u32 address);
-void WriteU8FromJit(MMU& mmu, u32 var, u32 address);
-void WriteU16FromJit(MMU& mmu, u32 var, u32 address);
-void WriteU32FromJit(MMU& mmu, u32 var, u32 address);
-void WriteU64FromJit(MMU& mmu, u64 var, u32 address);
-void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address);
-void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address);
-void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address);
+void ClearDCacheLineFromJit(MMU& mmu, u32 address, UGeckoInstruction inst);
+u32 ReadU8FromJit(MMU& mmu, u32 address,
+                  UGeckoInstruction inst);  // Returns zero-extended 32bit value
+u32 ReadU16FromJit(MMU& mmu, u32 address,
+                   UGeckoInstruction inst);  // Returns zero-extended 32bit value
+u32 ReadU32FromJit(MMU& mmu, u32 address, UGeckoInstruction inst);
+u64 ReadU64FromJit(MMU& mmu, u32 address, UGeckoInstruction inst);
+void WriteU8FromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
+void WriteU16FromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
+void WriteU32FromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
+void WriteU64FromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst);
+void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
+void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
+void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst);
 } // namespace PowerPC
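Taken together, the MMU.h changes above define two calling styles: accesses made on behalf of the guest pass the instruction performing them, while host-side helpers with no originating instruction pass a default-constructed UGeckoInstruction, as the HostRead_*/HostTry* paths earlier in this patch do. A short usage sketch under those assumptions; the two functions below are illustrative stand-ins, not code from this patch:

#include "Common/CommonTypes.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/MMU.h"
#include "Core/PowerPC/PowerPC.h"

// Illustrative interpreter-style handler: the access is attributed to "inst".
static void LoadWord_sketch(PowerPC::PowerPCState& ppc_state, PowerPC::MMU& mmu,
                            UGeckoInstruction inst)
{
  const u32 ea = (inst.RA ? ppc_state.gpr[inst.RA] : 0) + inst.SIMM_16;
  ppc_state.gpr[inst.RD] = mmu.Read_U32(ea, inst);
}

// Illustrative host-side peek: there is no guest instruction to attribute the
// access to, so a default-constructed UGeckoInstruction is passed instead.
static u32 PeekWord_sketch(PowerPC::MMU& mmu, u32 address)
{
  return mmu.Read_U32(address, UGeckoInstruction{});
}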