From 82fbdd2f0b9968f323906f493ca45ab537d0162a Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 4 Jul 2021 20:47:04 +0200 Subject: [PATCH] PowerPC: Raise alignment exceptions in more situations Intends to improve https://bugs.dolphin-emu.org/issues/12565. To avoid affecting the performance, the JITs will most of the time not raise alignment exceptions unless you enable the new INI-only setting AlignmentExceptions. --- Source/Core/Common/Arm64Emitter.cpp | 9 +- Source/Core/Common/Arm64Emitter.h | 3 +- Source/Core/Common/x64Emitter.h | 20 ++ Source/Core/Core/Boot/Boot_BS2Emu.cpp | 12 +- Source/Core/Core/BootManager.cpp | 6 + Source/Core/Core/Config/MainSettings.cpp | 1 + Source/Core/Core/Config/MainSettings.h | 1 + .../ConfigLoaders/NetPlayConfigLoader.cpp | 1 + Source/Core/Core/ConfigManager.cpp | 3 + Source/Core/Core/ConfigManager.h | 1 + Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 8 +- Source/Core/Core/HLE/HLE_OS.cpp | 2 +- Source/Core/Core/NetPlayClient.cpp | 1 + Source/Core/Core/NetPlayProto.h | 1 + Source/Core/Core/NetPlayServer.cpp | 2 + .../Core/PowerPC/Interpreter/ExceptionUtils.h | 11 +- .../Interpreter/Interpreter_LoadStore.cpp | 287 ++++----------- .../Interpreter_LoadStorePaired.cpp | 118 +++---- .../Interpreter/Interpreter_Tables.cpp | 2 +- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 16 +- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 8 +- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 101 ++++-- .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 19 +- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 12 +- .../PowerPC/Jit64Common/TrampolineCache.cpp | 6 +- .../Core/PowerPC/Jit64Common/TrampolineInfo.h | 4 + Source/Core/Core/PowerPC/JitArm64/Jit.h | 17 +- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 47 ++- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 27 +- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 30 +- .../JitArm64/JitArm64_LoadStorePaired.cpp | 5 +- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 10 + .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 3 
+- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 1 + Source/Core/Core/PowerPC/MMU.cpp | 326 +++++++++++++----- Source/Core/Core/PowerPC/MMU.h | 53 ++- Source/Core/Core/PowerPC/PowerPC.cpp | 3 +- 37 files changed, 671 insertions(+), 506 deletions(-) diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index bd206f8f4b74..5563390e97e4 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -867,11 +867,16 @@ static constexpr u32 MaskImm26(s64 distance) } // FixupBranch branching -void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch) +{ + SetJumpTarget(branch, m_code); +} + +void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { bool Not = false; u32 inst = 0; - s64 distance = (s64)(m_code - branch.ptr); + s64 distance = static_cast(target - branch.ptr); distance >>= 2; switch (branch.type) diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index c91bae36ac8d..f4449f6ca658 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -567,7 +567,8 @@ class ARM64XEmitter u8* GetWritableCodePtr(); // FixupBranch branching - void SetJumpTarget(FixupBranch const& branch); + void SetJumpTarget(const FixupBranch& branch); + static void SetJumpTarget(const FixupBranch& branch, const u8* target); FixupBranch CBZ(ARM64Reg Rt); FixupBranch CBNZ(ARM64Reg Rt); FixupBranch B(CCFlags cond); diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index e3a3a9e25794..f2236fb64039 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -1120,6 +1120,15 @@ class XEmitter ABI_CallFunction(func); } + // Pass two registers and one constant as parameters. 
+ template + void ABI_CallFunctionRRC(FunctionPointer func, X64Reg reg1, X64Reg reg2, u32 param3) + { + MOVTwo(64, ABI_PARAM1, reg1, 0, ABI_PARAM2, reg2); + MOV(32, R(ABI_PARAM3), Imm32(param3)); + ABI_CallFunction(func); + } + // Pass a pointer and two registers as parameters. template void ABI_CallFunctionPRR(FunctionPointer func, const void* ptr, X64Reg reg1, X64Reg reg2) @@ -1138,6 +1147,17 @@ class XEmitter ABI_CallFunction(func); } + template + void ABI_CallFunctionACC(int bits, FunctionPointer func, const Gen::OpArg& arg1, u32 param2, + u32 param3) + { + if (!arg1.IsSimpleReg(ABI_PARAM1)) + MOV(bits, R(ABI_PARAM1), arg1); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + MOV(32, R(ABI_PARAM3), Imm32(param3)); + ABI_CallFunction(func); + } + template void ABI_CallFunctionA(int bits, FunctionPointer func, const Gen::OpArg& arg1) { diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index dcc2bb22ad77..c6be684fc75b 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -114,9 +114,9 @@ bool CBoot::RunApploader(bool is_wii, const DiscIO::VolumeDisc& volume) PowerPC::ppcState.gpr[4] = iAppLoaderFuncAddr + 4; PowerPC::ppcState.gpr[5] = iAppLoaderFuncAddr + 8; RunFunction(*entry); - const u32 iAppLoaderInit = PowerPC::Read_U32(iAppLoaderFuncAddr + 0); - const u32 iAppLoaderMain = PowerPC::Read_U32(iAppLoaderFuncAddr + 4); - const u32 iAppLoaderClose = PowerPC::Read_U32(iAppLoaderFuncAddr + 8); + const u32 iAppLoaderInit = PowerPC::Read_U32(iAppLoaderFuncAddr + 0, UGeckoInstruction{}); + const u32 iAppLoaderMain = PowerPC::Read_U32(iAppLoaderFuncAddr + 4, UGeckoInstruction{}); + const u32 iAppLoaderClose = PowerPC::Read_U32(iAppLoaderFuncAddr + 8, UGeckoInstruction{}); // iAppLoaderInit DEBUG_LOG_FMT(MASTER_LOG, "Call iAppLoaderInit"); @@ -141,9 +141,9 @@ bool CBoot::RunApploader(bool is_wii, const DiscIO::VolumeDisc& volume) // iAppLoaderMain returns 0 when there are no more sections to 
copy. while (PowerPC::ppcState.gpr[3] != 0x00) { - const u32 ram_address = PowerPC::Read_U32(0x81300004); - const u32 length = PowerPC::Read_U32(0x81300008); - const u32 dvd_offset = PowerPC::Read_U32(0x8130000c) << (is_wii ? 2 : 0); + const u32 ram_address = PowerPC::Read_U32(0x81300004, UGeckoInstruction{}); + const u32 length = PowerPC::Read_U32(0x81300008, UGeckoInstruction{}); + const u32 dvd_offset = PowerPC::Read_U32(0x8130000c, UGeckoInstruction{}) << (is_wii ? 2 : 0); INFO_LOG_FMT(MASTER_LOG, "DVDRead: offset: {:08x} memOffset: {:08x} length: {}", dvd_offset, ram_address, length); diff --git a/Source/Core/Core/BootManager.cpp b/Source/Core/Core/BootManager.cpp index 132dbe900e67..03c7d81a85d6 100644 --- a/Source/Core/Core/BootManager.cpp +++ b/Source/Core/Core/BootManager.cpp @@ -77,6 +77,7 @@ struct ConfigCache bool bJITFollowBranch; bool bEnableCheats; bool bSyncGPUOnSkipIdleHack; + bool bAlignmentExceptions; bool bFPRF; bool bAccurateNaNs; bool bMMU; @@ -112,6 +113,7 @@ void ConfigCache::SaveConfig(const SConfig& config) bJITFollowBranch = config.bJITFollowBranch; bEnableCheats = config.bEnableCheats; bSyncGPUOnSkipIdleHack = config.bSyncGPUOnSkipIdleHack; + bAlignmentExceptions = config.bAlignmentExceptions; bFPRF = config.bFPRF; bAccurateNaNs = config.bAccurateNaNs; bDisableICache = config.bDisableICache; @@ -158,6 +160,7 @@ void ConfigCache::RestoreConfig(SConfig* config) config->bJITFollowBranch = bJITFollowBranch; config->bEnableCheats = bEnableCheats; config->bSyncGPUOnSkipIdleHack = bSyncGPUOnSkipIdleHack; + config->bAlignmentExceptions = bAlignmentExceptions; config->bFPRF = bFPRF; config->bAccurateNaNs = bAccurateNaNs; config->bDisableICache = bDisableICache; @@ -261,6 +264,8 @@ bool BootCore(std::unique_ptr boot, const WindowSystemInfo& wsi) core_section->Get("EnableCheats", &StartUp.bEnableCheats, StartUp.bEnableCheats); core_section->Get("SyncOnSkipIdle", &StartUp.bSyncGPUOnSkipIdleHack, StartUp.bSyncGPUOnSkipIdleHack); + 
core_section->Get("AlignmentExceptions", &StartUp.bAlignmentExceptions, + StartUp.bAlignmentExceptions); core_section->Get("FPRF", &StartUp.bFPRF, StartUp.bFPRF); core_section->Get("AccurateNaNs", &StartUp.bAccurateNaNs, StartUp.bAccurateNaNs); core_section->Get("DisableICache", &StartUp.bDisableICache, StartUp.bDisableICache); @@ -373,6 +378,7 @@ bool BootCore(std::unique_ptr boot, const WindowSystemInfo& wsi) config_cache.bSetEXIDevice[0] = true; config_cache.bSetEXIDevice[1] = true; config_cache.bSetEXIDevice[2] = true; + StartUp.bAlignmentExceptions = netplay_settings.m_AlignmentExceptions; StartUp.bFPRF = netplay_settings.m_FPRF; StartUp.bAccurateNaNs = netplay_settings.m_AccurateNaNs; StartUp.bDisableICache = netplay_settings.m_DisableICache; diff --git a/Source/Core/Core/Config/MainSettings.cpp b/Source/Core/Core/Config/MainSettings.cpp index 08f0972e4886..97211dea5def 100644 --- a/Source/Core/Core/Config/MainSettings.cpp +++ b/Source/Core/Core/Config/MainSettings.cpp @@ -87,6 +87,7 @@ const Info MAIN_SYNC_GPU_MIN_DISTANCE{{System::Main, "Core", "SyncGpuMinDis const Info MAIN_SYNC_GPU_OVERCLOCK{{System::Main, "Core", "SyncGpuOverclock"}, 1.0f}; const Info MAIN_FAST_DISC_SPEED{{System::Main, "Core", "FastDiscSpeed"}, false}; const Info MAIN_LOW_DCBZ_HACK{{System::Main, "Core", "LowDCBZHack"}, false}; +const Info MAIN_ALIGNMENT_EXCEPTIONS{{System::Main, "Core", "AlignmentExceptions"}, false}; const Info MAIN_FPRF{{System::Main, "Core", "FPRF"}, false}; const Info MAIN_ACCURATE_NANS{{System::Main, "Core", "AccurateNaNs"}, false}; const Info MAIN_DISABLE_ICACHE{{System::Main, "Core", "DisableICache"}, false}; diff --git a/Source/Core/Core/Config/MainSettings.h b/Source/Core/Core/Config/MainSettings.h index 717f40973001..e6220fc57082 100644 --- a/Source/Core/Core/Config/MainSettings.h +++ b/Source/Core/Core/Config/MainSettings.h @@ -68,6 +68,7 @@ extern const Info MAIN_SYNC_GPU_MIN_DISTANCE; extern const Info MAIN_SYNC_GPU_OVERCLOCK; extern const Info 
MAIN_FAST_DISC_SPEED; extern const Info MAIN_LOW_DCBZ_HACK; +extern const Info MAIN_ALIGNMENT_EXCEPTIONS; extern const Info MAIN_FPRF; extern const Info MAIN_ACCURATE_NANS; extern const Info MAIN_DISABLE_ICACHE; diff --git a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp index 26e66bafe159..f2836de3266b 100644 --- a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp @@ -69,6 +69,7 @@ class NetPlayConfigLayerLoader final : public Config::ConfigLayerLoader layer->Set(Config::GFX_SAFE_TEXTURE_CACHE_COLOR_SAMPLES, m_settings.m_SafeTextureCacheColorSamples); layer->Set(Config::GFX_PERF_QUERIES_ENABLE, m_settings.m_PerfQueriesEnable); + layer->Set(Config::MAIN_ALIGNMENT_EXCEPTIONS, m_settings.m_AlignmentExceptions); layer->Set(Config::MAIN_FPRF, m_settings.m_FPRF); layer->Set(Config::MAIN_ACCURATE_NANS, m_settings.m_AccurateNaNs); layer->Set(Config::MAIN_DISABLE_ICACHE, m_settings.m_DisableICache); diff --git a/Source/Core/Core/ConfigManager.cpp b/Source/Core/Core/ConfigManager.cpp index ab9c9e26fd64..62b9530d0ce8 100644 --- a/Source/Core/Core/ConfigManager.cpp +++ b/Source/Core/Core/ConfigManager.cpp @@ -214,6 +214,7 @@ void SConfig::SaveCoreSettings(IniFile& ini) core->Set("SyncGpuMaxDistance", iSyncGpuMaxDistance); core->Set("SyncGpuMinDistance", iSyncGpuMinDistance); core->Set("SyncGpuOverclock", fSyncGpuOverclock); + core->Set("AlignmentExceptions", bAlignmentExceptions); core->Set("FPRF", bFPRF); core->Set("AccurateNaNs", bAccurateNaNs); core->Set("EnableCheats", bEnableCheats); @@ -512,6 +513,7 @@ void SConfig::LoadCoreSettings(IniFile& ini) core->Get("SyncGpuOverclock", &fSyncGpuOverclock, 1.0f); core->Get("FastDiscSpeed", &bFastDiscSpeed, false); core->Get("LowDCBZHack", &bLowDCBZHack, false); + core->Get("AlignmentExceptions", &bAlignmentExceptions, false); core->Get("FPRF", &bFPRF, false); core->Get("AccurateNaNs", &bAccurateNaNs, 
false); core->Get("DisableICache", &bDisableICache, false); @@ -750,6 +752,7 @@ void SConfig::LoadDefaults() bRunCompareServer = false; bDSPHLE = true; bFastmem = true; + bAlignmentExceptions = false; bFPRF = false; bAccurateNaNs = false; bDisableICache = false; diff --git a/Source/Core/Core/ConfigManager.h b/Source/Core/Core/ConfigManager.h index 316eed85b9c4..db3f269588f2 100644 --- a/Source/Core/Core/ConfigManager.h +++ b/Source/Core/Core/ConfigManager.h @@ -109,6 +109,7 @@ struct SConfig bool bJITRegisterCacheOff = false; bool bFastmem; + bool bAlignmentExceptions = false; bool bFPRF = false; bool bAccurateNaNs = false; bool bDisableICache = false; diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index da35c4464636..883702fc0584 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -523,12 +523,12 @@ void FifoPlayer::LoadTextureMemory() void FifoPlayer::WriteCP(u32 address, u16 value) { - PowerPC::Write_U16(value, 0xCC000000 | address); + PowerPC::Write_U16(value, 0xCC000000 | address, UGeckoInstruction{}); } void FifoPlayer::WritePI(u32 address, u32 value) { - PowerPC::Write_U32(value, 0xCC003000 | address); + PowerPC::Write_U32(value, 0xCC003000 | address, UGeckoInstruction{}); } void FifoPlayer::FlushWGP() @@ -604,13 +604,13 @@ bool FifoPlayer::ShouldLoadXF(u8 reg) bool FifoPlayer::IsIdleSet() { CommandProcessor::UCPStatusReg status = - PowerPC::Read_U16(0xCC000000 | CommandProcessor::STATUS_REGISTER); + PowerPC::Read_U16(0xCC000000 | CommandProcessor::STATUS_REGISTER, UGeckoInstruction{}); return status.CommandIdle; } bool FifoPlayer::IsHighWatermarkSet() { CommandProcessor::UCPStatusReg status = - PowerPC::Read_U16(0xCC000000 | CommandProcessor::STATUS_REGISTER); + PowerPC::Read_U16(0xCC000000 | CommandProcessor::STATUS_REGISTER, UGeckoInstruction{}); return status.OverflowHiWatermark; } diff --git a/Source/Core/Core/HLE/HLE_OS.cpp 
b/Source/Core/Core/HLE/HLE_OS.cpp index 522f9e936f90..3e7dc6b311f7 100644 --- a/Source/Core/Core/HLE/HLE_OS.cpp +++ b/Source/Core/Core/HLE/HLE_OS.cpp @@ -103,7 +103,7 @@ void HLE_write_console() std::string report_message = GetStringVA(4); if (PowerPC::HostIsRAMAddress(GPR(5))) { - const u32 size = PowerPC::Read_U32(GPR(5)); + const u32 size = PowerPC::HostRead_U32(GPR(5)); if (size > report_message.size()) WARN_LOG_FMT(OSREPORT_HLE, "__write_console uses an invalid size of {:#010x}", size); else if (size == 0) diff --git a/Source/Core/Core/NetPlayClient.cpp b/Source/Core/Core/NetPlayClient.cpp index ab2ab8dd406a..024f3cf6f834 100644 --- a/Source/Core/Core/NetPlayClient.cpp +++ b/Source/Core/Core/NetPlayClient.cpp @@ -696,6 +696,7 @@ unsigned int NetPlayClient::OnData(sf::Packet& packet) packet >> m_net_settings.m_EFBEmulateFormatChanges; packet >> m_net_settings.m_SafeTextureCacheColorSamples; packet >> m_net_settings.m_PerfQueriesEnable; + packet >> m_net_settings.m_AlignmentExceptions; packet >> m_net_settings.m_FPRF; packet >> m_net_settings.m_AccurateNaNs; packet >> m_net_settings.m_DisableICache; diff --git a/Source/Core/Core/NetPlayProto.h b/Source/Core/Core/NetPlayProto.h index 640c0989bd15..c484563fb62c 100644 --- a/Source/Core/Core/NetPlayProto.h +++ b/Source/Core/Core/NetPlayProto.h @@ -58,6 +58,7 @@ struct NetSettings bool m_EFBEmulateFormatChanges; int m_SafeTextureCacheColorSamples; bool m_PerfQueriesEnable; + bool m_AlignmentExceptions; bool m_FPRF; bool m_AccurateNaNs; bool m_DisableICache; diff --git a/Source/Core/Core/NetPlayServer.cpp b/Source/Core/Core/NetPlayServer.cpp index f8c148ca4c02..eec114882d56 100644 --- a/Source/Core/Core/NetPlayServer.cpp +++ b/Source/Core/Core/NetPlayServer.cpp @@ -1278,6 +1278,7 @@ bool NetPlayServer::SetupNetSettings() settings.m_SafeTextureCacheColorSamples = Config::Get(Config::GFX_SAFE_TEXTURE_CACHE_COLOR_SAMPLES); settings.m_PerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE); + 
settings.m_AlignmentExceptions = Config::Get(Config::MAIN_ALIGNMENT_EXCEPTIONS); settings.m_FPRF = Config::Get(Config::MAIN_FPRF); settings.m_AccurateNaNs = Config::Get(Config::MAIN_ACCURATE_NANS); settings.m_DisableICache = Config::Get(Config::MAIN_DISABLE_ICACHE); @@ -1453,6 +1454,7 @@ bool NetPlayServer::StartGame() spac << m_settings.m_EFBEmulateFormatChanges; spac << m_settings.m_SafeTextureCacheColorSamples; spac << m_settings.m_PerfQueriesEnable; + spac << m_settings.m_AlignmentExceptions; spac << m_settings.m_FPRF; spac << m_settings.m_AccurateNaNs; spac << m_settings.m_DisableICache; diff --git a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h index 5b3af8a83227..3a5f953428cb 100644 --- a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h @@ -8,10 +8,17 @@ #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/PowerPC.h" -inline void GenerateAlignmentException(u32 address) +inline void GenerateAlignmentException(u32 effective_address, UGeckoInstruction inst) { PowerPC::ppcState.Exceptions |= EXCEPTION_ALIGNMENT; - PowerPC::ppcState.spr[SPR_DAR] = address; + PowerPC::ppcState.spr[SPR_DAR] = effective_address; + + // It has not been hardware tested what gets used instead of RD and RA in + // the cases documented as undefined. For now, simply use RD and RA + const bool x = inst.OPCD >= 32; + const u32 op = x ? 
inst.SUBOP10 : (inst.OPCD >> 1); + const u32 dsisr = ((op >> 8) << 15) | ((op & 0b11111) << 10) | (inst.RD << 5) | (inst.RA); + PowerPC::ppcState.spr[SPR_DSISR] = dsisr; } inline void GenerateDSIException(u32 address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index ce6dd0fdad3b..339cb27df764 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -41,7 +41,7 @@ static u32 Helper_Get_EA_UX(const PowerPC::PowerPCState& ppcs, const UGeckoInstr void Interpreter::lbz(UGeckoInstruction inst) { - const u32 temp = PowerPC::Read_U8(Helper_Get_EA(PowerPC::ppcState, inst)); + const u32 temp = PowerPC::Read_U8(Helper_Get_EA(PowerPC::ppcState, inst), inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) rGPR[inst.RD] = temp; @@ -50,7 +50,7 @@ void Interpreter::lbz(UGeckoInstruction inst) void Interpreter::lbzu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U8(address); + const u32 temp = PowerPC::Read_U8(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -62,14 +62,7 @@ void Interpreter::lbzu(UGeckoInstruction inst) void Interpreter::lfd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u64 temp = PowerPC::Read_U64(address); + const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) rPS(inst.FD).SetPS0(temp); @@ -78,14 +71,7 @@ void Interpreter::lfd(UGeckoInstruction inst) void Interpreter::lfdu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u64 
temp = PowerPC::Read_U64(address); + const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -97,14 +83,7 @@ void Interpreter::lfdu(UGeckoInstruction inst) void Interpreter::lfdux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u64 temp = PowerPC::Read_U64(address); + const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -116,14 +95,7 @@ void Interpreter::lfdux(UGeckoInstruction inst) void Interpreter::lfdx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u64 temp = PowerPC::Read_U64(address); + const u64 temp = PowerPC::Read_U64(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) rPS(inst.FD).SetPS0(temp); @@ -132,14 +104,7 @@ void Interpreter::lfdx(UGeckoInstruction inst) void Interpreter::lfs(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -151,14 +116,7 @@ void Interpreter::lfs(UGeckoInstruction inst) void Interpreter::lfsu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -171,14 +129,7 @@ void Interpreter::lfsu(UGeckoInstruction inst) void 
Interpreter::lfsux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -191,14 +142,7 @@ void Interpreter::lfsux(UGeckoInstruction inst) void Interpreter::lfsx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -209,7 +153,7 @@ void Interpreter::lfsx(UGeckoInstruction inst) void Interpreter::lha(UGeckoInstruction inst) { - const u32 temp = (u32)(s32)(s16)PowerPC::Read_U16(Helper_Get_EA(PowerPC::ppcState, inst)); + const u32 temp = (u32)(s32)(s16)PowerPC::Read_U16(Helper_Get_EA(PowerPC::ppcState, inst), inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -220,7 +164,7 @@ void Interpreter::lha(UGeckoInstruction inst) void Interpreter::lhau(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - const u32 temp = (u32)(s32)(s16)PowerPC::Read_U16(address); + const u32 temp = (u32)(s32)(s16)PowerPC::Read_U16(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -231,7 +175,7 @@ void Interpreter::lhau(UGeckoInstruction inst) void Interpreter::lhz(UGeckoInstruction inst) { - const u32 temp = PowerPC::Read_U16(Helper_Get_EA(PowerPC::ppcState, inst)); + const u32 temp = PowerPC::Read_U16(Helper_Get_EA(PowerPC::ppcState, inst), inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -242,7 +186,7 @@ void Interpreter::lhz(UGeckoInstruction inst) void Interpreter::lhzu(UGeckoInstruction inst) { 
const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U16(address); + const u32 temp = PowerPC::Read_U16(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -256,15 +200,9 @@ void Interpreter::lmw(UGeckoInstruction inst) { u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0 || MSR.LE) - { - GenerateAlignmentException(address); - return; - } - for (int i = inst.RD; i <= 31; i++, address += 4) { - const u32 temp_reg = PowerPC::Read_U32(address); + const u32 temp_reg = PowerPC::Read_U32(address, inst); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { @@ -287,15 +225,9 @@ void Interpreter::stmw(UGeckoInstruction inst) { u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0 || MSR.LE) - { - GenerateAlignmentException(address); - return; - } - for (int i = inst.RS; i <= 31; i++, address += 4) { - PowerPC::Write_U32(rGPR[i], address); + PowerPC::Write_U32(rGPR[i], address, inst); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { PanicAlertFmt("DSI exception in stmw"); @@ -308,7 +240,7 @@ void Interpreter::stmw(UGeckoInstruction inst) void Interpreter::lwz(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -319,7 +251,7 @@ void Interpreter::lwz(UGeckoInstruction inst) void Interpreter::lwzu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -330,14 +262,14 @@ void Interpreter::lwzu(UGeckoInstruction inst) void Interpreter::stb(UGeckoInstruction inst) { - PowerPC::Write_U8((u8)rGPR[inst.RS], 
Helper_Get_EA(PowerPC::ppcState, inst)); + PowerPC::Write_U8((u8)rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst), inst); } void Interpreter::stbu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - PowerPC::Write_U8((u8)rGPR[inst.RS], address); + PowerPC::Write_U8((u8)rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -348,26 +280,14 @@ void Interpreter::stfd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); } void Interpreter::stfdu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -378,26 +298,14 @@ void Interpreter::stfs(UGeckoInstruction inst) { const u32 address = Helper_Get_EA(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); } void Interpreter::stfsu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ 
-406,14 +314,14 @@ void Interpreter::stfsu(UGeckoInstruction inst) void Interpreter::sth(UGeckoInstruction inst) { - PowerPC::Write_U16((u16)rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst)); + PowerPC::Write_U16((u16)rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst), inst); } void Interpreter::sthu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - PowerPC::Write_U16((u16)rGPR[inst.RS], address); + PowerPC::Write_U16((u16)rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -422,14 +330,14 @@ void Interpreter::sthu(UGeckoInstruction inst) void Interpreter::stw(UGeckoInstruction inst) { - PowerPC::Write_U32(rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst)); + PowerPC::Write_U32(rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst), inst); } void Interpreter::stwu(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst); - PowerPC::Write_U32(rGPR[inst.RS], address); + PowerPC::Write_U32(rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -505,7 +413,7 @@ void Interpreter::dcbz(UGeckoInstruction inst) if (!HID0.DCE) { - GenerateAlignmentException(dcbz_addr); + GenerateAlignmentException(dcbz_addr, inst); return; } @@ -514,7 +422,7 @@ void Interpreter::dcbz(UGeckoInstruction inst) return; // TODO: Implement some sort of L2 emulation. 
- PowerPC::ClearCacheLine(dcbz_addr & (~31)); + PowerPC::ClearCacheLine(dcbz_addr & (~31), inst); } void Interpreter::dcbz_l(UGeckoInstruction inst) @@ -529,12 +437,12 @@ void Interpreter::dcbz_l(UGeckoInstruction inst) if (!HID0.DCE) { - GenerateAlignmentException(address); + GenerateAlignmentException(address, inst); return; } // FAKE: clear memory instead of clearing the cache block - PowerPC::ClearCacheLine(address & (~31)); + PowerPC::ClearCacheLine(address & (~31), inst); } // eciwx/ecowx technically should access the specified device @@ -549,13 +457,11 @@ void Interpreter::eciwx(UGeckoInstruction inst) return; } - if (EA & 3) + const u32 temp = PowerPC::Read_U32(EA, inst); + if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { - GenerateAlignmentException(EA); - return; + rGPR[inst.RD] = temp; // eciwx stores the loaded word in rD } - - rGPR[inst.RD] = PowerPC::Read_U32(EA); } void Interpreter::ecowx(UGeckoInstruction inst) @@ -568,13 +474,7 @@ void Interpreter::ecowx(UGeckoInstruction inst) return; } - if (EA & 3) - { - GenerateAlignmentException(EA); - return; - } - - PowerPC::Write_U32(rGPR[inst.RS], EA); + PowerPC::Write_U32(rGPR[inst.RS], EA, inst); } void Interpreter::eieio(UGeckoInstruction inst) @@ -595,7 +495,7 @@ void Interpreter::icbi(UGeckoInstruction inst) void Interpreter::lbzux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U8(address); + const u32 temp = PowerPC::Read_U8(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -606,7 +506,7 @@ void Interpreter::lbzux(UGeckoInstruction inst) void Interpreter::lbzx(UGeckoInstruction inst) { - const u32 temp = PowerPC::Read_U8(Helper_Get_EA_X(PowerPC::ppcState, inst)); + const u32 temp = PowerPC::Read_U8(Helper_Get_EA_X(PowerPC::ppcState, inst), inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -617,7 +517,7 @@ void Interpreter::lbzx(UGeckoInstruction inst) void 
Interpreter::lhaux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - const s32 temp = (s32)(s16)PowerPC::Read_U16(address); + const s32 temp = (s32)(s16)PowerPC::Read_U16(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -628,7 +528,7 @@ void Interpreter::lhaux(UGeckoInstruction inst) void Interpreter::lhax(UGeckoInstruction inst) { - const s32 temp = (s32)(s16)PowerPC::Read_U16(Helper_Get_EA_X(PowerPC::ppcState, inst)); + const s32 temp = (s32)(s16)PowerPC::Read_U16(Helper_Get_EA_X(PowerPC::ppcState, inst), inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -638,7 +538,8 @@ void Interpreter::lhax(UGeckoInstruction inst) void Interpreter::lhbrx(UGeckoInstruction inst) { - const u32 temp = Common::swap16(PowerPC::Read_U16(Helper_Get_EA_X(PowerPC::ppcState, inst))); + const u32 temp = + Common::swap16(PowerPC::Read_U16(Helper_Get_EA_X(PowerPC::ppcState, inst), inst)); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -649,7 +550,7 @@ void Interpreter::lhbrx(UGeckoInstruction inst) void Interpreter::lhzux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U16(address); + const u32 temp = PowerPC::Read_U16(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -660,7 +561,7 @@ void Interpreter::lhzux(UGeckoInstruction inst) void Interpreter::lhzx(UGeckoInstruction inst) { - const u32 temp = PowerPC::Read_U16(Helper_Get_EA_X(PowerPC::ppcState, inst)); + const u32 temp = PowerPC::Read_U16(Helper_Get_EA_X(PowerPC::ppcState, inst), inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -669,13 +570,14 @@ void Interpreter::lhzx(UGeckoInstruction inst) } // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? 
void Interpreter::lswx(UGeckoInstruction inst) { u32 EA = Helper_Get_EA_X(PowerPC::ppcState, inst); if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -688,7 +590,7 @@ void Interpreter::lswx(UGeckoInstruction inst) if ((n & 3) == 0) rGPR[reg] = 0; - const u32 temp_value = PowerPC::Read_U8(EA) << (24 - offset); + const u32 temp_value = PowerPC::Read_U8(EA, inst) << (24 - offset); // Not64 (Homebrew N64 Emulator for Wii) triggers the following case. if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { @@ -703,7 +605,8 @@ void Interpreter::lswx(UGeckoInstruction inst) void Interpreter::lwbrx(UGeckoInstruction inst) { - const u32 temp = Common::swap32(PowerPC::Read_U32(Helper_Get_EA_X(PowerPC::ppcState, inst))); + const u32 temp = + Common::swap32(PowerPC::Read_U32(Helper_Get_EA_X(PowerPC::ppcState, inst), inst)); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -714,7 +617,7 @@ void Interpreter::lwbrx(UGeckoInstruction inst) void Interpreter::lwzux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -726,7 +629,7 @@ void Interpreter::lwzux(UGeckoInstruction inst) void Interpreter::lwzx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -738,7 +641,7 @@ void Interpreter::stbux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - PowerPC::Write_U8((u8)rGPR[inst.RS], address); + PowerPC::Write_U8((u8)rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -747,20 +650,14 @@ void 
Interpreter::stbux(UGeckoInstruction inst) void Interpreter::stbx(UGeckoInstruction inst) { - PowerPC::Write_U8((u8)rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst)); + PowerPC::Write_U8((u8)rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst), inst); } void Interpreter::stfdux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -771,13 +668,7 @@ void Interpreter::stfdx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address, inst); } // Stores Floating points into Integers indeXed @@ -785,26 +676,14 @@ void Interpreter::stfiwx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address); + PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address, inst); } void Interpreter::stfsux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -815,25 +694,20 @@ void Interpreter::stfsx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address 
& 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address, inst); } void Interpreter::sthbrx(UGeckoInstruction inst) { - PowerPC::Write_U16(Common::swap16((u16)rGPR[inst.RS]), Helper_Get_EA_X(PowerPC::ppcState, inst)); + PowerPC::Write_U16(Common::swap16((u16)rGPR[inst.RS]), Helper_Get_EA_X(PowerPC::ppcState, inst), + inst); } void Interpreter::sthux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - PowerPC::Write_U16((u16)rGPR[inst.RS], address); + PowerPC::Write_U16((u16)rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -842,11 +716,12 @@ void Interpreter::sthux(UGeckoInstruction inst) void Interpreter::sthx(UGeckoInstruction inst) { - PowerPC::Write_U16((u16)rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst)); + PowerPC::Write_U16((u16)rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst), inst); } // lswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? void Interpreter::lswi(UGeckoInstruction inst) { u32 EA; @@ -857,7 +732,7 @@ void Interpreter::lswi(UGeckoInstruction inst) if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -878,7 +753,7 @@ void Interpreter::lswi(UGeckoInstruction inst) rGPR[r] = 0; } - const u32 temp_value = PowerPC::Read_U8(EA) << (24 - i); + const u32 temp_value = PowerPC::Read_U8(EA, inst) << (24 - i); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { PanicAlertFmt("DSI exception in lsw."); @@ -898,6 +773,7 @@ void Interpreter::lswi(UGeckoInstruction inst) // todo : optimize ? // stswi - bizarro string instruction // FIXME: Should rollback if a DSI occurs +// TODO: Should this be able to cause alignment exceptions? 
void Interpreter::stswi(UGeckoInstruction inst) { u32 EA; @@ -908,7 +784,7 @@ void Interpreter::stswi(UGeckoInstruction inst) if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -927,7 +803,7 @@ void Interpreter::stswi(UGeckoInstruction inst) r++; r &= 31; } - PowerPC::Write_U8((rGPR[r] >> (24 - i)) & 0xFF, EA); + PowerPC::Write_U8((rGPR[r] >> (24 - i)) & 0xFF, EA, inst); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { return; @@ -942,13 +818,14 @@ void Interpreter::stswi(UGeckoInstruction inst) } // TODO: is this right? is it DSI interruptible? +// TODO: Should this be able to cause alignment exceptions? void Interpreter::stswx(UGeckoInstruction inst) { u32 EA = Helper_Get_EA_X(PowerPC::ppcState, inst); if (MSR.LE) { - GenerateAlignmentException(EA); + GenerateAlignmentException(EA, inst); return; } @@ -958,7 +835,7 @@ void Interpreter::stswx(UGeckoInstruction inst) while (n > 0) { - PowerPC::Write_U8((rGPR[r] >> (24 - i)) & 0xFF, EA); + PowerPC::Write_U8((rGPR[r] >> (24 - i)) & 0xFF, EA, inst); EA++; n--; @@ -975,7 +852,7 @@ void Interpreter::stwbrx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - PowerPC::Write_U32(Common::swap32(rGPR[inst.RS]), address); + PowerPC::Write_U32(Common::swap32(rGPR[inst.RS]), address, inst); } // The following two instructions are for SMP communications. 
On a single @@ -985,13 +862,7 @@ void Interpreter::lwarx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - - const u32 temp = PowerPC::Read_U32(address); + const u32 temp = PowerPC::Read_U32(address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { @@ -1006,17 +877,11 @@ void Interpreter::stwcxd(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - if ((address & 0b11) != 0) - { - GenerateAlignmentException(address); - return; - } - if (m_reserve) { if (address == m_reserve_address) { - PowerPC::Write_U32(rGPR[inst.RS], address); + PowerPC::Write_U32(rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { m_reserve = false; @@ -1033,7 +898,7 @@ void Interpreter::stwux(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst); - PowerPC::Write_U32(rGPR[inst.RS], address); + PowerPC::Write_U32(rGPR[inst.RS], address, inst); if (!(PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION)) { rGPR[inst.RA] = address; @@ -1044,7 +909,7 @@ void Interpreter::stwx(UGeckoInstruction inst) { const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - PowerPC::Write_U32(rGPR[inst.RS], address); + PowerPC::Write_U32(rGPR[inst.RS], address, inst); } void Interpreter::sync(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index 7d3a8c9907c9..4e2014dc4587 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -68,111 +68,112 @@ SType ScaleAndClamp(double ps, u32 stScale) } template <typename T> -static T ReadUnpaired(u32 addr); +static T ReadUnpaired(u32 addr, UGeckoInstruction inst); template <> -u8 
ReadUnpaired<u8>(u32 addr) +u8 ReadUnpaired<u8>(u32 addr, UGeckoInstruction inst) { - return PowerPC::Read_U8(addr); + return PowerPC::Read_U8(addr, inst); } template <> -u16 ReadUnpaired<u16>(u32 addr) +u16 ReadUnpaired<u16>(u32 addr, UGeckoInstruction inst) { - return PowerPC::Read_U16(addr); + return PowerPC::Read_U16(addr, inst); } template <> -u32 ReadUnpaired<u32>(u32 addr) +u32 ReadUnpaired<u32>(u32 addr, UGeckoInstruction inst) { - return PowerPC::Read_U32(addr); + return PowerPC::Read_U32(addr, inst); } template <typename T> -static std::pair<T, T> ReadPair(u32 addr); +static std::pair<T, T> ReadPair(u32 addr, UGeckoInstruction inst); template <> -std::pair<u8, u8> ReadPair<u8>(u32 addr) +std::pair<u8, u8> ReadPair<u8>(u32 addr, UGeckoInstruction inst) { - u16 val = PowerPC::Read_U16(addr); + u16 val = PowerPC::Read_U16(addr, inst); return {(u8)(val >> 8), (u8)val}; } template <> -std::pair<u16, u16> ReadPair<u16>(u32 addr) +std::pair<u16, u16> ReadPair<u16>(u32 addr, UGeckoInstruction inst) { - u32 val = PowerPC::Read_U32(addr); + u32 val = PowerPC::Read_U32(addr, inst); return {(u16)(val >> 16), (u16)val}; } template <> -std::pair<u32, u32> ReadPair<u32>(u32 addr) +std::pair<u32, u32> ReadPair<u32>(u32 addr, UGeckoInstruction inst) { - u64 val = PowerPC::Read_U64(addr); + u64 val = PowerPC::Read_U64(addr, inst); return {(u32)(val >> 32), (u32)val}; } template <typename T> -static void WriteUnpaired(T val, u32 addr); +static void WriteUnpaired(T val, u32 addr, UGeckoInstruction inst); template <> -void WriteUnpaired<u8>(u8 val, u32 addr) +void WriteUnpaired<u8>(u8 val, u32 addr, UGeckoInstruction inst) { - PowerPC::Write_U8(val, addr); + PowerPC::Write_U8(val, addr, inst); } template <> -void WriteUnpaired<u16>(u16 val, u32 addr) +void WriteUnpaired<u16>(u16 val, u32 addr, UGeckoInstruction inst) { - PowerPC::Write_U16(val, addr); + PowerPC::Write_U16(val, addr, inst); } template <> -void WriteUnpaired<u32>(u32 val, u32 addr) +void WriteUnpaired<u32>(u32 val, u32 addr, UGeckoInstruction inst) { - PowerPC::Write_U32(val, addr); + PowerPC::Write_U32(val, addr, inst); } template <typename T> -static void WritePair(T val1, T val2, u32 addr); 
+static void WritePair(T val1, T val2, u32 addr, UGeckoInstruction inst); template <> -void WritePair<u8>(u8 val1, u8 val2, u32 addr) +void WritePair<u8>(u8 val1, u8 val2, u32 addr, UGeckoInstruction inst) { - PowerPC::Write_U16(((u16)val1 << 8) | (u16)val2, addr); + PowerPC::Write_U16(((u16)val1 << 8) | (u16)val2, addr, inst); } template <> -void WritePair<u16>(u16 val1, u16 val2, u32 addr) +void WritePair<u16>(u16 val1, u16 val2, u32 addr, UGeckoInstruction inst) { - PowerPC::Write_U32(((u32)val1 << 16) | (u32)val2, addr); + PowerPC::Write_U32(((u32)val1 << 16) | (u32)val2, addr, inst); } template <> -void WritePair<u32>(u32 val1, u32 val2, u32 addr) +void WritePair<u32>(u32 val1, u32 val2, u32 addr, UGeckoInstruction inst) { - PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr); + PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr, inst); } template <typename T> -void QuantizeAndStore(double ps0, double ps1, u32 addr, u32 instW, u32 stScale) +void QuantizeAndStore(UGeckoInstruction inst, double ps0, double ps1, u32 addr, u32 instW, + u32 stScale) { using U = std::make_unsigned_t<T>; U convPS0 = (U)ScaleAndClamp<T>(ps0, stScale); if (instW) { - WriteUnpaired<U>(convPS0, addr); + WriteUnpaired<U>(convPS0, addr, inst); } else { U convPS1 = (U)ScaleAndClamp<T>(ps1, stScale); - WritePair<U>(convPS0, convPS1, addr); + WritePair<U>(convPS0, convPS1, addr, inst); } } -static void Helper_Quantize(const PowerPC::PowerPCState* ppcs, u32 addr, u32 instI, u32 instRS, - u32 instW) +static void Helper_Quantize(UGeckoInstruction inst, const PowerPC::PowerPCState* ppcs, u32 addr, + u32 instI, u32 instRS, u32 instW) { const UGQR gqr(ppcs->spr[SPR_GQR0 + instI]); const EQuantizeType stType = gqr.st_type; @@ -190,32 +191,32 @@ static void Helper_Quantize(const PowerPC::PowerPCState* ppcs, u32 addr, u32 ins if (instW) { - WriteUnpaired<u32>(conv_ps0, addr); + WriteUnpaired<u32>(conv_ps0, addr, inst); } else { const u64 integral_ps1 = Common::BitCast<u64>(ps1); const u32 conv_ps1 = ConvertToSingleFTZ(integral_ps1); - WritePair<u32>(conv_ps0, conv_ps1, 
addr); + WritePair<u32>(conv_ps0, conv_ps1, addr, inst); } break; } case QUANTIZE_U8: - QuantizeAndStore<u8>(ps0, ps1, addr, instW, stScale); + QuantizeAndStore<u8>(inst, ps0, ps1, addr, instW, stScale); break; case QUANTIZE_U16: - QuantizeAndStore<u16>(ps0, ps1, addr, instW, stScale); + QuantizeAndStore<u16>(inst, ps0, ps1, addr, instW, stScale); break; case QUANTIZE_S8: - QuantizeAndStore<s8>(ps0, ps1, addr, instW, stScale); + QuantizeAndStore<s8>(inst, ps0, ps1, addr, instW, stScale); break; case QUANTIZE_S16: - QuantizeAndStore<s16>(ps0, ps1, addr, instW, stScale); + QuantizeAndStore<s16>(inst, ps0, ps1, addr, instW, stScale); break; case QUANTIZE_INVALID1: @@ -227,20 +228,21 @@ static void Helper_Quantize(const PowerPC::PowerPCState* ppcs, u32 addr, u32 ins } template <typename T> -std::pair<double, double> LoadAndDequantize(u32 addr, u32 instW, u32 ldScale) +std::pair<double, double> LoadAndDequantize(UGeckoInstruction inst, u32 addr, u32 instW, + u32 ldScale) { using U = std::make_unsigned_t<T>; float ps0, ps1; if (instW) { - U value = ReadUnpaired<U>(addr); + U value = ReadUnpaired<U>(addr, inst); ps0 = (float)(T)(value)*m_dequantizeTable[ldScale]; ps1 = 1.0f; } else { - std::pair<U, U> value = ReadPair<U>(addr); + std::pair<U, U> value = ReadPair<U>(addr, inst); ps0 = (float)(T)(value.first) * m_dequantizeTable[ldScale]; ps1 = (float)(T)(value.second) * m_dequantizeTable[ldScale]; } @@ -248,8 +250,8 @@ std::pair<double, double> LoadAndDequantize(u32 addr, u32 instW, u32 ldScale) return {static_cast<double>(ps0), static_cast<double>(ps1)}; } -static void Helper_Dequantize(PowerPC::PowerPCState* ppcs, u32 addr, u32 instI, u32 instRD, - u32 instW) +static void Helper_Dequantize(UGeckoInstruction inst, PowerPC::PowerPCState* ppcs, u32 addr, + u32 instI, u32 instRD, u32 instW) { UGQR gqr(ppcs->spr[SPR_GQR0 + instI]); EQuantizeType ldType = gqr.ld_type; @@ -263,32 +265,32 @@ static void Helper_Dequantize(PowerPC::PowerPCState* ppcs, u32 addr, u32 instI, case QUANTIZE_FLOAT: if (instW) { - const u32 value = ReadUnpaired<u32>(addr); + const u32 value = ReadUnpaired<u32>(addr, inst); ps0 = 
Common::BitCast<double>(ConvertToDouble(value)); ps1 = 1.0; } else { - const std::pair<u32, u32> value = ReadPair<u32>(addr); + const std::pair<u32, u32> value = ReadPair<u32>(addr, inst); ps0 = Common::BitCast<double>(ConvertToDouble(value.first)); ps1 = Common::BitCast<double>(ConvertToDouble(value.second)); } break; case QUANTIZE_U8: - std::tie(ps0, ps1) = LoadAndDequantize<u8>(addr, instW, ldScale); + std::tie(ps0, ps1) = LoadAndDequantize<u8>(inst, addr, instW, ldScale); break; case QUANTIZE_U16: - std::tie(ps0, ps1) = LoadAndDequantize<u16>(addr, instW, ldScale); + std::tie(ps0, ps1) = LoadAndDequantize<u16>(inst, addr, instW, ldScale); break; case QUANTIZE_S8: - std::tie(ps0, ps1) = LoadAndDequantize<s8>(addr, instW, ldScale); + std::tie(ps0, ps1) = LoadAndDequantize<s8>(inst, addr, instW, ldScale); break; case QUANTIZE_S16: - std::tie(ps0, ps1) = LoadAndDequantize<s16>(addr, instW, ldScale); + std::tie(ps0, ps1) = LoadAndDequantize<s16>(inst, addr, instW, ldScale); break; case QUANTIZE_INVALID1: @@ -317,7 +319,7 @@ void Interpreter::psq_l(UGeckoInstruction inst) } const u32 EA = inst.RA ? (rGPR[inst.RA] + inst.SIMM_12) : (u32)inst.SIMM_12; - Helper_Dequantize(&PowerPC::ppcState, EA, inst.I, inst.RD, inst.W); + Helper_Dequantize(inst, &PowerPC::ppcState, EA, inst.I, inst.RD, inst.W); } void Interpreter::psq_lu(UGeckoInstruction inst) @@ -329,7 +331,7 @@ void Interpreter::psq_lu(UGeckoInstruction inst) } const u32 EA = rGPR[inst.RA] + inst.SIMM_12; - Helper_Dequantize(&PowerPC::ppcState, EA, inst.I, inst.RD, inst.W); + Helper_Dequantize(inst, &PowerPC::ppcState, EA, inst.I, inst.RD, inst.W); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { @@ -347,7 +349,7 @@ void Interpreter::psq_st(UGeckoInstruction inst) } const u32 EA = inst.RA ? 
(rGPR[inst.RA] + inst.SIMM_12) : (u32)inst.SIMM_12; - Helper_Quantize(&PowerPC::ppcState, EA, inst.I, inst.RS, inst.W); + Helper_Quantize(inst, &PowerPC::ppcState, EA, inst.I, inst.RS, inst.W); } void Interpreter::psq_stu(UGeckoInstruction inst) @@ -359,7 +361,7 @@ void Interpreter::psq_stu(UGeckoInstruction inst) } const u32 EA = rGPR[inst.RA] + inst.SIMM_12; - Helper_Quantize(&PowerPC::ppcState, EA, inst.I, inst.RS, inst.W); + Helper_Quantize(inst, &PowerPC::ppcState, EA, inst.I, inst.RS, inst.W); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { @@ -371,19 +373,19 @@ void Interpreter::psq_stu(UGeckoInstruction inst) void Interpreter::psq_lx(UGeckoInstruction inst) { const u32 EA = inst.RA ? (rGPR[inst.RA] + rGPR[inst.RB]) : rGPR[inst.RB]; - Helper_Dequantize(&PowerPC::ppcState, EA, inst.Ix, inst.RD, inst.Wx); + Helper_Dequantize(inst, &PowerPC::ppcState, EA, inst.Ix, inst.RD, inst.Wx); } void Interpreter::psq_stx(UGeckoInstruction inst) { const u32 EA = inst.RA ? (rGPR[inst.RA] + rGPR[inst.RB]) : rGPR[inst.RB]; - Helper_Quantize(&PowerPC::ppcState, EA, inst.Ix, inst.RS, inst.Wx); + Helper_Quantize(inst, &PowerPC::ppcState, EA, inst.Ix, inst.RS, inst.Wx); } void Interpreter::psq_lux(UGeckoInstruction inst) { const u32 EA = rGPR[inst.RA] + rGPR[inst.RB]; - Helper_Dequantize(&PowerPC::ppcState, EA, inst.Ix, inst.RD, inst.Wx); + Helper_Dequantize(inst, &PowerPC::ppcState, EA, inst.Ix, inst.RD, inst.Wx); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { @@ -395,7 +397,7 @@ void Interpreter::psq_lux(UGeckoInstruction inst) void Interpreter::psq_stux(UGeckoInstruction inst) { const u32 EA = rGPR[inst.RA] + rGPR[inst.RB]; - Helper_Quantize(&PowerPC::ppcState, EA, inst.Ix, inst.RS, inst.Wx); + Helper_Quantize(inst, &PowerPC::ppcState, EA, inst.Ix, inst.RS, inst.Wx); if (PowerPC::ppcState.Exceptions & ANY_LOADSTORE_EXCEPTION) { diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp 
b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index dcb36a1d17d9..e7e13a0884cf 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -90,7 +90,7 @@ static std::array primarytable = {60, Interpreter::psq_st, {"psq_st", OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {61, Interpreter::psq_stu, {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - //missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 + //missing: 0, 1, 2, 5, 6, 9, 22, 30, 58, 62 }}; static std::array table4 = diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index d0c98a361cfa..b592fd197874 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -219,7 +219,7 @@ void Jit64::lXXx(UGeckoInstruction inst) if (update && storeAddress) registersInUse[RSCRATCH2] = true; - SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, registersInUse, signExtend); + SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, inst, registersInUse, signExtend); if (update && storeAddress) MOV(32, Ra, opAddress); @@ -301,7 +301,7 @@ void Jit64::dcbz(UGeckoInstruction inst) if (emit_fast_path) { - // Perform lookup to see if we can use fast path. + // Perform BAT lookup to see if we can use fast path. 
MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0])); PUSH(RSCRATCH); SHR(32, R(RSCRATCH), Imm8(PowerPC::BAT_INDEX_SHIFT)); @@ -321,7 +321,7 @@ void Jit64::dcbz(UGeckoInstruction inst) MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); - ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH); + ABI_CallFunctionAC(32, PowerPC::ClearCacheLine, R(RSCRATCH), inst.hex); ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (emit_fast_path) @@ -372,7 +372,7 @@ void Jit64::stX(UGeckoInstruction inst) const bool exception = [&] { RCOpArg Rs = gpr.Use(s, RCMode::Read); RegCache::Realize(Rs); - return WriteToConstAddress(accessSize, Rs, addr, CallerSavedRegistersInUse()); + return WriteToConstAddress(accessSize, Rs, addr, inst, CallerSavedRegistersInUse()); }(); if (update) { @@ -405,7 +405,7 @@ void Jit64::stX(UGeckoInstruction inst) reg_value = gpr.BindOrImm(s, RCMode::Read); } RegCache::Realize(Ra, reg_value); - SafeWriteRegToReg(reg_value, Ra, accessSize, offset, CallerSavedRegistersInUse(), + SafeWriteRegToReg(reg_value, Ra, accessSize, offset, inst, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); if (update) @@ -460,7 +460,7 @@ void Jit64::stXx(UGeckoInstruction inst) BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update) registersInUse[RSCRATCH2] = true; - SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, registersInUse, + SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, inst, registersInUse, byte_reverse ? 
SAFE_LOADSTORE_NO_SWAP : 0); if (update) @@ -483,7 +483,7 @@ void Jit64::lmw(UGeckoInstruction inst) } for (int i = d; i < 32; i++) { - SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4, + SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4, inst, CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false); RCOpArg Ri = gpr.Bind(i, RCMode::Write); RegCache::Realize(Ri); @@ -514,7 +514,7 @@ void Jit64::stmw(UGeckoInstruction inst) MOV(32, R(RSCRATCH2), Ri); Ri = RCOpArg::R(RSCRATCH2); } - SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16, + SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16, inst, CallerSavedRegistersInUse()); } } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 347f67e2c663..cbde02f0fedc 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -67,7 +67,7 @@ void Jit64::lfXXX(UGeckoInstruction inst) BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && jo.memcheck) registersInUse[RSCRATCH2] = true; - SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false); + SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, inst, registersInUse, false); if (single) { @@ -134,7 +134,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) { u32 addr = (a ? 
gpr.Imm32(a) : 0) + imm; bool exception = - WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse()); + WriteToConstAddress(accessSize, R(RSCRATCH), addr, inst, CallerSavedRegistersInUse()); if (update) { @@ -180,7 +180,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) if (update) registersInUse[RSCRATCH2] = true; - SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, inst, registersInUse); if (update) MOV(32, Ra, R(RSCRATCH2)); @@ -207,5 +207,5 @@ void Jit64::stfiwx(UGeckoInstruction inst) MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg()); else MOV(32, R(RSCRATCH), Rs); - SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse()); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, inst, CallerSavedRegistersInUse()); } diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 5dff4c646616..12da2bf99a08 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -92,8 +92,15 @@ void EmuCodeBlock::SwitchToNearCode() SetCodePtr(m_near_code, m_near_code_end, m_near_code_write_failed); } -FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr, - BitSet32 registers_in_use) +FixupBranch EmuCodeBlock::CheckIfAlignmentSafeAddress(X64Reg reg_addr, int access_size, + UGeckoInstruction inst) +{ + TEST(32, R(reg_addr), Imm32(PowerPC::GetAlignmentMask(access_size))); + return J_CC(CC_NZ, m_far_code.Enabled()); +} + +FixupBranch EmuCodeBlock::CheckIfBATSafeAddress(const OpArg& reg_value, X64Reg reg_addr, + BitSet32 registers_in_use) { registers_in_use[reg_addr] = true; if (reg_value.IsSimpleReg()) @@ -108,7 +115,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_ if (reg_addr != RSCRATCH_EXTRA) MOV(32, R(RSCRATCH_EXTRA), R(reg_addr)); - // Perform lookup to see if we can 
use fast path. + // Perform BAT lookup to see if we can use fast path. MOV(64, R(RSCRATCH), ImmPtr(&PowerPC::dbat_table[0])); SHR(32, R(RSCRATCH_EXTRA), Imm8(PowerPC::BAT_INDEX_SHIFT)); TEST(32, MComplex(RSCRATCH, RSCRATCH_EXTRA, SCALE_4, 0), Imm32(PowerPC::BAT_PHYSICAL_BIT)); @@ -319,14 +326,17 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, } void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, - s32 offset, BitSet32 registersInUse, bool signExtend, int flags) + s32 offset, UGeckoInstruction inst, BitSet32 registersInUse, + bool signExtend, int flags) { - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, false); auto& js = m_jit.js; registersInUse[reg_value] = false; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !slowmem && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -334,6 +344,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov); TrampolineInfo& info = m_back_patch_info[mov.address]; info.pc = js.compilerPC; + info.inst = inst; info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? 
mov.nonAtomicSwapStoreSrc : INVALID_REG; info.start = backpatchStart; info.read = true; @@ -359,7 +370,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, if (opAddress.IsImm()) { u32 address = opAddress.Imm32() + offset; - SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend); + SafeLoadToRegImmediate(reg_value, address, accessSize, inst, registersInUse, signExtend); return; } @@ -377,13 +388,21 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(R(reg_value), reg_addr, registersInUse); + UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(true); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // Helps external systems know which instruction triggered the read. 
@@ -398,16 +417,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, switch (accessSize) { case 64: - ABI_CallFunctionR(PowerPC::Read_U64, reg_addr); + ABI_CallFunctionAC(32, PowerPC::Read_U64, R(reg_addr), inst.hex); break; case 32: - ABI_CallFunctionR(PowerPC::Read_U32, reg_addr); + ABI_CallFunctionAC(32, PowerPC::Read_U32, R(reg_addr), inst.hex); break; case 16: - ABI_CallFunctionR(PowerPC::Read_U16_ZX, reg_addr); + ABI_CallFunctionAC(32, PowerPC::Read_U16_ZX, R(reg_addr), inst.hex); break; case 8: - ABI_CallFunctionR(PowerPC::Read_U8_ZX, reg_addr); + ABI_CallFunctionAC(32, PowerPC::Read_U8_ZX, R(reg_addr), inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); @@ -435,10 +454,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, } void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize, - BitSet32 registersInUse, bool signExtend) + UGeckoInstruction inst, BitSet32 registersInUse, + bool signExtend) { // If the address is known to be RAM, just load it directly. 
- if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) + if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address, accessSize, inst)) { UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend); return; @@ -488,23 +508,26 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc } void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, - BitSet32 registersInUse, int flags) + UGeckoInstruction inst, BitSet32 registersInUse, int flags) { - bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); + const bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + const bool check_alignment = m_jit.jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, false); // set the correct immediate format reg_value = FixImmediate(accessSize, reg_value); auto& js = m_jit.js; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !slowmem && !check_alignment) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov); TrampolineInfo& info = m_back_patch_info[mov.address]; info.pc = js.compilerPC; + info.inst = inst; info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? 
mov.nonAtomicSwapStoreSrc : INVALID_REG; info.start = backpatchStart; info.read = false; @@ -545,13 +568,21 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { - FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); + FixupBranch slow_1; + if (check_alignment) + slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst); + FixupBranch slow_2 = CheckIfBATSafeAddress(reg_value, reg_addr, registersInUse); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); + if (m_far_code.Enabled()) SwitchToFarCode(); else exit = J(true); - SetJumpTarget(slow); + + if (check_alignment) + SetJumpTarget(slow_1); + SetJumpTarget(slow_2); } // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs @@ -579,16 +610,19 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces switch (accessSize) { case 64: - ABI_CallFunctionRR(swap ? PowerPC::Write_U64 : PowerPC::Write_U64_Swap, reg, reg_addr); + ABI_CallFunctionRRC(swap ? PowerPC::Write_U64 : PowerPC::Write_U64_Swap, reg, reg_addr, + inst.hex); break; case 32: - ABI_CallFunctionRR(swap ? PowerPC::Write_U32 : PowerPC::Write_U32_Swap, reg, reg_addr); + ABI_CallFunctionRRC(swap ? PowerPC::Write_U32 : PowerPC::Write_U32_Swap, reg, reg_addr, + inst.hex); break; case 16: - ABI_CallFunctionRR(swap ? PowerPC::Write_U16 : PowerPC::Write_U16_Swap, reg, reg_addr); + ABI_CallFunctionRRC(swap ? 
PowerPC::Write_U16 : PowerPC::Write_U16_Swap, reg, reg_addr, + inst.hex); break; case 8: - ABI_CallFunctionRR(PowerPC::Write_U8, reg, reg_addr); + ABI_CallFunctionRRC(PowerPC::Write_U8, reg, reg_addr, inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); @@ -607,9 +641,10 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces } void EmuCodeBlock::SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, - s32 offset, BitSet32 registersInUse, int flags) + s32 offset, UGeckoInstruction inst, BitSet32 registersInUse, + int flags) { - SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags); + SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, inst, registersInUse, flags); } bool EmuCodeBlock::WriteClobbersRegValue(int accessSize, bool swap) @@ -618,7 +653,7 @@ bool EmuCodeBlock::WriteClobbersRegValue(int accessSize, bool swap) } bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, - BitSet32 registersInUse) + UGeckoInstruction inst, BitSet32 registersInUse) { arg = FixImmediate(accessSize, arg); @@ -644,7 +679,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, m_jit.js.fifoBytesSinceCheck += accessSize >> 3; return false; } - else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) + else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address, accessSize, inst)) { WriteToConstRamAddress(accessSize, arg, address); return false; @@ -658,16 +693,16 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, switch (accessSize) { case 64: - ABI_CallFunctionAC(64, PowerPC::Write_U64, arg, address); + ABI_CallFunctionACC(64, PowerPC::Write_U64, arg, address, inst.hex); break; case 32: - ABI_CallFunctionAC(32, PowerPC::Write_U32, arg, address); + ABI_CallFunctionACC(32, PowerPC::Write_U32, arg, address, inst.hex); break; case 16: - 
ABI_CallFunctionAC(16, PowerPC::Write_U16, arg, address); + ABI_CallFunctionACC(16, PowerPC::Write_U16, arg, address, inst.hex); break; case 8: - ABI_CallFunctionAC(8, PowerPC::Write_U8, arg, address); + ABI_CallFunctionACC(8, PowerPC::Write_U8, arg, address, inst.hex); break; } ABI_PopRegistersAndAdjustStack(registersInUse, 0); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 9f5c373df337..8114910ebf99 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -10,6 +10,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Jit64Common/ConstantPool.h" #include "Core/PowerPC/Jit64Common/FarCodeCache.h" #include "Core/PowerPC/Jit64Common/TrampolineInfo.h" @@ -50,8 +51,10 @@ class EmuCodeBlock : public Gen::X64CodeBlock return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index)); } - Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, - BitSet32 registers_in_use); + Gen::FixupBranch CheckIfAlignmentSafeAddress(Gen::X64Reg reg_addr, int access_size, + UGeckoInstruction inst); + Gen::FixupBranch CheckIfBATSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, + BitSet32 registers_in_use); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, @@ -85,23 +88,25 @@ class EmuCodeBlock : public Gen::X64CodeBlock }; void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset, - BitSet32 registersInUse, bool signExtend, int flags = 0); + UGeckoInstruction inst, BitSet32 registersInUse, bool signExtend, + int flags = 0); void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize, - BitSet32 
registersInUse, bool signExtend); + UGeckoInstruction inst, BitSet32 registersInUse, bool signExtend); // Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves // reg_value if the load fails and js.memcheck is enabled. // Works with immediate inputs and simple registers only. void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, - BitSet32 registersInUse, int flags = 0); + UGeckoInstruction inst, BitSet32 registersInUse, int flags = 0); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, - BitSet32 registersInUse, int flags = 0); + UGeckoInstruction inst, BitSet32 registersInUse, int flags = 0); // applies to safe and unsafe WriteRegToReg bool WriteClobbersRegValue(int accessSize, bool swap); // returns true if an exception could have been caused - bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse); + bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, UGeckoInstruction inst, + BitSet32 registersInUse); void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true); void JitGetAndClearCAOV(bool oe); diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 4e79ca232098..dc7edb1abf7d 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -340,6 +340,9 @@ alignas(16) static const float m_m128 = -128.0f; // Sizes of the various quantized store types constexpr std::array sizes{{32, 0, 0, 0, 8, 16, 8, 16}}; +// TODO: Use the actual instruction being emulated (needed for alignment exception emulation) +static const UGeckoInstruction ps_placeholder_instruction = 0; + void CommonAsmRoutines::GenQuantizedStores() { // Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_stXX). 
@@ -540,7 +543,8 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, if (!single) flags |= SAFE_LOADSTORE_NO_SWAP; - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, size, 0, QUANTIZED_REGS_TO_SAVE, flags); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, size, 0, ps_placeholder_instruction, + QUANTIZED_REGS_TO_SAVE, flags); } void QuantizedMemoryRoutines::GenQuantizedStoreFloat(bool single, bool isInline) @@ -598,7 +602,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON | SAFE_LOADSTORE_NO_UPDATE_PC; - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags); + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, ps_placeholder_instruction, + regsToSave, extend, flags); if (!single && (type == QUANTIZE_U8 || type == QUANTIZE_S8)) { // TODO: Support not swapping in safeLoadToReg to avoid bswapping twice @@ -726,7 +731,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) int flags = isInline ? 
0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON | SAFE_LOADSTORE_NO_UPDATE_PC; - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags); + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, ps_placeholder_instruction, + regsToSave, extend, flags); } if (single) diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp index b5e3e66e2bc3..581a8355d173 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp @@ -45,8 +45,8 @@ const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info) const u8* trampoline = GetCodePtr(); - SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse, - info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM); + SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.inst, + info.registersInUse, info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM); JMP(info.start + info.len, true); @@ -64,7 +64,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info) // Don't treat FIFO writes specially for now because they require a burst // check anyway. 
- SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset, + SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset, info.inst, info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM); JMP(info.start + info.len, true); diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h index 2ca6b2826a53..a265496aaf19 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h @@ -7,6 +7,7 @@ #include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Gekko.h" // Stores information we need to batch-patch a MOV with a call to the slow read/write path after // it faults. There will be 10s of thousands of these structs live, so be wary of making this too @@ -22,6 +23,9 @@ struct TrampolineInfo final // The PPC PC for the current load/store block u32 pc; + // The instruction which is being emulated + UGeckoInstruction inst; + // Saved because we need these to make the ABI call in the trampoline BitSet32 registersInUse; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index c9f6d60919d9..f7ffada260a2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -11,6 +11,7 @@ #include "Common/Arm64Emitter.h" #include "Core/PowerPC/CPUCoreBase.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" @@ -178,11 +179,12 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA BitSet32 gprs; BitSet32 fprs; u32 flags; + UGeckoInstruction inst; bool operator<(const SlowmemHandler& rhs) const { - return std::tie(dest_reg, addr_reg, gprs, fprs, flags) < - std::tie(rhs.dest_reg, rhs.addr_reg, rhs.gprs, rhs.fprs, 
rhs.flags); + return std::tie(dest_reg, addr_reg, gprs, fprs, flags, inst.hex) < + std::tie(rhs.dest_reg, rhs.addr_reg, rhs.gprs, rhs.fprs, rhs.flags, rhs.inst.hex); } }; @@ -219,12 +221,15 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA // Backpatching routines bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg); - void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, Arm64Gen::ARM64Reg RS, - Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), + void EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fastmem, bool do_farcode, + Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr, + BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0)); // Loadstore routines - void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); - void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset); + void SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 offsetReg, u32 flags, + s32 offset, bool update); + void SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 regOffset, u32 flags, + s32 offset); void DoJit(u32 em_address, JitBlock* b, u32 nextPC); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 23236a524e9b..9df565d9d898 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -14,6 +14,7 @@ #include "Common/Swap.h" #include "Core/HW/Memmap.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" #include "Core/PowerPC/MMU.h" @@ -50,14 +51,25 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx) ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory); } -void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, ARM64Reg RS, - ARM64Reg 
addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push) +void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, bool fastmem, + bool do_farcode, ARM64Reg RS, ARM64Reg addr, + BitSet32 gprs_to_push, BitSet32 fprs_to_push) { bool in_far_code = false; const u8* fastmem_start = GetCodePtr(); + FixupBranch slowmem_fixup; + bool check_alignment = fastmem && do_farcode && jo.alignment_exceptions && + PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, false); + if (fastmem) { + if (check_alignment) + { + TSTI2R(addr, PowerPC::GetAlignmentMask(BackPatchInfo::GetFlagSize(flags))); + slowmem_fixup = B(CCFlags::CC_NEQ); + } + if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT) { if (flags & BackPatchInfo::FLAG_SIZE_F32) @@ -144,6 +156,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR handler.gprs = gprs_to_push; handler.fprs = fprs_to_push; handler.flags = flags; + handler.inst = inst; FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start]; auto handler_loc_iter = m_handler_to_loc.find(handler); @@ -162,10 +175,17 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR const u8* handler_loc = handler_loc_iter->second; fastmem_area->slowmem_code = handler_loc; fastmem_area->length = fastmem_end - fastmem_start; + + if (check_alignment) + SetJumpTarget(slowmem_fixup, handler_loc); + return; } } + if (check_alignment) + SetJumpTarget(slowmem_fixup); + ABI_PushRegisters(gprs_to_push); m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); @@ -175,36 +195,36 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR { m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0); MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32); - BLR(ARM64Reg::X8); } else if (flags & BackPatchInfo::FLAG_SIZE_F32X2) { m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0); MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64); ROR(ARM64Reg::X0, ARM64Reg::X0, 32); - BLR(ARM64Reg::X8); } else { 
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0); MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64); - BLR(ARM64Reg::X8); } + + MOVI2R(ARM64Reg::W2, inst.hex); + BLR(ARM64Reg::X8); } else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT) { if (flags & BackPatchInfo::FLAG_SIZE_F32) - { MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32); - BLR(ARM64Reg::X8); - m_float_emit.INS(32, RS, 0, ARM64Reg::X0); - } else - { MOVP2R(ARM64Reg::X8, &PowerPC::Read_F64); - BLR(ARM64Reg::X8); + + MOVI2R(ARM64Reg::W1, inst.hex); + BLR(ARM64Reg::X8); + + if (flags & BackPatchInfo::FLAG_SIZE_F32) + m_float_emit.INS(32, RS, 0, ARM64Reg::X0); + else m_float_emit.INS(64, RS, 0, ARM64Reg::X0); - } } else if (flags & BackPatchInfo::FLAG_STORE) { @@ -217,11 +237,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR else MOVP2R(ARM64Reg::X8, &PowerPC::Write_U8); + MOVI2R(ARM64Reg::W2, inst.hex); BLR(ARM64Reg::X8); } else if (flags & BackPatchInfo::FLAG_ZERO_256) { MOVP2R(ARM64Reg::X8, &PowerPC::ClearCacheLine); + MOVI2R(ARM64Reg::W1, inst.hex); BLR(ARM64Reg::X8); } else @@ -233,6 +255,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR else if (flags & BackPatchInfo::FLAG_SIZE_8) MOVP2R(ARM64Reg::X8, &PowerPC::Read_U8); + MOVI2R(ARM64Reg::W1, inst.hex); BLR(ARM64Reg::X8); if (!(flags & BackPatchInfo::FLAG_REVERSE)) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index fbc106941f95..e8ec60f4f32c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -22,7 +22,8 @@ using namespace Arm64Gen; -void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update) +void JitArm64::SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 offsetReg, u32 flags, + s32 offset, bool update) { // We want to make sure to not get LR as a temp 
register gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); @@ -121,9 +122,10 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (is_immediate) mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size); - if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { - EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(inst, flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0)); } else if (mmio_address) { @@ -132,13 +134,15 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o } else { - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, + fprs_in_use); } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); } -void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset) +void JitArm64::SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32 regOffset, + u32 flags, s32 offset) { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); @@ -256,10 +260,11 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; } - else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { MOVI2R(XA, imm_addr); - EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(inst, flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); } else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE)) { 
@@ -271,7 +276,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s if (is_immediate) MOVI2R(XA, imm_addr); - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use); } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); @@ -353,7 +358,7 @@ void JitArm64::lXX(UGeckoInstruction inst) break; } - SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); + SafeLoadToReg(inst, d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); } void JitArm64::stX(UGeckoInstruction inst) @@ -415,7 +420,7 @@ void JitArm64::stX(UGeckoInstruction inst) break; } - SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset); + SafeStoreFromReg(inst, update ? a : (a ? a : -1), s, regOffset, flags, offset); if (update) { @@ -647,7 +652,7 @@ void JitArm64::dcbz(UGeckoInstruction inst) BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; - EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, ARM64Reg::W0, + EmitBackpatchRoutine(inst, BackPatchInfo::FLAG_ZERO_256, true, true, ARM64Reg::W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); gpr.Unlock(ARM64Reg::W0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 068d61d0fb8b..4850d5769e78 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -168,13 +168,14 @@ void JitArm64::lfXX(UGeckoInstruction inst) fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(VD)] = 0; - if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (jo.fastmem_arena && is_immediate && + PowerPC::IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst)) { - 
EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(inst, flags, true, false, VD, XA, BitSet32(0), BitSet32(0)); } else { - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use); } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); @@ -357,16 +358,12 @@ void JitArm64::stfXX(UGeckoInstruction inst) regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; + u32 access_size = BackPatchInfo::GetFlagSize(flags); + if (is_immediate) { if (jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) { - int accessSize; - if (flags & BackPatchInfo::FLAG_SIZE_F64) - accessSize = 64; - else - accessSize = 32; - LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (flags & BackPatchInfo::FLAG_SIZE_F64) @@ -374,11 +371,12 @@ void JitArm64::stfXX(UGeckoInstruction inst) else if (flags & BackPatchInfo::FLAG_SIZE_F32) m_float_emit.REV32(8, ARM64Reg::D0, V0); - m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0, - ARM64Reg::X0, accessSize >> 3); + m_float_emit.STR(access_size, IndexType::Post, + access_size == 64 ? 
ARM64Reg::Q0 : ARM64Reg::D0, ARM64Reg::X0, + access_size >> 3); STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); - js.fifoBytesSinceCheck += accessSize >> 3; + js.fifoBytesSinceCheck += access_size >> 3; if (update) { @@ -387,18 +385,18 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOVI2R(gpr.R(a), imm_addr); } } - else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr, access_size, inst)) { - EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(inst, flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); } else { - EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, false, false, V0, XA, regs_in_use, fprs_in_use); } } else { - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(inst, flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use); } if (want_single && !have_single) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index c3778e330e82..a1ccd4e77821 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -190,8 +190,8 @@ void JitArm64::psq_st(UGeckoInstruction inst) flags |= (inst.W ? 
BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2); - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use, - fprs_in_use); + EmitBackpatchRoutine(inst, flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), + gprs_in_use, fprs_in_use); } else { @@ -213,6 +213,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) ABI_PushRegisters(gprs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30); + MOVI2R(ARM64Reg::W3, inst.hex); BLR(EncodeRegTo64(type_reg)); m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30); ABI_PopRegisters(gprs_in_use); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 0f09176fdada..78fef88be9d2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -669,6 +669,7 @@ void JitArm64::GenerateQuantizedLoadStores() storePairedFloatSlow = GetCodePtr(); float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::Q0, 0); ROR(ARM64Reg::X0, ARM64Reg::X0, 32); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U64); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U64); // W2 carries inst (3rd argument), so branch via X3 + BR(ARM64Reg::X3); } @@ -697,6 +698,7 @@ void JitArm64::GenerateQuantizedLoadStores() emit_quantize(); float_emit.UMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0); REV16(ARM64Reg::W0, ARM64Reg::W0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U16); + BR(ARM64Reg::X3); } @@ -724,6 +726,7 @@ void JitArm64::GenerateQuantizedLoadStores() emit_quantize(); float_emit.UMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0); REV16(ARM64Reg::W0, ARM64Reg::W0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U16); + BR(ARM64Reg::X3); } @@ -752,6 +755,7 @@ void JitArm64::GenerateQuantizedLoadStores() emit_quantize(); float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U32); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U32); + BR(ARM64Reg::X3); } @@ -779,6 +783,7 @@ void 
JitArm64::GenerateQuantizedLoadStores() emit_quantize(); float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U32); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U32); + BR(ARM64Reg::X3); } @@ -794,6 +799,7 @@ void JitArm64::GenerateQuantizedLoadStores() storeSingleFloatSlow = GetCodePtr(); float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U32); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U32); + BR(ARM64Reg::X3); } @@ -820,6 +826,7 @@ void JitArm64::GenerateQuantizedLoadStores() storeSingleU8Slow = GetCodePtr(); emit_quantize(); float_emit.UMOV(8, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U8); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U8); + BR(ARM64Reg::X3); } @@ -846,6 +853,7 @@ void JitArm64::GenerateQuantizedLoadStores() storeSingleS8Slow = GetCodePtr(); emit_quantize(); float_emit.SMOV(8, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U8); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U8); + BR(ARM64Reg::X3); } @@ -872,6 +880,7 @@ void JitArm64::GenerateQuantizedLoadStores() storeSingleU16Slow = GetCodePtr(); emit_quantize(); float_emit.UMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U16); + BR(ARM64Reg::X3); } @@ -898,6 +907,7 @@ void JitArm64::GenerateQuantizedLoadStores() storeSingleS16Slow = GetCodePtr(); emit_quantize(); float_emit.SMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0); - MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16); - BR(ARM64Reg::X2); + MOV(ARM64Reg::W2, ARM64Reg::W3); + MOVP2R(ARM64Reg::X3, &PowerPC::Write_U16); + BR(ARM64Reg::X3); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index eeabe7737dfa..d4e41daac338 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -46,5 +46,6 @@ void JitBase::UpdateMemoryOptions() { bool any_watchpoints = PowerPC::memchecks.HasAny(); jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena 
&& (MSR.DR || !any_watchpoints); - jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; + jo.alignment_exceptions = SConfig::GetInstance().bAlignmentExceptions; + jo.memcheck = jo.alignment_exceptions || SConfig::GetInstance().bMMU || any_watchpoints; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 2f1686b8324b..2ef309a6b514 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -63,6 +63,7 @@ class JitBase : public CPUCoreBase bool accurateSinglePrecision; bool fastmem; bool fastmem_arena; + bool alignment_exceptions; bool memcheck; bool profile_blocks; }; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 85647f2b04e3..91aeb686d189 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -4,6 +4,7 @@ #include "Core/PowerPC/MMU.h" +#include #include #include #include @@ -16,6 +17,8 @@ #include "Core/HW/GPFifo.h" #include "Core/HW/MMIO.h" #include "Core/HW/Memmap.h" +#include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" @@ -106,6 +109,7 @@ struct TranslateAddressResult PAGE_FAULT } result; u32 address; + bool wi; // Set to true if the view of memory is either write-through or cache-inhibited bool Success() const { return result <= PAGE_TABLE_TRANSLATED; } }; template @@ -167,7 +171,7 @@ BatTable dbat_table; static void GenerateDSIException(u32 effective_address, bool write); template -static T ReadFromHardware(u32 em_address) +static T ReadFromHardware(u32 em_address, UGeckoInstruction inst) { if (!never_translate && MSR.DR) { @@ -178,12 +182,18 @@ static T ReadFromHardware(u32 em_address) GenerateDSIException(em_address, false); return 0; } + + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, translated_addr.wi)) + 
{ + GenerateAlignmentException(em_address, inst); + return 0; + } + if ((em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) { // This could be unaligned down to the byte level... hopefully this is rare, so doing it this // way isn't too terrible. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1); auto addr_next_page = TranslateAddress(em_address_next_page); if (!addr_next_page.Success()) @@ -198,12 +208,21 @@ static T ReadFromHardware(u32 em_address) { if (addr == em_address_next_page) addr_translated = addr_next_page.address; - var = (var << 8) | ReadFromHardware(addr_translated); + var = (var << 8) | ReadFromHardware(addr_translated, inst); } return var; } em_address = translated_addr.address; } + else + { + if (flag == XCheckTLBFlag::Read && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, false)) + { + GenerateAlignmentException(em_address, inst); + return 0; + } + } // TODO: Make sure these are safe for unaligned addresses. 
@@ -256,7 +275,7 @@ static T ReadFromHardware(u32 em_address) } template -static void WriteToHardware(u32 em_address, const T data) +static void WriteToHardware(u32 em_address, const T data, UGeckoInstruction inst) { if (!never_translate && MSR.DR) { @@ -267,6 +286,14 @@ static void WriteToHardware(u32 em_address, const T data) GenerateDSIException(em_address, true); return; } + + if (flag == XCheckTLBFlag::Write && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, translated_addr.wi)) + { + GenerateAlignmentException(em_address, inst); + return; + } + if ((em_address & (sizeof(T) - 1)) && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) { @@ -288,12 +315,21 @@ static void WriteToHardware(u32 em_address, const T data) { if (em_address + i == em_address_next_page) addr_translated = addr_next_page.address; - WriteToHardware(addr_translated, static_cast(val >> (i * 8))); + WriteToHardware(addr_translated, static_cast(val >> (i * 8)), inst); } return; } em_address = translated_addr.address; } + else + { + if (flag == XCheckTLBFlag::Write && + AccessCausesAlignmentException(em_address, sizeof(T) << 3, inst, false)) + { + GenerateAlignmentException(em_address, inst); + return; + } + } // TODO: Make sure these are safe for unaligned addresses. 
@@ -423,9 +459,9 @@ TryReadInstResult TryReadInstruction(u32 address) return TryReadInstResult{true, from_bat, hex, address}; } -u32 HostRead_Instruction(const u32 address) +u32 HostRead_Instruction(u32 address) { - return ReadFromHardware(address); + return ReadFromHardware(address, 0); } static void Memcheck(u32 address, u32 var, bool write, size_t size) @@ -456,166 +492,166 @@ static void Memcheck(u32 address, u32 var, bool write, size_t size) } } -u8 Read_U8(const u32 address) +u8 Read_U8(u32 address, UGeckoInstruction inst) { - u8 var = ReadFromHardware(address); + u8 var = ReadFromHardware(address, inst); Memcheck(address, var, false, 1); return var; } -u16 Read_U16(const u32 address) +u16 Read_U16(u32 address, UGeckoInstruction inst) { - u16 var = ReadFromHardware(address); + u16 var = ReadFromHardware(address, inst); Memcheck(address, var, false, 2); return var; } -u32 Read_U32(const u32 address) +u32 Read_U32(u32 address, UGeckoInstruction inst) { - u32 var = ReadFromHardware(address); + u32 var = ReadFromHardware(address, inst); Memcheck(address, var, false, 4); return var; } -u64 Read_U64(const u32 address) +u64 Read_U64(u32 address, UGeckoInstruction inst) { - u64 var = ReadFromHardware(address); + u64 var = ReadFromHardware(address, inst); Memcheck(address, (u32)var, false, 8); return var; } -double Read_F64(const u32 address) +double Read_F64(u32 address, UGeckoInstruction inst) { - const u64 integral = Read_U64(address); + const u64 integral = Read_U64(address, inst); return Common::BitCast(integral); } -float Read_F32(const u32 address) +float Read_F32(u32 address, UGeckoInstruction inst) { - const u32 integral = Read_U32(address); + const u32 integral = Read_U32(address, inst); return Common::BitCast(integral); } -u32 Read_U8_ZX(const u32 address) +u32 Read_U8_ZX(u32 address, UGeckoInstruction inst) { - return Read_U8(address); + return Read_U8(address, inst); } -u32 Read_U16_ZX(const u32 address) +u32 Read_U16_ZX(u32 address, UGeckoInstruction 
inst) { - return Read_U16(address); + return Read_U16(address, inst); } -void Write_U8(const u8 var, const u32 address) +void Write_U8(u8 var, u32 address, UGeckoInstruction inst) { Memcheck(address, var, true, 1); - WriteToHardware(address, var); + WriteToHardware(address, var, inst); } -void Write_U16(const u16 var, const u32 address) +void Write_U16(u16 var, u32 address, UGeckoInstruction inst) { Memcheck(address, var, true, 2); - WriteToHardware(address, var); + WriteToHardware(address, var, inst); } -void Write_U16_Swap(const u16 var, const u32 address) +void Write_U16_Swap(u16 var, u32 address, UGeckoInstruction inst) { Memcheck(address, var, true, 2); - Write_U16(Common::swap16(var), address); + Write_U16(Common::swap16(var), address, inst); } -void Write_U32(const u32 var, const u32 address) +void Write_U32(u32 var, u32 address, UGeckoInstruction inst) { Memcheck(address, var, true, 4); - WriteToHardware(address, var); + WriteToHardware(address, var, inst); } -void Write_U32_Swap(const u32 var, const u32 address) +void Write_U32_Swap(u32 var, u32 address, UGeckoInstruction inst) { Memcheck(address, var, true, 4); - Write_U32(Common::swap32(var), address); + Write_U32(Common::swap32(var), address, inst); } -void Write_U64(const u64 var, const u32 address) +void Write_U64(u64 var, u32 address, UGeckoInstruction inst) { Memcheck(address, (u32)var, true, 8); - WriteToHardware(address, var); + WriteToHardware(address, var, inst); } -void Write_U64_Swap(const u64 var, const u32 address) +void Write_U64_Swap(u64 var, u32 address, UGeckoInstruction inst) { Memcheck(address, (u32)var, true, 8); - Write_U64(Common::swap64(var), address); + Write_U64(Common::swap64(var), address, inst); } -void Write_F64(const double var, const u32 address) +void Write_F64(double var, u32 address, UGeckoInstruction inst) { const u64 integral = Common::BitCast(var); - Write_U64(integral, address); + Write_U64(integral, address, inst); } -u8 HostRead_U8(const u32 address) +u8 
HostRead_U8(u32 address) { - return ReadFromHardware(address); + return ReadFromHardware(address, 0); } -u16 HostRead_U16(const u32 address) +u16 HostRead_U16(u32 address) { - return ReadFromHardware(address); + return ReadFromHardware(address, 0); } -u32 HostRead_U32(const u32 address) +u32 HostRead_U32(u32 address) { - return ReadFromHardware(address); + return ReadFromHardware(address, 0); } -u64 HostRead_U64(const u32 address) +u64 HostRead_U64(u32 address) { - return ReadFromHardware(address); + return ReadFromHardware(address, 0); } -float HostRead_F32(const u32 address) +float HostRead_F32(u32 address) { const u32 integral = HostRead_U32(address); return Common::BitCast(integral); } -double HostRead_F64(const u32 address) +double HostRead_F64(u32 address) { const u64 integral = HostRead_U64(address); return Common::BitCast(integral); } -void HostWrite_U8(const u8 var, const u32 address) +void HostWrite_U8(u8 var, u32 address) { - WriteToHardware(address, var); + WriteToHardware(address, var, 0); } -void HostWrite_U16(const u16 var, const u32 address) +void HostWrite_U16(u16 var, u32 address) { - WriteToHardware(address, var); + WriteToHardware(address, var, 0); } -void HostWrite_U32(const u32 var, const u32 address) +void HostWrite_U32(u32 var, u32 address) { - WriteToHardware(address, var); + WriteToHardware(address, var, 0); } -void HostWrite_U64(const u64 var, const u32 address) +void HostWrite_U64(u64 var, u32 address) { - WriteToHardware(address, var); + WriteToHardware(address, var, 0); } -void HostWrite_F32(const float var, const u32 address) +void HostWrite_F32(float var, u32 address) { const u32 integral = Common::BitCast(var); HostWrite_U32(integral, address); } -void HostWrite_F64(const double var, const u32 address) +void HostWrite_F64(double var, u32 address) { const u64 integral = Common::BitCast(var); @@ -638,7 +674,7 @@ std::string HostGetString(u32 address, size_t size) return s; } -bool IsOptimizableRAMAddress(const u32 address) +bool 
IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst) { if (PowerPC::memchecks.HasAny()) return false; @@ -646,8 +682,12 @@ bool IsOptimizableRAMAddress(const u32 address) if (!MSR.DR) return false; - // TODO: This API needs to take an access size - // + if ((address & GetAlignmentMask(access_size)) != 0 && + AccessCausesAlignmentExceptionIfMisaligned(inst, false)) + { + return false; + } + // We store whether an access can be optimized to an unchecked access // in dbat_table. u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT]; @@ -770,7 +810,7 @@ void DMA_MemoryToLC(const u32 cache_address, const u32 mem_address, const u32 nu memcpy(dst, src, 32 * num_blocks); } -void ClearCacheLine(u32 address) +void ClearCacheLine(u32 address, UGeckoInstruction inst) { DEBUG_ASSERT((address & 0x1F) == 0); if (MSR.DR) @@ -795,7 +835,7 @@ void ClearCacheLine(u32 address) // TODO: This isn't precisely correct for non-RAM regions, but the difference // is unlikely to matter. for (u32 i = 0; i < 32; i += 8) - WriteToHardware(address + i, 0); + WriteToHardware(address + i, 0, inst); } u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) @@ -809,11 +849,12 @@ u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) // Translate address // If we also optimize for TLB mappings, we'd have to clear the // JitCache on each TLB invalidation. - if (!TranslateBatAddess(dbat_table, &address)) + bool wi; + if (!TranslateBatAddess(dbat_table, &address, &wi)) return 0; // Check whether the address is an aligned address of an MMIO register. - const bool aligned = (address & ((access_size >> 3) - 1)) == 0; + const bool aligned = (address & GetAlignmentMask(access_size)) == 0; if (!aligned || !MMIO::IsMMIOAddress(address)) return 0; @@ -831,7 +872,8 @@ bool IsOptimizableGatherPipeWrite(u32 address) // Translate address, only check BAT mapping. // If we also optimize for TLB mappings, we'd have to clear the // JitCache on each TLB invalidation. 
- if (!TranslateBatAddess(dbat_table, &address)) + bool wi; + if (!TranslateBatAddess(dbat_table, &address, &wi)) return false; // Check whether the translated address equals the address in WPAR. @@ -1000,18 +1042,20 @@ enum class TLBLookupResult UpdateC }; -static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 vpa, u32* paddr) +static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 vpa, u32* paddr, + bool* wi) { const u32 tag = vpa >> HW_PAGE_INDEX_SHIFT; TLBEntry& tlbe = ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK]; if (tlbe.tag[0] == tag) { + UPTE2 PTE2; + PTE2.Hex = tlbe.pte[0]; + // Check if C bit requires updating if (flag == XCheckTLBFlag::Write) { - UPTE2 PTE2; - PTE2.Hex = tlbe.pte[0]; if (PTE2.C == 0) { PTE2.C = 1; @@ -1024,16 +1068,18 @@ static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 tlbe.recent = 0; *paddr = tlbe.paddr[0] | (vpa & 0xfff); + *wi = PTE2.WIMG & 0b1100; return TLBLookupResult::Found; } if (tlbe.tag[1] == tag) { + UPTE2 PTE2; + PTE2.Hex = tlbe.pte[1]; + // Check if C bit requires updating if (flag == XCheckTLBFlag::Write) { - UPTE2 PTE2; - PTE2.Hex = tlbe.pte[1]; if (PTE2.C == 0) { PTE2.C = 1; @@ -1046,6 +1092,7 @@ static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 tlbe.recent = 1; *paddr = tlbe.paddr[1] | (vpa & 0xfff); + *wi = PTE2.WIMG & 0b1100; return TLBLookupResult::Found; } @@ -1080,14 +1127,14 @@ void InvalidateTLBEntry(u32 address) } // Page Address Translation -static TranslateAddressResult TranslatePageAddress(const u32 address, const XCheckTLBFlag flag) +static TranslateAddressResult TranslatePageAddress(const u32 address, const XCheckTLBFlag flag, + bool* wi) { // TLB cache // This catches 99%+ of lookups in practice, so the actual page table entry code below doesn't - // benefit - // much from optimization. 
u32 translatedAddress = 0; - TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress); + TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress, wi); if (res == TLBLookupResult::Found) return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED, translatedAddress}; @@ -1162,6 +1209,8 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe if (res != TLBLookupResult::UpdateC) UpdateTLBEntry(flag, PTE2, address); + *wi = PTE2.WIMG & 0b1100; + return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED, (PTE2.RPN << 12) | offset}; } @@ -1173,7 +1222,7 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe static void UpdateBATs(BatTable& bat_table, u32 base_spr) { // TODO: Separate BATs for MSR.PR==0 and MSR.PR==1 - // TODO: Handle PP/WIMG settings. + // TODO: Handle PP settings. // TODO: Check how hardware reacts to overlapping BATs (including // BATs which should cause a DSI). // TODO: Check how hardware reacts to invalid BATs (bad mask etc). @@ -1218,19 +1267,38 @@ static void UpdateBATs(BatTable& bat_table, u32 base_spr) u32 physical_address = (batl.BRPN | j) << BAT_INDEX_SHIFT; u32 virtual_address = (batu.BEPI | j) << BAT_INDEX_SHIFT; - // The bottom bit is whether the translation is valid; the second - // bit from the bottom is whether we can use the fastmem arena. 
+ // BAT_MAPPED_BIT is whether the translation is valid + // BAT_PHYSICAL_BIT is whether we can use the fastmem arena + // BAT_WI_BIT is whether W or I (of WIMG) are set - this affects alignment exceptions u32 valid_bit = BAT_MAPPED_BIT; - if (Memory::m_pFakeVMEM && (physical_address & 0xFE000000) == 0x7E000000) - valid_bit |= BAT_PHYSICAL_BIT; - else if (physical_address < Memory::GetRamSizeReal()) - valid_bit |= BAT_PHYSICAL_BIT; - else if (Memory::m_pEXRAM && physical_address >> 28 == 0x1 && - (physical_address & 0x0FFFFFFF) < Memory::GetExRamSizeReal()) - valid_bit |= BAT_PHYSICAL_BIT; - else if (physical_address >> 28 == 0xE && - physical_address < 0xE0000000 + Memory::GetL1CacheSize()) - valid_bit |= BAT_PHYSICAL_BIT; + + const bool wi = batl.WIMG & 0b1100; + if (wi) + valid_bit |= BAT_WI_BIT; + + // For correctness, we should disable fastmem when we have BAT_WI_BIT, + // but there is a setting which can be used to disable this behavior for performance + if (!wi || !SConfig::GetInstance().bAlignmentExceptions) + { + if (Memory::m_pFakeVMEM && (physical_address & 0xFE000000) == 0x7E000000) + { + valid_bit |= BAT_PHYSICAL_BIT; + } + else if (physical_address < Memory::GetRamSizeReal()) + { + valid_bit |= BAT_PHYSICAL_BIT; + } + else if (Memory::m_pEXRAM && physical_address >> 28 == 0x1 && + (physical_address & 0x0FFFFFFF) < Memory::GetExRamSizeReal()) + { + valid_bit |= BAT_PHYSICAL_BIT; + } + else if (physical_address >> 28 == 0xE && + physical_address < 0xE0000000 + Memory::GetL1CacheSize()) + { + valid_bit |= BAT_PHYSICAL_BIT; + } + } // Fastmem doesn't support memchecks, so disable it for all overlapping virtual pages. if (PowerPC::memchecks.OverlapsMemcheck(virtual_address, BAT_PAGE_SIZE)) @@ -1305,10 +1373,12 @@ void IBATUpdated() template static TranslateAddressResult TranslateAddress(u32 address) { - if (TranslateBatAddess(IsOpcodeFlag(flag) ? 
ibat_table : dbat_table, &address)) - return TranslateAddressResult{TranslateAddressResult::BAT_TRANSLATED, address}; + bool wi; - return TranslatePageAddress(address, flag); + if (TranslateBatAddess(IsOpcodeFlag(flag) ? ibat_table : dbat_table, &address, &wi)) + return TranslateAddressResult{TranslateAddressResult::BAT_TRANSLATED, address, wi}; + + return TranslatePageAddress(address, flag, &wi); } std::optional GetTranslatedAddress(u32 address) @@ -1321,4 +1391,74 @@ std::optional GetTranslatedAddress(u32 address) return std::optional(result.address); } +static bool IsFloat(UGeckoInstruction inst) +{ + // Floating loadstore, paired loadstore (excluding dcbz_l) + return (inst.OPCD >= 48 && inst.OPCD < 62) || (inst.OPCD == 4 && inst.SUBOP10 != 1014); + + // TODO: "In 750CL, the paired-single quantization load or store generates an alignment exception + // if the operand is not word-aligned when the corresponding GQRn[LD_TYPE] or GQRn[ST_TYPE] are + // type 0 and does not generate an alignment exception when the corresponding GQRn[LD_TYPE] or + // GQRn[ST_TYPE] are 4, 5, 6 or 7." 
Right now we treat it as always being type 0 +} + +static bool IsMultiword(UGeckoInstruction inst) +{ + // lmw, stmw + if (inst.OPCD == 46 || inst.OPCD == 47) + return true; + + if (inst.OPCD != 31) + return false; + + // lswx, lswi, stswx, stswi + return inst.SUBOP10 == 533 || inst.SUBOP10 == 597 || inst.SUBOP10 == 661 || inst.SUBOP10 == 725; +} + +static bool IsDcbz(UGeckoInstruction inst) +{ + // dcbz, dcbz_l + return inst.SUBOP10 == 1014 && (inst.OPCD == 31 || inst.OPCD == 4); +} + +static bool IsEciwxOrEcowx(UGeckoInstruction inst) +{ + // eciwx, ecowx + return inst.OPCD == 31 && (inst.SUBOP10 == 310 || inst.SUBOP10 == 438); +} + +static bool IsLwarxOrStwcx(UGeckoInstruction inst) +{ + // lwarx, stwcx + return inst.OPCD == 31 && (inst.SUBOP10 == 20 || inst.SUBOP10 == 150); +} + +bool AccessAlwaysCausesAlignmentException(UGeckoInstruction inst, bool wi) +{ + if (!wi) + return false; + + return IsMultiword(inst) || IsDcbz(inst) || IsLwarxOrStwcx(inst); +} + +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, bool wi) +{ + if (wi) + return true; + + return IsFloat(inst) || IsMultiword(inst) || IsEciwxOrEcowx(inst) || IsLwarxOrStwcx(inst); +} + +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi) +{ + if (AccessAlwaysCausesAlignmentException(inst, wi)) + return true; + + if ((effective_address & GetAlignmentMask(access_size)) == 0) + return false; + + return AccessCausesAlignmentExceptionIfMisaligned(inst, wi); +} + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 48d97ca606f8..14efbfcb88b8 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -10,6 +10,7 @@ #include #include "Common/CommonTypes.h" +#include "Core/PowerPC/Gekko.h" namespace PowerPC { @@ -53,34 +54,35 @@ struct TryReadInstResult }; TryReadInstResult TryReadInstruction(u32 address); -u8 Read_U8(u32 address); -u16 Read_U16(u32 
address); -u32 Read_U32(u32 address); -u64 Read_U64(u32 address); +u8 Read_U8(u32 address, UGeckoInstruction inst); +u16 Read_U16(u32 address, UGeckoInstruction inst); +u32 Read_U32(u32 address, UGeckoInstruction inst); +u64 Read_U64(u32 address, UGeckoInstruction inst); // Useful helper functions, used by ARM JIT -float Read_F32(u32 address); -double Read_F64(u32 address); +float Read_F32(u32 address, UGeckoInstruction inst); +double Read_F64(u32 address, UGeckoInstruction inst); // used by JIT. Return zero-extended 32bit values -u32 Read_U8_ZX(u32 address); -u32 Read_U16_ZX(u32 address); +u32 Read_U8_ZX(u32 address, UGeckoInstruction inst); +u32 Read_U16_ZX(u32 address, UGeckoInstruction inst); -void Write_U8(u8 var, u32 address); -void Write_U16(u16 var, u32 address); -void Write_U32(u32 var, u32 address); -void Write_U64(u64 var, u32 address); +void Write_U8(u8 var, u32 address, UGeckoInstruction inst); +void Write_U16(u16 var, u32 address, UGeckoInstruction inst); +void Write_U32(u32 var, u32 address, UGeckoInstruction inst); +void Write_U64(u64 var, u32 address, UGeckoInstruction inst); -void Write_U16_Swap(u16 var, u32 address); -void Write_U32_Swap(u32 var, u32 address); -void Write_U64_Swap(u64 var, u32 address); +void Write_U16_Swap(u16 var, u32 address, UGeckoInstruction inst); +void Write_U32_Swap(u32 var, u32 address, UGeckoInstruction inst); +void Write_U64_Swap(u64 var, u32 address, UGeckoInstruction inst); // Useful helper functions, used by ARM JIT void Write_F64(double var, u32 address); void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks); void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks); -void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned +void ClearCacheLine(u32 address, + UGeckoInstruction inst); // Zeroes 32 bytes; address should be 32-byte-aligned // TLB functions void SDRUpdated(); @@ -91,7 +93,7 @@ void IBATUpdated(); // Result changes based on the BAT registers 
and MSR.DR. Returns whether // it's safe to optimize a read or write to this address to an unguarded // memory access. Does not consider page tables. -bool IsOptimizableRAMAddress(u32 address); +bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst); u32 IsOptimizableMMIOAccess(u32 address, u32 access_size); bool IsOptimizableGatherPipeWrite(u32 address); @@ -107,18 +109,31 @@ constexpr int BAT_INDEX_SHIFT = 17; constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT; constexpr u32 BAT_MAPPED_BIT = 0x1; constexpr u32 BAT_PHYSICAL_BIT = 0x2; -constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x3); +constexpr u32 BAT_WI_BIT = 0x4; +constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7); using BatTable = std::array; // 128 KB extern BatTable ibat_table; extern BatTable dbat_table; -inline bool TranslateBatAddess(const BatTable& bat_table, u32* address) +inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi) { u32 bat_result = bat_table[*address >> BAT_INDEX_SHIFT]; if ((bat_result & BAT_MAPPED_BIT) == 0) return false; *address = (bat_result & BAT_RESULT_MASK) | (*address & (BAT_PAGE_SIZE - 1)); + *wi = bat_result & BAT_WI_BIT; return true; } std::optional GetTranslatedAddress(u32 address); + +constexpr u32 GetAlignmentMask(size_t size) +{ + return static_cast(std::min(4, size >> 3) - 1); +} + +bool AccessAlwaysCausesAlignmentException(UGeckoInstruction inst, bool wi); +bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, bool wi); +bool AccessCausesAlignmentException(u32 effective_address, size_t access_size, + UGeckoInstruction inst, bool wi); + } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 4da70e44cc9a..8b6c4022bfeb 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -532,8 +532,7 @@ void CheckExceptions() MSR.LE = MSR.ILE; MSR.Hex &= ~0x04EF36; PC = NPC = 0x00000600; - - // TODO crazy amount of 
DSISR options to check out + // DSISR and DAR regs are changed in GenerateAlignmentException() DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT"); ppcState.Exceptions &= ~EXCEPTION_ALIGNMENT;