From 2a5f4a23902f8d4d82eda52191fb1792eaf2f2b9 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 8 Oct 2022 18:19:37 +0200 Subject: [PATCH] Jit64: Don't use fregsIn in HandleNaNs fregsIn will include FD for double-precision instructions, since for dependency tracking purposes the instruction does read the upper half of FD. This is not what we want in HandleNaNs. The consequence of this bug is that if an instruction was supposed to output a NaN and FD happens to contain a NaN and FD happens to be the same register as an unused register in the instruction encoding, the NaN in FD could get used as the output instead of the correct NaN. This isn't known to affect any games, which isn't especially surprising considering that there's only one game that needs AccurateNaNs anyway. --- Source/Core/Core/PowerPC/Jit64/Jit.h | 3 ++- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 25 +++++++++---------- Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 4 +-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 2b4a1592ffe0..04d30e46df25 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -126,7 +126,8 @@ class Jit64 : public JitBase, public QuantizedMemoryRoutines void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true, bool duplicate = false); void FinalizeDoubleResult(Gen::X64Reg output, const Gen::OpArg& input); - void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm, Gen::X64Reg clobber); + void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm, Gen::X64Reg clobber, + std::vector inputs); void MultiplyImmediate(u32 imm, int a, int d, bool overflow); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index c14130e2f256..918d6260d2f1 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -92,7 +92,7 @@ void Jit64::FinalizeDoubleResult(X64Reg output, const OpArg& input) SetFPRFIfNeeded(input, false); } -void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber) +void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber, std::vector inputs) { // | PowerPC | x86 // ---------------------+----------+--------- @@ -107,14 +107,13 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber) ASSERT(xmm != clobber); - std::vector inputs; - u32 a = inst.FA, b = inst.FB, c = inst.FC; - for (u32 i : {a, b, c}) + // Remove duplicates from inputs + for (auto it = inputs.begin(); it != inputs.end();) { - if (!js.op->fregsIn[i]) - continue; - if (std::find(inputs.begin(), inputs.end(), i) == inputs.end()) - inputs.push_back(i); + if (std::find(inputs.begin(), it, *it) != it) + it = inputs.erase(it); + else + ++it; } if (inst.OPCD != 4) @@ -128,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber) // If any inputs are NaNs, pick the first NaN of them std::vector fixups; - for (u32 x : inputs) + for (int x : inputs) { RCOpArg Rx = fpr.Use(x, RCMode::Read); RegCache::Realize(Rx); @@ -168,7 +167,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber) BLENDVPD(xmm, MConst(psGeneratedQNaN)); // If any inputs are NaNs, use those instead - for (u32 x : inputs) + for (int x : inputs) { RCOpArg Rx = fpr.Use(x, RCMode::Read); RegCache::Realize(Rx); @@ -198,7 +197,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber) MOVAPD(xmm, tmp); // If any inputs are NaNs, use those instead - for (u32 x : inputs) + for (int x : inputs) { RCOpArg Rx = fpr.Use(x, RCMode::Read); RegCache::Realize(Rx); @@ -277,7 +276,7 @@ void Jit64::fp_arith(UGeckoInstruction inst) avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible); } - HandleNaNs(inst, dest, XMM0); + HandleNaNs(inst, dest, XMM0, {op1, op2}); if (single) FinalizeSingleResult(Rd, R(dest), packed, true); else @@ -500,7 +499,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) result_xmm = Rd; } - HandleNaNs(inst, result_xmm, XMM0); + HandleNaNs(inst, result_xmm, XMM0, {a, b, c}); if (single) FinalizeSingleResult(Rd, R(result_xmm), packed, true); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index 74ae5ba584b1..bea3829e0a60 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -77,7 +77,7 @@ void Jit64::ps_sum(UGeckoInstruction inst) default: PanicAlertFmt("ps_sum WTF!!!"); } - HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1); + HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1, {a, b, c}); FinalizeSingleResult(Rd, R(tmp)); } @@ -112,7 +112,7 @@ void Jit64::ps_muls(UGeckoInstruction inst) if (round_input) Force25BitPrecision(XMM1, R(XMM1), XMM0); MULPD(XMM1, Ra); - HandleNaNs(inst, XMM1, XMM0); + HandleNaNs(inst, XMM1, XMM0, {a, c}); FinalizeSingleResult(Rd, R(XMM1)); }