From 2a5f4a23902f8d4d82eda52191fb1792eaf2f2b9 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Sat, 8 Oct 2022 18:19:37 +0200
Subject: [PATCH] Jit64: Don't use fregsIn in HandleNaNs

fregsIn will include FD for double-precision instructions, since for
dependency tracking purposes the instruction does read the upper
half of FD. This is not what we want in HandleNaNs.

The consequence of this bug is that if an instruction was supposed to
output a NaN and FD happens to contain a NaN and FD happens to be the
same register as an unused register in the instruction encoding, the
NaN in FD could get used as the output instead of the correct NaN.
This isn't known to affect any games, which isn't especially surprising
considering that there's only one game that needs AccurateNaNs anyway.
---
 Source/Core/Core/PowerPC/Jit64/Jit.h          |  3 ++-
 .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp  | 25 +++++++++----------
 Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp |  4 +--
 3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 2b4a1592ffe0..04d30e46df25 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -126,7 +126,8 @@ class Jit64 : public JitBase, public QuantizedMemoryRoutines
   void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
                             bool duplicate = false);
   void FinalizeDoubleResult(Gen::X64Reg output, const Gen::OpArg& input);
-  void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm, Gen::X64Reg clobber);
+  void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm, Gen::X64Reg clobber,
+                  std::vector<int> inputs);
 
   void MultiplyImmediate(u32 imm, int a, int d, bool overflow);
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index c14130e2f256..918d6260d2f1 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -92,7 +92,7 @@ void Jit64::FinalizeDoubleResult(X64Reg output, const OpArg& input)
   SetFPRFIfNeeded(input, false);
 }
 
-void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
+void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber, std::vector<int> inputs)
 {
   //                      | PowerPC  | x86
   // ---------------------+----------+---------
@@ -107,14 +107,13 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
 
   ASSERT(xmm != clobber);
 
-  std::vector<u32> inputs;
-  u32 a = inst.FA, b = inst.FB, c = inst.FC;
-  for (u32 i : {a, b, c})
+  // Remove duplicates from inputs
+  for (auto it = inputs.begin(); it != inputs.end();)
   {
-    if (!js.op->fregsIn[i])
-      continue;
-    if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
-      inputs.push_back(i);
+    if (std::find(inputs.begin(), it, *it) != it)
+      it = inputs.erase(it);
+    else
+      ++it;
   }
 
   if (inst.OPCD != 4)
@@ -128,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
 
     // If any inputs are NaNs, pick the first NaN of them
     std::vector<FixupBranch> fixups;
-    for (u32 x : inputs)
+    for (int x : inputs)
     {
       RCOpArg Rx = fpr.Use(x, RCMode::Read);
       RegCache::Realize(Rx);
@@ -168,7 +167,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
       BLENDVPD(xmm, MConst(psGeneratedQNaN));
 
       // If any inputs are NaNs, use those instead
-      for (u32 x : inputs)
+      for (int x : inputs)
       {
         RCOpArg Rx = fpr.Use(x, RCMode::Read);
         RegCache::Realize(Rx);
@@ -198,7 +197,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
       MOVAPD(xmm, tmp);
 
       // If any inputs are NaNs, use those instead
-      for (u32 x : inputs)
+      for (int x : inputs)
       {
         RCOpArg Rx = fpr.Use(x, RCMode::Read);
         RegCache::Realize(Rx);
@@ -277,7 +276,7 @@ void Jit64::fp_arith(UGeckoInstruction inst)
       avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible);
     }
 
-    HandleNaNs(inst, dest, XMM0);
+    HandleNaNs(inst, dest, XMM0, {op1, op2});
     if (single)
       FinalizeSingleResult(Rd, R(dest), packed, true);
     else
@@ -500,7 +499,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
     result_xmm = Rd;
   }
 
-  HandleNaNs(inst, result_xmm, XMM0);
+  HandleNaNs(inst, result_xmm, XMM0, {a, b, c});
 
   if (single)
     FinalizeSingleResult(Rd, R(result_xmm), packed, true);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index 74ae5ba584b1..bea3829e0a60 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -77,7 +77,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
   default:
     PanicAlertFmt("ps_sum WTF!!!");
   }
-  HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1);
+  HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1, {a, b, c});
   FinalizeSingleResult(Rd, R(tmp));
 }
 
@@ -112,7 +112,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
   if (round_input)
     Force25BitPrecision(XMM1, R(XMM1), XMM0);
   MULPD(XMM1, Ra);
-  HandleNaNs(inst, XMM1, XMM0);
+  HandleNaNs(inst, XMM1, XMM0, {a, c});
   FinalizeSingleResult(Rd, R(XMM1));
 }