Fix FP32 normalize issues

Previously, we used the fmul.legacy intrinsic (amdgcn_fmul_legacy) to perform the multiplication mul(x, rsq(dot(x))) when computing normalize(x) for FP32. This was done so that a zero vector x is normalized to a zero vector rather than to NaNs. However, v_mul_legacy_f32 mishandles mul(-0.0, +value): the expected result is -0.0, but the sign of the zero is dropped. To fix this, we stop using v_mul_legacy_f32 and instead explicitly check for the zero case (dot(x, x) == 0), selecting 0 as the scale factor so that an ordinary multiply can be used. The GLSL spec doesn't say anything about x = 0, and it is unclear whether this FP32 special case exists to tolerate invalid shader input from some games; the workaround is kept regardless.
linqun · Oct 25, 2023 · 4953e9d · 4953e9d
1 parent 55c9f2a
commit 4953e9d
Showing 1 changed file with 7 additions and 7 deletions.
diff --git a/lgc/builder/ArithBuilder.cpp b/lgc/builder/ArithBuilder.cpp
@@ -817,13 +817,13 @@ Value *BuilderImpl::CreateNormalizeVector(Value *x, const Twine &instName) {
   Value *dot = CreateDotProduct(x, x);
   Value *sqrt = CreateSqrt(dot);
   Value *rsq = CreateFDiv(ConstantFP::get(sqrt->getType(), 1.0), sqrt);
-  // We use fmul.legacy for float so that a zero vector is normalized to a zero vector,
-  // rather than NaNs. We must scalarize it ourselves.
-  Value *result = scalarize(x, [this, rsq](Value *x) -> Value * {
-    if (rsq->getType()->isFloatTy())
-      return CreateIntrinsic(Intrinsic::amdgcn_fmul_legacy, {}, {x, rsq});
-    return CreateFMul(x, rsq);
-  });
+  if (x->getType()->getScalarType()->isFloatTy()) {
+    // Make sure a FP32 zero vector is normalized to a FP32 zero vector, rather than NaNs.
+    auto zero = ConstantFP::get(getFloatTy(), 0.0);
+    auto isZeroDot = CreateFCmpOEQ(dot, zero);
+    rsq = CreateSelect(isZeroDot, zero, rsq);
+  }
+  Value *result = scalarize(x, [this, rsq](Value *x) -> Value * { return CreateFMul(x, rsq); });
   result->setName(instName);
   return result;
 }