diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8554db0a1220c..02551c59406a5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16914,8 +16914,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) --> // fma A, B, (fma C, D, fma (E, F, G)). // This requires reassociation because it changes the order of operations. - bool CanReassociate = - Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); + bool CanReassociate = N->getFlags().hasAllowReassociation(); if (CanReassociate) { SDValue FMA, E; if (isFusedOp(N0) && N0.hasOneUse()) { @@ -17581,7 +17580,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. // TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + if ((Options.NoSignedZerosFPMath || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -17668,7 +17667,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } // enable-unsafe-fp-math && AllowNewConst - if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + if ((Options.NoSignedZerosFPMath || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) { // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y)) if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL, @@ -17771,7 +17770,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } - if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + if ((Options.NoSignedZerosFPMath || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y @@ -17911,7 +17910,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -17935,7 +17933,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { + if (Flags.hasAllowReassociation()) { // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) && N0.getOpcode() == ISD::FMUL) { @@ -18088,8 +18086,7 @@ template SDValue DAGCombiner::visitFMA(SDNode *N) { return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); } - // FIXME: use fast math flags instead of Options.UnsafeFPMath - // TODO: Finally migrate away from global TargetOptions. + // FIXME: Finally migrate away from global TargetOptions. if (Options.AllowFPOpFusion == FPOpFusion::Fast || (Options.NoNaNsFPMath && Options.NoInfsFPMath) || (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) { @@ -18113,8 +18110,7 @@ template SDValue DAGCombiner::visitFMA(SDNode *N) { !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2); - bool CanReassociate = - Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); + bool CanReassociate = N->getFlags().hasAllowReassociation(); if (CanReassociate) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) && @@ -18209,9 +18205,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // TODO: Limit this transform based on optsize/minsize - it always creates at // least 1 extra instruction. But the perf win may be substantial enough // that only minsize should restrict this. - bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags Flags = N->getFlags(); - if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal())) + if (LegalDAG || !Flags.hasAllowReciprocal()) return SDValue(); // Skip if current node is a reciprocal/fneg-reciprocal. @@ -18248,7 +18243,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. - if (UnsafeMath || U->getFlags().hasAllowReciprocal()) + if (U->getFlags().hasAllowReciprocal()) Users.insert(U); } } @@ -18861,7 +18856,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. - if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) + if (N->getFlags().hasAllowContract() || N0IsTrunc) return DAG.getNode( ISD::FP_ROUND, DL, VT, N0.getOperand(0), DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 04d6fd5f48cc3..b32720ba2a853 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3908,11 +3908,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, DAG.getTargetConstant( - 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); + 0, dl, TLI.getPointerTy(DAG.getDataLayout())), + Flags)); } void SelectionDAGBuilder::visitFPExt(const User &I) { diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll index d8f7f0a306684..91bb8ac714908 100644 --- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll @@ -11,7 +11,7 @@ ; a / D; b / D; c / D; ; => ; recip = 1.0 / D; a * recip; b * recip; c * recip; -define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 { +define void @three_fdiv_float(float %D, float %a, float %b, float %c) { ; CHECK-SD-LABEL: three_fdiv_float: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s4, #1.00000000 @@ -28,14 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 { ; CHECK-GI-NEXT: fdiv s2, s3, s0 ; CHECK-GI-NEXT: fmov s0, s4 ; CHECK-GI-NEXT: b foo_3f - %div = fdiv float %a, %D - %div1 = fdiv float %b, %D - %div2 = fdiv float %c, %D + %div = fdiv arcp float %a, %D + %div1 = fdiv arcp float %b, %D + %div2 = fdiv arcp float %c, %D tail call void @foo_3f(float %div, float %div1, float %div2) ret void } -define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 { +define void @three_fdiv_double(double %D, double %a, double %b, double %c) { ; CHECK-SD-LABEL: three_fdiv_double: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov d4, #1.00000000 @@ -52,14 +52,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 { ; CHECK-GI-NEXT: fdiv d2, d3, d0 ; CHECK-GI-NEXT: fmov d0, d4 ; CHECK-GI-NEXT: b foo_3d - %div = fdiv double %a, %D - %div1 = fdiv double %b, %D - %div2 = fdiv double %c, %D + %div = fdiv arcp double %a, %D + %div1 = fdiv arcp double %b, %D + %div2 = fdiv arcp double %c, %D tail call void @foo_3d(double %div, double %div1, double %div2) ret void } -define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-SD-LABEL: three_fdiv_4xfloat: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov v4.4s, #1.00000000 @@ -76,14 +76,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, ; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s ; CHECK-GI-NEXT: mov v0.16b, v4.16b ; CHECK-GI-NEXT: b foo_3_4xf - %div = fdiv <4 x float> %a, %D - %div1 = fdiv <4 x float> %b, %D - %div2 = fdiv <4 x float> %c, %D + %div = fdiv arcp <4 x float> %a, %D + %div1 = fdiv arcp <4 x float> %b, %D + %div2 = fdiv arcp <4 x float> %c, %D tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2) ret void } -define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { +define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) { ; CHECK-SD-LABEL: three_fdiv_2xdouble: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov v4.2d, #1.00000000 @@ -100,42 +100,42 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> ; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d ; CHECK-GI-NEXT: mov v0.16b, v4.16b ; CHECK-GI-NEXT: b foo_3_2xd - %div = fdiv <2 x double> %a, %D - %div1 = fdiv <2 x double> %b, %D - %div2 = fdiv <2 x double> %c, %D + %div = fdiv arcp <2 x double> %a, %D + %div1 = fdiv arcp <2 x double> %b, %D + %div2 = fdiv arcp <2 x double> %c, %D tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2) ret void } ; Following test cases check we never combine two FDIVs if neither of them ; calculates a reciprocal. -define void @two_fdiv_float(float %D, float %a, float %b) #0 { +define void @two_fdiv_float(float %D, float %a, float %b) { ; CHECK-LABEL: two_fdiv_float: ; CHECK: // %bb.0: ; CHECK-NEXT: fdiv s3, s1, s0 ; CHECK-NEXT: fdiv s1, s2, s0 ; CHECK-NEXT: fmov s0, s3 ; CHECK-NEXT: b foo_2f - %div = fdiv float %a, %D - %div1 = fdiv float %b, %D + %div = fdiv arcp float %a, %D + %div1 = fdiv arcp float %b, %D tail call void @foo_2f(float %div, float %div1) ret void } -define void @two_fdiv_double(double %D, double %a, double %b) #0 { +define void @two_fdiv_double(double %D, double %a, double %b) { ; CHECK-LABEL: two_fdiv_double: ; CHECK: // %bb.0: ; CHECK-NEXT: fdiv d3, d1, d0 ; CHECK-NEXT: fdiv d1, d2, d0 ; CHECK-NEXT: fmov d0, d3 ; CHECK-NEXT: b foo_2d - %div = fdiv double %a, %D - %div1 = fdiv double %b, %D + %div = fdiv arcp double %a, %D + %div1 = fdiv arcp double %b, %D tail call void @foo_2d(double %div, double %div1) ret void } -define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-SD-LABEL: splat_three_fdiv_4xfloat: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 @@ -157,14 +157,14 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, ; CHECK-GI-NEXT: b foo_3_4xf %D.ins = insertelement <4 x float> poison, float %D, i64 0 %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer - %div = fdiv <4 x float> %a, %splat - %div1 = fdiv <4 x float> %b, %splat - %div2 = fdiv <4 x float> %c, %splat + %div = fdiv arcp <4 x float> %a, %splat + %div1 = fdiv arcp <4 x float> %b, %splat + %div2 = fdiv arcp <4 x float> %c, %splat tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2) ret void } -define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 { +define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #0 { ; CHECK-SD-LABEL: splat_fdiv_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 @@ -183,11 +183,11 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 { entry: %D.ins = insertelement <4 x float> poison, float %D, i64 0 %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer - %div = fdiv <4 x float> %a, %splat + %div = fdiv arcp <4 x float> %a, %splat ret <4 x float> %div } -define @splat_fdiv_nxv4f32(float %D, %a) #1 { +define @splat_fdiv_nxv4f32(float %D, %a) #0 { ; CHECK-LABEL: splat_fdiv_nxv4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fmov s2, #1.00000000 @@ -198,11 +198,11 @@ define @splat_fdiv_nxv4f32(float %D, % entry: %D.ins = insertelement poison, float %D, i64 0 %splat = shufflevector %D.ins, poison, zeroinitializer - %div = fdiv %a, %splat + %div = fdiv arcp %a, %splat ret %div } -define void @splat_three_fdiv_nxv4f32(float %D, %a, %b, %c) #1 { +define void @splat_three_fdiv_nxv4f32(float %D, %a, %b, %c) #0 { ; CHECK-LABEL: splat_three_fdiv_nxv4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fmov s4, #1.00000000 @@ -215,14 +215,14 @@ define void @splat_three_fdiv_nxv4f32(float %D, %a, poison, float %D, i64 0 %splat = shufflevector %D.ins, poison, zeroinitializer - %div = fdiv %a, %splat - %div1 = fdiv %b, %splat - %div2 = fdiv %c, %splat + %div = fdiv arcp %a, %splat + %div1 = fdiv arcp %b, %splat + %div2 = fdiv arcp %c, %splat tail call void @foo_3_nxv4f32( %div, %div1, %div2) ret void } -define @splat_fdiv_nxv2f64(double %D, %a) #1 { +define @splat_fdiv_nxv2f64(double %D, %a) #0 { ; CHECK-LABEL: splat_fdiv_nxv2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -237,7 +237,7 @@ entry: ret %div } -define void @splat_two_fdiv_nxv2f64(double %D, %a, %b) #1 { +define void @splat_two_fdiv_nxv2f64(double %D, %a, %b) #0 { ; CHECK-LABEL: splat_two_fdiv_nxv2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fmov d3, #1.00000000 @@ -249,8 +249,8 @@ define void @splat_two_fdiv_nxv2f64(double %D, %a, poison, double %D, i64 0 %splat = shufflevector %D.ins, poison, zeroinitializer - %div = fdiv %a, %splat - %div1 = fdiv %b, %splat + %div = fdiv arcp %a, %splat + %div1 = fdiv arcp %b, %splat tail call void @foo_2_nxv2f64( %div, %div1) ret void } @@ -264,5 +264,4 @@ declare void @foo_2d(double, double) declare void @foo_3_nxv4f32(, , ) declare void @foo_2_nxv2f64(, ) -attributes #0 = { "unsafe-fp-math"="true" } -attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" } +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll index 576ed270183f6..58ebf2aafa5a8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll @@ -16,7 +16,7 @@ ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) { %sin = call float @llvm.sin.f32(float %x) store float %sin, ptr addrspace(1) %out ret void @@ -29,7 +29,7 @@ define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 { ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) { %y = fmul float 3.0, %x %sin = call float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out @@ -44,9 +44,9 @@ define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 { ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 { - %y = fmul float 3.0, %x - %sin = call float @llvm.sin.f32(float %y) +define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) { + %y = fmul reassoc float 3.0, %x + %sin = call reassoc float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out ret void } @@ -59,7 +59,7 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) { %y = fmul reassoc float 3.0, %x %sin = call reassoc float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out @@ -73,7 +73,7 @@ define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 { ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) { %y = fmul float 2.0, %x %sin = call float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out @@ -88,9 +88,9 @@ define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 { ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 { - %y = fmul float 2.0, %x - %sin = call float @llvm.sin.f32(float %y) +define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) { + %y = fmul reassoc float 2.0, %x + %sin = call reassoc float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out ret void } @@ -103,7 +103,7 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) { %y = fmul reassoc float 2.0, %x %sin = call reassoc float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out @@ -117,7 +117,7 @@ define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 { ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) { %y = fmul float 0x401921FB60000000, %x %sin = call float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out @@ -131,9 +131,9 @@ define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 { - %y = fmul float 0x401921FB60000000, %x - %sin = call float @llvm.sin.f32(float %y) +define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) { + %y = fmul reassoc float 0x401921FB60000000, %x + %sin = call reassoc float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out ret void } @@ -145,7 +145,7 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x ; GFX9-NOT: v_fract_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 { +define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) { %y = fmul reassoc float 0x401921FB60000000, %x %sin = call reassoc float @llvm.sin.f32(float %y) store float %sin, ptr addrspace(1) %out @@ -164,7 +164,7 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) # ; GCN: v_sin_f32 ; GCN: v_sin_f32 ; GCN-NOT: v_sin_f32 -define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 { +define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) { %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx) store <4 x float> %sin, ptr addrspace(1) %out ret void @@ -174,5 +174,3 @@ declare float @llvm.sin.f32(float) #0 declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0 attributes #0 = { nounwind readnone } -attributes #1 = { nounwind "unsafe-fp-math"="false" } -attributes #2 = { nounwind "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/ARM/fp-fast.ll b/llvm/test/CodeGen/ARM/fp-fast.ll index 7d95a5efe9052..6e1c783bfbe3e 100644 --- a/llvm/test/CodeGen/ARM/fp-fast.ll +++ b/llvm/test/CodeGen/ARM/fp-fast.ll @@ -1,5 +1,4 @@ -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math %s -o - \ -; RUN: | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 %s -o - | FileCheck %s ; CHECK: test1 define float @test1(float %x) { @@ -7,7 +6,7 @@ define float @test1(float %x) { ; CHECK: vmul.f32 ; CHECK-NOT: vfma %t1 = fmul float %x, 3.0 - %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1) + %t2 = call reassoc float @llvm.fma.f32(float %x, float 2.0, float %t1) ret float %t2 } @@ -17,7 +16,7 @@ define float @test2(float %x, float %y) { ; CHECK: vfma.f32 ; CHECK-NOT: vmul %t1 = fmul float %x, 3.0 - %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y) + %t2 = call reassoc float @llvm.fma.f32(float %t1, float 2.0, float %y) ret float %t2 } @@ -44,7 +43,7 @@ define float @test5(float %x) { ; CHECK-NOT: vfma ; CHECK: vmul.f32 ; CHECK-NOT: vfma - %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x) + %t2 = call reassoc float @llvm.fma.f32(float %x, float 2.0, float %x) ret float %t2 } @@ -54,7 +53,7 @@ define float @test6(float %x) { ; CHECK: vmul.f32 ; CHECK-NOT: vfma %t1 = fsub float -0.0, %x - %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1) + %t2 = call reassoc float @llvm.fma.f32(float %x, float 5.0, float %t1) ret float %t2 } diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll index bc48d242f88fd..a3bcd708d48ff 100644 --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -395,7 +395,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f ret float %w } -define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 { +define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) { ; CHECK-LABEL: repeated_div_fast( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -416,14 +416,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 ; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor - %z = fmul float %x, %y + %x = fdiv arcp float %a, %divisor + %y = fdiv contract arcp afn float %b, %divisor + %z = fmul contract float %x, %y %w = select i1 %pred, float %z, float %y ret float %w } -define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 { +define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) { ; CHECK-LABEL: repeated_div_fast_sel( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -441,13 +441,13 @@ define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor ; CHECK-NEXT: div.approx.f32 %r5, %r3, %r4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor + %x = fdiv afn float %a, %divisor + %y = fdiv afn float %b, %divisor %w = select i1 %pred, float %x, float %y ret float %w } -define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 { +define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #1 { ; CHECK-LABEL: repeated_div_fast_ftz( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -468,14 +468,14 @@ define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor ; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor - %z = fmul float %x, %y + %x = fdiv arcp float %a, %divisor + %y = fdiv contract arcp afn float %b, %divisor + %z = fmul contract float %x, %y %w = select i1 %pred, float %z, float %y ret float %w } -define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 { +define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 { ; CHECK-LABEL: repeated_div_fast_ftz_sel( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -493,8 +493,8 @@ define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %div ; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor + %x = fdiv afn float %a, %divisor + %y = fdiv afn float %b, %divisor %w = select i1 %pred, float %x, float %y ret float %w } diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll index 1034c3eed3dc0..6693c9044ca2c 100644 --- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll +++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll @@ -20,10 +20,10 @@ define ptx_device float @t1_f32(float %x, float %y, float %z, ; CHECK-UNSAFE-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-UNSAFE-NEXT: ret; float %u, float %v) { - %a = fmul float %x, %y - %b = fmul float %u, %v - %c = fadd float %a, %b - %d = fadd float %c, %z + %a = fmul reassoc float %x, %y + %b = fmul reassoc float %u, %v + %c = fadd reassoc float %a, %b + %d = fadd reassoc float %c, %z ret float %d } @@ -43,10 +43,10 @@ define ptx_device double @t1_f64(double %x, double %y, double %z, ; CHECK-UNSAFE-NEXT: st.param.b64 [func_retval0], %rd7; ; CHECK-UNSAFE-NEXT: ret; double %u, double %v) { - %a = fmul double %x, %y - %b = fmul double %u, %v - %c = fadd double %a, %b - %d = fadd double %c, %z + %a = fmul reassoc double %x, %y + %b = fmul reassoc double %u, %v + %c = fadd reassoc double %a, %b + %d = fadd reassoc double %c, %z ret double %d } diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 0ffcb8c46cef9..acdf783f61388 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1672,9 +1672,9 @@ define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { ; AVX512: # %bb.0: ; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-NEXT: retq - %m0 = fmul <4 x float> %x, - %m1 = fmul <4 x float> %x, - %a = fadd <4 x float> %m0, %m1 + %m0 = fmul contract reassoc <4 x float> %x, + %m1 = fmul contract reassoc <4 x float> %x, + %a = fadd contract reassoc <4 x float> %m0, %m1 ret <4 x float> %a } @@ -1697,9 +1697,9 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; AVX512-NEXT: retq - %m0 = fmul <4 x float> %x, - %m1 = fmul <4 x float> %m0, - %a = fadd <4 x float> %m1, %y + %m0 = fmul contract reassoc <4 x float> %x, + %m1 = fmul contract reassoc <4 x float> %m0, + %a = fadd contract reassoc <4 x float> %m1, %y ret <4 x float> %a } diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll index fe5ddca67470c..d910110467ee0 100644 --- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -1053,9 +1053,9 @@ define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 { ; AVX512: # %bb.0: ; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512-NEXT: retq - %m0 = fmul <16 x float> %x, - %m1 = fmul <16 x float> %x, - %a = fadd <16 x float> %m0, %m1 + %m0 = fmul contract reassoc <16 x float> %x, + %m1 = fmul contract reassoc <16 x float> %x, + %a = fadd contract reassoc <16 x float> %m0, %m1 ret <16 x float> %a } @@ -1080,9 +1080,9 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; AVX512-NEXT: retq - %m0 = fmul <16 x float> %x, - %m1 = fmul <16 x float> %m0, - %a = fadd <16 x float> %m1, %y + %m0 = fmul contract reassoc <16 x float> %x, + %m1 = fmul contract reassoc <16 x float> %m0, + %a = fadd contract reassoc <16 x float> %m1, %y ret <16 x float> %a } diff --git a/llvm/test/CodeGen/X86/fp-double-rounding.ll b/llvm/test/CodeGen/X86/fp-double-rounding.ll index 543908a10df29..957c0280f6653 100644 --- a/llvm/test/CodeGen/X86/fp-double-rounding.ll +++ b/llvm/test/CodeGen/X86/fp-double-rounding.ll @@ -4,16 +4,25 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--" -; CHECK-LABEL: double_rounding: +; CHECK-LABEL: double_rounding_safe: ; SAFE: callq __trunctfdf2 ; SAFE-NEXT: cvtsd2ss %xmm0 +define void @double_rounding_safe(ptr %x, ptr %f) { +entry: + %0 = load fp128, ptr %x, align 16 + %1 = fptrunc fp128 %0 to double + %2 = fptrunc double %1 to float + store float %2, ptr %f, align 4 + ret void +} +; CHECK-LABEL: double_rounding: ; UNSAFE: callq __trunctfsf2 ; UNSAFE-NOT: cvt define void @double_rounding(ptr %x, ptr %f) { entry: %0 = load fp128, ptr %x, align 16 - %1 = fptrunc fp128 %0 to double - %2 = fptrunc double %1 to float + %1 = fptrunc contract fp128 %0 to double + %2 = fptrunc contract double %1 to float store float %2, ptr %f, align 4 ret void }