diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4f9471ca4aa6a..c790df26bc8f9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -25464,6 +25464,29 @@ static SDValue performCSELCombine(SDNode *N, } } + // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't + // use overflow flags, to avoid the comparison with zero. In case of success, + // this also replaces the original SUB(x,y) with the newly created SUBS(x,y). + // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB + // nodes with their SUBS equivalent as is already done for other flag-setting + // operators, in which case doing the replacement here becomes redundant. + if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) && + isNullConstant(Cond.getOperand(1))) { + SDValue Sub = Cond.getOperand(0); + AArch64CC::CondCode CC = + static_cast(N->getConstantOperandVal(2)); + if (Sub.getOpcode() == ISD::SUB && + (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || + CC == AArch64CC::PL)) { + SDLoc DL(N); + SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), + Sub.getOperand(0), Sub.getOperand(1)); + DCI.CombineTo(Sub.getNode(), Subs); + DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1)); + return SDValue(N, 0); + } + } + // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z if (SDValue CondLast = foldCSELofLASTB(N, DAG)) return CondLast; diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 75247823ee793..02c76ba7343a0 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -26,8 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -43,8 +41,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -60,8 +57,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -93,8 +89,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index bbdb116851710..bf52e71ec21fe 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -25,8 +24,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -41,8 +39,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -57,8 +54,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -88,8 +84,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -215,8 +210,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) @@ -229,8 +223,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) @@ -287,8 +280,7 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i8 %a, %b @@ -302,8 +294,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sge i16 %a, %b @@ -508,9 +499,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: addp d0, v0.2d -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w8, w8, mi ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: add x0, x9, x8 @@ -533,8 +523,7 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, %b @@ -548,8 +537,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sle i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index d07f099a536ab..400031b64cb84 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -26,8 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -43,8 +41,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -60,8 +57,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -93,8 +89,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 1045ee20dc734..8d2b0b0742d7d 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -25,8 +24,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -41,8 +39,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -57,8 +54,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -88,8 +84,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -219,8 +214,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -233,8 +227,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -293,8 +286,7 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b @@ -308,8 +300,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b @@ -373,10 +364,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: add x0, x9, x8 ; CHECK-NEXT: ret @@ -398,8 +388,7 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b @@ -413,8 +402,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll new file mode 100644 index 0000000000000..5036be9c45e69 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll @@ -0,0 +1,112 @@ +; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +; These tests ensure that we don't combine +; CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) +; if the flags set by SUBS(SUB(x,y), 0) have more than one use. +; +; This restriction exists because combining SUBS(SUB(x,y), 0) -> SUBS(x,y) is +; only valid if there are no users of the overflow flags (C/V) generated by the +; SUBS. Currently, we only check the flags used by the CSEL, and therefore we +; conservatively reject cases where the SUBS's flags have other uses. + +target triple = "aarch64-unknown-linux-gnu" + +; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs:' +; CHECK-NEXT: SelectionDAG has 13 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t5: i32 = sub t2, t4 +; CHECK-NEXT: t14: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> +; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t14:1 +; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 +; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 + +; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs:' +; CHECK-NEXT: SelectionDAG has 11 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t18: i32,i32 = AArch64ISD::SUBS t2, t4 +; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t18:1 +; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 +; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 + +define i32 @combine_subs(i32 %a, i32 %b) { + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' +; CHECK-NEXT: SelectionDAG has 14 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t5: i32 = sub t2, t4 +; CHECK-NEXT: t15: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> +; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t15:1 +; CHECK-NEXT: t10: i32 = add t17, t5 +; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 +; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 + +; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' +; CHECK-NEXT: SelectionDAG has 12 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t19:1 +; CHECK-NEXT: t10: i32 = add t17, t19 +; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 +; CHECK-NEXT: t19: i32,i32 = AArch64ISD::SUBS t2, t4 +; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 + +define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) { + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + %add = add i32 %sel, %sub + ret i32 %add +} + +; CHECK-LABEL: Legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' +; CHECK-NEXT: SelectionDAG has 19 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 +; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 +; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 +; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 +; CHECK-NEXT: t15: i32 = add t24, t23 +; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 +; CHECK-NEXT: t9: i32 = sub t2, t4 +; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> +; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 + +; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' +; CHECK-NEXT: SelectionDAG has 19 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 +; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 +; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 +; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 +; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 +; CHECK-NEXT: t15: i32 = add t24, t23 +; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 +; CHECK-NEXT: t9: i32 = sub t2, t4 +; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> +; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 + +define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) { + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + %other = select i1 %cc, i32 %c, i32 %d + %add = add i32 %sel, %other + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index 15c1dffae749e..d881e998db2bc 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -255,12 +255,11 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w9, w1 -; CHECK-NEXT: sxth w10, w0 +; CHECK-NEXT: sxth w9, w0 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, sxth ; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -278,12 +277,11 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w1, #0xffff -; CHECK-NEXT: and w10, w0, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, uxth ; CHECK-NEXT: cneg w8, w8, ls +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -382,12 +380,11 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w9, w1 -; CHECK-NEXT: sxtb w10, w0 +; CHECK-NEXT: sxtb w9, w0 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, sxtb ; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -405,12 +402,11 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w1, #0xff -; CHECK-NEXT: and w10, w0, #0xff +; CHECK-NEXT: and w9, w0, #0xff ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w10, w9 -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: subs w9, w9, w1, uxtb ; CHECK-NEXT: cneg w8, w8, ls +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret