[CodeGenPrepare] Prepare the usubo check with the opposite overflow condition #147194
@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

We check the overflow condition before CodeGenPrepare turns it into the intrinsic, but not the opposite (negated) condition; this patch adds that handling (see the IR sketch after the diff below). Also need help fixing the regression for 64-bit overflow on 32-bit targets.

Patch is 56.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147194.diff

3 Files Affected:
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9bbb89e37865d..303fb8d08a0b7 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -470,10 +470,11 @@ class CodeGenPrepare {
bool tryToSinkFreeOperands(Instruction *I);
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
- CmpInst *Cmp, Intrinsic::ID IID);
+ CmpInst *Cmp, Intrinsic::ID IID,
+ bool NegateOverflow = false);
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool optimizeURem(Instruction *Rem);
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUSubWithNegatedOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool unfoldPowerOf2Test(CmpInst *Cmp);
void verifyBFIUpdates(Function &F);
@@ -1552,7 +1553,7 @@ static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *Arg0, Value *Arg1,
CmpInst *Cmp,
- Intrinsic::ID IID) {
+ Intrinsic::ID IID, bool NegateOverflow) {
auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
if (!isIVIncrement(BO, LI))
return false;
@@ -1624,6 +1625,8 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
assert(BO->hasOneUse() &&
"Patterns with XOr should use the BO only in the compare");
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+ if (NegateOverflow)
+ OV = Builder.CreateXor(OV, ConstantInt::getAllOnesValue(OV->getType()));
replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
Cmp->eraseFromParent();
BO->eraseFromParent();
@@ -1759,6 +1762,71 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
return true;
}
+bool CodeGenPrepare::combineToUSubWithNegatedOverflow(CmpInst *Cmp,
+ ModifyDT &ModifiedDT) {
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+ if (isa<Constant>(A) && isa<Constant>(B))
+ return false;
+
+ // Convert (A u<= B) to (B u>= A) to simplify pattern matching.
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_ULE) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_UGE;
+ }
+ // Convert special-case: (A != 0) is the same as (A u>= 1).
+ if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+ B = ConstantInt::get(B->getType(), 1);
+ Pred = ICmpInst::ICMP_UGE;
+ }
+
+ // Convert special-case: (A == 0) is the same as (0 u>= A).
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_UGE;
+ }
+
+ if (Pred != ICmpInst::ICMP_UGE)
+ return false;
+
+ // Walk the users of a variable operand of a compare looking for a subtract or
+ // add with that same operand. Also match the 2nd operand of the compare to
+ // the add/sub, but that may be a negated constant operand of an add.
+ Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+ BinaryOperator *Sub = nullptr;
+ for (User *U : CmpVariableOperand->users()) {
+ // A - B, A u>= B --> usubo(A, B) with negated overflow
+ if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+
+ // A + (-C), A u>= C (canonicalized form of (sub A, C))
+ const APInt *CmpC, *AddC;
+ if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+ match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+ }
+ if (!Sub)
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+ TLI->getValueType(*DL, Sub->getType()),
+ Sub->hasNUsesOrMore(1)))
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+ Cmp, Intrinsic::usub_with_overflow, true))
+ return false;
+
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = ModifyDT::ModifyInstDT;
+ return true;
+}
+
// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
// The same transformation exists in DAG combiner, but we repeat it here because
// DAG builder can break the pattern by moving icmp into a successor block.
@@ -2224,6 +2292,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
+ if (combineToUSubWithNegatedOverflow(Cmp, ModifiedDT))
+ return true;
+
if (unfoldPowerOf2Test(Cmp))
return true;
diff --git a/llvm/test/CodeGen/X86/sub-with-overflow.ll b/llvm/test/CodeGen/X86/sub-with-overflow.ll
index d3bd3b1cdf0ac..3aa1c8e217ac8 100644
--- a/llvm/test/CodeGen/X86/sub-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/sub-with-overflow.ll
@@ -93,3 +93,82 @@ entry:
ret i1 %obit
}
+
+define i1 @usubo_uge_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
+; CHECK-LABEL: usubo_uge_i64_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retl
+ %s = sub i64 %x, %y
+ %ov = icmp uge i64 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_uge_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
+; CHECK-LABEL: usubo_uge_i64_math_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset %esi, -8
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movl %esi, 4(%ecx)
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: retl
+ %s = sub i64 %x, %y
+ store i64 %s, ptr %p
+ %ov = icmp uge i64 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_ule_i32_overflow_used(i32 %x, i32 %y, ptr %p) {
+; CHECK-LABEL: usubo_ule_i32_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retl
+ %s = sub i32 %y, %x
+ %ov = icmp ule i32 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_ne_zero_i16_overflow_used(i16 %x, ptr %p) {
+; CHECK-LABEL: usubo_ne_zero_i16_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: subw $1, %dx
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movw %dx, (%ecx)
+; CHECK-NEXT: retl
+ %s = sub i16 %x, 1
+ store i16 %s, ptr %p
+ %ov = icmp ne i16 %x, 0
+ ret i1 %ov
+}
+
+define i1 @usubo_eq_zero_i8_overflow_used(i8 %x, ptr %p) {
+; CHECK-LABEL: usubo_eq_zero_i8_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: subb {{[0-9]+}}(%esp), %dl
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movb %dl, (%ecx)
+; CHECK-NEXT: retl
+ %s = sub i8 0, %x
+ store i8 %s, ptr %p
+ %ov = icmp eq i8 %x, 0
+ ret i1 %ov
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index 653f346356488..e6c99b32ea0dd 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -12,6 +12,16 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG14:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG14]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG14]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META9:![0-9]+]], !DIExpression(), [[DBG14]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META11:![0-9]+]], !DIExpression(), [[META15:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG16:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META13:![0-9]+]], !DIExpression(), [[DBG16]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG17:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -25,8 +35,19 @@ define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG23:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG23]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG23]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META20:![0-9]+]], !DIExpression(), [[DBG23]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META21:![0-9]+]], !DIExpression(), [[META24:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG25:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META22:![0-9]+]], !DIExpression(), [[DBG25]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG26:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG27:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -42,6 +63,16 @@ define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG33:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG33]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG33]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META30:![0-9]+]], !DIExpression(), [[DBG33]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META31:![0-9]+]], !DIExpression(), [[META34:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG35:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META32:![0-9]+]], !DIExpression(), [[DBG35]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG36:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -55,8 +86,19 @@ define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG42:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG42]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG42]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META39:![0-9]+]], !DIExpression(), [[DBG42]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META40:![0-9]+]], !DIExpression(), [[META43:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG44:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META41:![0-9]+]], !DIExpression(), [[DBG44]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG45:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG46:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -72,6 +114,16 @@ define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG52:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG52]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG52]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META49:![0-9]+]], !DIExpression(), [[DBG52]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META50:![0-9]+]], !DIExpression(), [[META53:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG54:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META51:![0-9]+]], !DIExpression(), [[DBG54]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG55:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -85,8 +137,19 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG61:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG61]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG61]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META58:![0-9]+]], !DIExpression(), [[DBG61]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META59:![0-9]+]], !DIExpression(), [[META62:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG63:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META60:![0-9]+]], !DIExpression(), [[DBG63]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG64:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG65:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -109,6 +172,20 @@ define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; CHECK: exit:
; CHECK-NEXT: ret i64 0
;
+; DEBUG-LABEL: @uaddo4(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG71:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META68:![0-9]+]], !DIExpression(), [[DBG71]])
+; DEBUG-NEXT: #dbg_value(i1 poison, [[META69:![0-9]+]], !DIExpression(), [[META72:![0-9]+]])
+; DEBUG-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]], !dbg [[DBG73:![0-9]+]]
+; DEBUG: next:
+; DEBUG-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]], !dbg [[META72]]
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42, !dbg [[DBG74:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META70:![0-9]+]], !DIExpression(), [[DBG74]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG75:![0-9]+]]
+; DEBUG: exit:
+; DEBUG-NEXT: ret i64 0, !dbg [[DBG76:![0-9]+]]
+;
entry:
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -126,7 +203,7 @@ define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; CHECK-LABEL: @uaddo5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT: store i64 [[ADD]], ptr [[PTR:%.*]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[PTR:%.*]], align 8
; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
; CHECK: next:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]]
@@ -135,6 +212,21 @@ define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; CHECK: exit:
; CHECK-NEXT: ret i64 0
;
+; DEBUG-LABEL: @uaddo5(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG82:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META79:![0-9]+]], !DIExpression(), [[DBG82]])
+; DEBUG-NEXT: store i64 [[ADD]], ptr [[PTR:%.*]], align 8, !dbg [[DBG83:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 poison, [[META80:![0-9]+]], !DIExpression(), [[META84:![0-9]+]])
+; DEBUG-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]], !dbg [[DBG85:![0-9]+]]
+; DEBUG: next:
+; DEBUG-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]], !dbg [[META84]]
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42, !dbg [[DBG86:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META81:![0-9]+]], !DIExpression(), [[DBG86]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG87:![0-9]+]]
+; DEBUG: exit:
+; DEBUG-NEXT: ret i64 0, !dbg [[DBG88:![0-9]+]]
+;
entry:
%add = add i64 %b, %a
store i64 %add, ptr %ptr
@@ -157,6 +249,15 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor(
+; DEBUG-NEXT: #dbg_value(i64 poison, [[META91:![0-9]+]], !DIExpression(), [[META94:![0-9]+]])
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]), !dbg [[DBG95:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG95]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META92:![0-9]+]], !DIExpression(), [[DBG95]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG96:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META93:![0-9]+]], !DIExpression(), [[DBG96]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG97:![0-9]+]]
;
%x = xor i64 %a, -1
%cmp = icmp ult i64 %x, %b
@@ -170,6 +271,15 @@ define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor_commuted(
+; DEBUG-NEXT: #dbg_value(i64 poison, [[META100:![0-9]+]], !DIExpression(), [[META103:![0-9]+]])
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]), !dbg [[DBG104:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG104]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META101:![0-9]+]], !DIExpression(), [[DBG104]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG105:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META102:![0-9]+]], !DIExpression(), [[DBG105]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG106:![0-9]+]]
;
%x = xor i64 %a, -1
%cmp = icmp ult i64 %x, %b
@@ -186,6 +296,16 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
; CHECK-NEXT: call void @use(i64 [[X]])
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor_multi_use(
+; DEBUG-NEXT: [[X:%.*]] = xor i64 -1, [[A:%.*]], !dbg [[DBG112:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[X]], [[META109:![0-9]+]], !DIExpression(), [[DBG112]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]], !dbg [[DBG113:![0...
[truncated]
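To make the intended fold concrete, here is a minimal IR sketch, assuming a target whose shouldFormOverflowOp hook accepts ISD::USUBO (value names are illustrative, not taken from the patch):

; before CodeGenPrepare:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  %ov = icmp uge i64 %x, %y                  ; negated usubo overflow condition

; after combineToUSubWithNegatedOverflow fires:
  %m = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
  %math = extractvalue { i64, i1 } %m, 0
  %ov.raw = extractvalue { i64, i1 } %m, 1
  %ov = xor i1 %ov.raw, true                 ; all-ones xor on i1 negates the flag
  store i64 %math, ptr %p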
@llvm/pr-subscribers-llvm-transforms Author: AZero13 (AZero13) (same notification as above, with an identical description and diff)
✅ With the latest revision this PR passed the C/C++ code formatter.
…ondition

We check the overflow condition before CodeGenPrepare turns it into the intrinsic, but not the opposite. Handling the negated condition can only be beneficial: it is lowered to a simple adds + carry check.

Also need help fixing the regression for 64-bit overflow on 32-bit targets.
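For the `A != 0` special case, the compare is treated as `A u>= 1` and paired with `sub A, 1`. A minimal sketch of the IR after the fold, mirroring the i16 test above (value names are illustrative):

  %m = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %x, i16 1)
  %math = extractvalue { i16, i1 } %m, 0
  %ov.raw = extractvalue { i16, i1 } %m, 1
  %ov = xor i1 %ov.raw, true                 ; usubo overflows iff %x == 0, so %ov is (%x != 0)

On x86 the negated flag is the carry-clear condition, which is why the new tests check a single setae.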