[CodeGenPrepare] Prepare the usubo check with the opposite overflow condition #147194
@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

We check the overflow condition before CodeGenPrepare turns it into the intrinsic, but not the opposite (negated) condition; this patch adds that handling (see the IR sketch after the diff below). Also need help fixing the regression for 64-bit overflow on 32-bit targets.

Patch is 56.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147194.diff

3 Files Affected:
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9bbb89e37865d..303fb8d08a0b7 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -470,10 +470,11 @@ class CodeGenPrepare {
bool tryToSinkFreeOperands(Instruction *I);
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
- CmpInst *Cmp, Intrinsic::ID IID);
+ CmpInst *Cmp, Intrinsic::ID IID,
+ bool NegateOverflow = false);
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool optimizeURem(Instruction *Rem);
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUSubWithNegatedOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool unfoldPowerOf2Test(CmpInst *Cmp);
void verifyBFIUpdates(Function &F);
@@ -1552,7 +1553,7 @@ static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *Arg0, Value *Arg1,
CmpInst *Cmp,
- Intrinsic::ID IID) {
+ Intrinsic::ID IID, bool NegateOverflow) {
auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
if (!isIVIncrement(BO, LI))
return false;
@@ -1624,6 +1625,8 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
assert(BO->hasOneUse() &&
"Patterns with XOr should use the BO only in the compare");
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+ if (NegateOverflow)
+ OV = Builder.CreateXor(OV, ConstantInt::getAllOnesValue(OV->getType()));
replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
Cmp->eraseFromParent();
BO->eraseFromParent();
@@ -1759,6 +1762,71 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
return true;
}
+bool CodeGenPrepare::combineToUSubWithNegatedOverflow(CmpInst *Cmp,
+ ModifyDT &ModifiedDT) {
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+ if (isa<Constant>(A) && isa<Constant>(B))
+ return false;
+
+ // Convert (A u<= B) to (B u>= A) to simplify pattern matching.
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_ULE) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_UGE;
+ }
+ // Convert special-case: (A != 0) is the same as (A u>= 1).
+ if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+ B = ConstantInt::get(B->getType(), 1);
+ Pred = ICmpInst::ICMP_UGE;
+ }
+
+ // Convert special-case: (A == 0) is the same as (0 u>= A).
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_UGE;
+ }
+
+ if (Pred != ICmpInst::ICMP_UGE)
+ return false;
+
+ // Walk the users of a variable operand of a compare looking for a subtract or
+ // add with that same operand. Also match the 2nd operand of the compare to
+ // the add/sub, but that may be a negated constant operand of an add.
+ Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+ BinaryOperator *Sub = nullptr;
+ for (User *U : CmpVariableOperand->users()) {
+ // A - B, A u>= B --> usubo(A, B) with negated overflow
+ if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+
+ // A + (-C), A u>= C (canonicalized form of (sub A, C))
+ const APInt *CmpC, *AddC;
+ if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+ match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+ }
+ if (!Sub)
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+ TLI->getValueType(*DL, Sub->getType()),
+ Sub->hasNUsesOrMore(1)))
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+ Cmp, Intrinsic::usub_with_overflow, true))
+ return false;
+
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = ModifyDT::ModifyInstDT;
+ return true;
+}
+
// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
// The same transformation exists in DAG combiner, but we repeat it here because
// DAG builder can break the pattern by moving icmp into a successor block.
@@ -2224,6 +2292,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
+ if (combineToUSubWithNegatedOverflow(Cmp, ModifiedDT))
+ return true;
+
if (unfoldPowerOf2Test(Cmp))
return true;
diff --git a/llvm/test/CodeGen/X86/sub-with-overflow.ll b/llvm/test/CodeGen/X86/sub-with-overflow.ll
index d3bd3b1cdf0ac..3aa1c8e217ac8 100644
--- a/llvm/test/CodeGen/X86/sub-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/sub-with-overflow.ll
@@ -93,3 +93,82 @@ entry:
ret i1 %obit
}
+
+define i1 @usubo_uge_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
+; CHECK-LABEL: usubo_uge_i64_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retl
+ %s = sub i64 %x, %y
+ %ov = icmp uge i64 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_uge_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
+; CHECK-LABEL: usubo_uge_i64_math_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset %esi, -8
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movl %esi, 4(%ecx)
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: retl
+ %s = sub i64 %x, %y
+ store i64 %s, ptr %p
+ %ov = icmp uge i64 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_ule_i32_overflow_used(i32 %x, i32 %y, ptr %p) {
+; CHECK-LABEL: usubo_ule_i32_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retl
+ %s = sub i32 %y, %x
+ %ov = icmp ule i32 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_ne_zero_i16_overflow_used(i16 %x, ptr %p) {
+; CHECK-LABEL: usubo_ne_zero_i16_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: subw $1, %dx
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movw %dx, (%ecx)
+; CHECK-NEXT: retl
+ %s = sub i16 %x, 1
+ store i16 %s, ptr %p
+ %ov = icmp ne i16 %x, 0
+ ret i1 %ov
+}
+
+define i1 @usubo_eq_zero_i8_overflow_used(i8 %x, ptr %p) {
+; CHECK-LABEL: usubo_eq_zero_i8_overflow_used:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: subb {{[0-9]+}}(%esp), %dl
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movb %dl, (%ecx)
+; CHECK-NEXT: retl
+ %s = sub i8 0, %x
+ store i8 %s, ptr %p
+ %ov = icmp eq i8 %x, 0
+ ret i1 %ov
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index 653f346356488..e6c99b32ea0dd 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -12,6 +12,16 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG14:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG14]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG14]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META9:![0-9]+]], !DIExpression(), [[DBG14]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META11:![0-9]+]], !DIExpression(), [[META15:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG16:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META13:![0-9]+]], !DIExpression(), [[DBG16]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG17:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -25,8 +35,19 @@ define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG23:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG23]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG23]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META20:![0-9]+]], !DIExpression(), [[DBG23]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META21:![0-9]+]], !DIExpression(), [[META24:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG25:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META22:![0-9]+]], !DIExpression(), [[DBG25]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG26:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG27:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -42,6 +63,16 @@ define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG33:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG33]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG33]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META30:![0-9]+]], !DIExpression(), [[DBG33]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META31:![0-9]+]], !DIExpression(), [[META34:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG35:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META32:![0-9]+]], !DIExpression(), [[DBG35]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG36:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -55,8 +86,19 @@ define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG42:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG42]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG42]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META39:![0-9]+]], !DIExpression(), [[DBG42]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META40:![0-9]+]], !DIExpression(), [[META43:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG44:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META41:![0-9]+]], !DIExpression(), [[DBG44]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG45:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG46:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -72,6 +114,16 @@ define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG52:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG52]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG52]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META49:![0-9]+]], !DIExpression(), [[DBG52]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META50:![0-9]+]], !DIExpression(), [[META53:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG54:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META51:![0-9]+]], !DIExpression(), [[DBG54]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG55:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -85,8 +137,19 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG61:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG61]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG61]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META58:![0-9]+]], !DIExpression(), [[DBG61]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META59:![0-9]+]], !DIExpression(), [[META62:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG63:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META60:![0-9]+]], !DIExpression(), [[DBG63]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG64:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG65:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -109,6 +172,20 @@ define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; CHECK: exit:
; CHECK-NEXT: ret i64 0
;
+; DEBUG-LABEL: @uaddo4(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG71:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META68:![0-9]+]], !DIExpression(), [[DBG71]])
+; DEBUG-NEXT: #dbg_value(i1 poison, [[META69:![0-9]+]], !DIExpression(), [[META72:![0-9]+]])
+; DEBUG-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]], !dbg [[DBG73:![0-9]+]]
+; DEBUG: next:
+; DEBUG-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]], !dbg [[META72]]
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42, !dbg [[DBG74:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META70:![0-9]+]], !DIExpression(), [[DBG74]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG75:![0-9]+]]
+; DEBUG: exit:
+; DEBUG-NEXT: ret i64 0, !dbg [[DBG76:![0-9]+]]
+;
entry:
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -126,7 +203,7 @@ define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; CHECK-LABEL: @uaddo5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT: store i64 [[ADD]], ptr [[PTR:%.*]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[PTR:%.*]], align 8
; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
; CHECK: next:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]]
@@ -135,6 +212,21 @@ define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; CHECK: exit:
; CHECK-NEXT: ret i64 0
;
+; DEBUG-LABEL: @uaddo5(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG82:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META79:![0-9]+]], !DIExpression(), [[DBG82]])
+; DEBUG-NEXT: store i64 [[ADD]], ptr [[PTR:%.*]], align 8, !dbg [[DBG83:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 poison, [[META80:![0-9]+]], !DIExpression(), [[META84:![0-9]+]])
+; DEBUG-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]], !dbg [[DBG85:![0-9]+]]
+; DEBUG: next:
+; DEBUG-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]], !dbg [[META84]]
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42, !dbg [[DBG86:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META81:![0-9]+]], !DIExpression(), [[DBG86]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG87:![0-9]+]]
+; DEBUG: exit:
+; DEBUG-NEXT: ret i64 0, !dbg [[DBG88:![0-9]+]]
+;
entry:
%add = add i64 %b, %a
store i64 %add, ptr %ptr
@@ -157,6 +249,15 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor(
+; DEBUG-NEXT: #dbg_value(i64 poison, [[META91:![0-9]+]], !DIExpression(), [[META94:![0-9]+]])
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]), !dbg [[DBG95:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG95]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META92:![0-9]+]], !DIExpression(), [[DBG95]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG96:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META93:![0-9]+]], !DIExpression(), [[DBG96]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG97:![0-9]+]]
;
%x = xor i64 %a, -1
%cmp = icmp ult i64 %x, %b
@@ -170,6 +271,15 @@ define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor_commuted(
+; DEBUG-NEXT: #dbg_value(i64 poison, [[META100:![0-9]+]], !DIExpression(), [[META103:![0-9]+]])
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]), !dbg [[DBG104:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG104]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META101:![0-9]+]], !DIExpression(), [[DBG104]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG105:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META102:![0-9]+]], !DIExpression(), [[DBG105]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG106:![0-9]+]]
;
%x = xor i64 %a, -1
%cmp = icmp ult i64 %x, %b
@@ -186,6 +296,16 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
; CHECK-NEXT: call void @use(i64 [[X]])
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor_multi_use(
+; DEBUG-NEXT: [[X:%.*]] = xor i64 -1, [[A:%.*]], !dbg [[DBG112:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[X]], [[META109:![0-9]+]], !DIExpression(), [[DBG112]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]], !dbg [[DBG113:![0...
[truncated]
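To make the intended fold concrete, here is a minimal IR sketch, assuming a target whose shouldFormOverflowOp hook accepts ISD::USUBO (value names are illustrative, not taken from the patch):

; before CodeGenPrepare:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  %ov = icmp uge i64 %x, %y                  ; negated usubo overflow condition

; after combineToUSubWithNegatedOverflow fires:
  %m = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
  %math = extractvalue { i64, i1 } %m, 0
  %ov.raw = extractvalue { i64, i1 } %m, 1
  %ov = xor i1 %ov.raw, true                 ; all-ones xor on i1 negates the flag
  store i64 %math, ptr %p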
@llvm/pr-subscribers-llvm-transforms Author: AZero13 (AZero13) (same notification as above, with an identical description and diff)
✅ With the latest revision this PR passed the C/C++ code formatter.
…ondition

We check the overflow condition before CodeGenPrepare turns it into the intrinsic, but not the opposite. Handling the negated condition can only be beneficial: it is lowered to a simple adds + carry check.

Also need help fixing the regression for 64-bit overflow on 32-bit targets.
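For the `A != 0` special case, the compare is treated as `A u>= 1` and paired with `sub A, 1`. A minimal sketch of the IR after the fold, mirroring the i16 test above (value names are illustrative):

  %m = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %x, i16 1)
  %math = extractvalue { i16, i1 } %m, 0
  %ov.raw = extractvalue { i16, i1 } %m, 1
  %ov = xor i1 %ov.raw, true                 ; usubo overflows iff %x == 0, so %ov is (%x != 0)

On x86 the negated flag is the carry-clear condition, which is why the new tests check a single setae.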