[CodeGenPrepare] Prepare the usubo check with the opposite overflow condition #147194

Draft: wants to merge 3 commits into main
77 changes: 75 additions & 2 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -470,10 +470,12 @@ class CodeGenPrepare {

bool tryToSinkFreeOperands(Instruction *I);
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
CmpInst *Cmp, Intrinsic::ID IID);
CmpInst *Cmp, Intrinsic::ID IID,
bool NegateOverflow = false);
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool optimizeURem(Instruction *Rem);
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool combineToUSubWithNegatedOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool unfoldPowerOf2Test(CmpInst *Cmp);
void verifyBFIUpdates(Function &F);
@@ -1552,7 +1554,8 @@ static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *Arg0, Value *Arg1,
CmpInst *Cmp,
Intrinsic::ID IID) {
Intrinsic::ID IID,
bool NegateOverflow) {
auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
if (!isIVIncrement(BO, LI))
return false;
@@ -1624,6 +1627,8 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
assert(BO->hasOneUse() &&
"Patterns with XOr should use the BO only in the compare");
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
if (NegateOverflow)
OV = Builder.CreateXor(OV, ConstantInt::getAllOnesValue(OV->getType()));
replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
Cmp->eraseFromParent();
BO->eraseFromParent();
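
Context for the new NegateOverflow flag, as an illustrative sketch rather than code from this patch: the overflow bit extracted from the math intrinsic is an i1, so xor'ing it with the all-ones constant is simply a logical not, and the compare's users observe the opposite condition. In IR terms the emitted shape looks roughly like this:

; Hypothetical IR shape when NegateOverflow is set; names are illustrative.
define i1 @negated_usubo_flag(i32 %a, i32 %b) {
  %math = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %ov = extractvalue { i32, i1 } %math, 1
  ; CreateXor(OV, all-ones) on an i1 value is a logical not of the overflow bit.
  %ov.not = xor i1 %ov, true
  ret i1 %ov.not
}

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
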
@@ -1759,6 +1764,71 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
return true;
}

bool CodeGenPrepare::combineToUSubWithNegatedOverflow(CmpInst *Cmp,
ModifyDT &ModifiedDT) {
// We are not expecting non-canonical/degenerate code. Just bail out.
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
if (isa<Constant>(A) && isa<Constant>(B))
return false;

// Convert (A u<= B) to (B u>= A) to simplify pattern matching.
ICmpInst::Predicate Pred = Cmp->getPredicate();
if (Pred == ICmpInst::ICMP_ULE) {
std::swap(A, B);
Pred = ICmpInst::ICMP_UGE;
}
// Convert special-case: (A != 0) is the same as (A u>= 1).
if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
B = ConstantInt::get(B->getType(), 1);
Pred = ICmpInst::ICMP_UGE;
}

// Convert special-case: (A == 0) is the same as (0 u>= A).
if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
std::swap(A, B);
Pred = ICmpInst::ICMP_UGE;
}

if (Pred != ICmpInst::ICMP_UGE)
return false;

// Walk the users of a variable operand of a compare looking for a subtract or
// add with that same operand. Also match the 2nd operand of the compare to
// the add/sub, but that may be a negated constant operand of an add.
Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
BinaryOperator *Sub = nullptr;
for (User *U : CmpVariableOperand->users()) {
// A - B, A u>= B --> usubo(A, B) with the overflow bit negated
if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
Sub = cast<BinaryOperator>(U);
break;
}

// A + (-C), A u>= C (canonicalized form of (sub A, C))
const APInt *CmpC, *AddC;
if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
Sub = cast<BinaryOperator>(U);
break;
}
}
if (!Sub)
return false;

if (!TLI->shouldFormOverflowOp(ISD::USUBO,
TLI->getValueType(*DL, Sub->getType()),
Sub->hasNUsesOrMore(1)))
return false;

if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
Cmp, Intrinsic::usub_with_overflow, true))
return false;

// Reset callers - do not crash by iterating over a dead instruction.
ModifiedDT = ModifyDT::ModifyInstDT;
return true;
}

// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
// The same transformation exists in DAG combiner, but we repeat it here because
// DAG builder can break the pattern by moving icmp into a successor block.
@@ -2224,6 +2294,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;

if (combineToUSubWithNegatedOverflow(Cmp, ModifiedDT))
return true;

if (unfoldPowerOf2Test(Cmp))
return true;

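Taken together, the new combine lets a compare that tests the opposite of the unsigned-borrow condition (A u>= B, plus the canonicalized forms A u<= B, A != 0, and A == 0) share the flags of a matching subtraction. A minimal before/after sketch of the targeted IR, assuming a select user for illustration (this example is not taken from the patch's tests):

; Before: the compare and the subtraction are independent instructions.
define i32 @clamped_sub(i32 %a, i32 %b) {
  %cmp = icmp uge i32 %a, %b            ; true exactly when a - b does not borrow
  %sub = sub i32 %a, %b
  %res = select i1 %cmp, i32 %sub, i32 0
  ret i32 %res
}

; After (conceptually): one usub.with.overflow produces both the difference and
; the borrow flag, and the negated flag replaces the original compare.
define i32 @clamped_sub_usubo(i32 %a, i32 %b) {
  %m = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue { i32, i1 } %m, 0
  %ov = extractvalue { i32, i1 } %m, 1
  %cmp = xor i1 %ov, true               ; a u>= b holds exactly when there is no borrow
  %res = select i1 %cmp, i32 %sub, i32 0
  ret i32 %res
}

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)

In the updated x86 tests below, this shows up as branches and cmovs keyed off the carry flag of the sub/neg itself (jb, jae, cmovael) instead of a separate test against zero.
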
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/abdu-neg.ll
@@ -719,8 +719,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_cmp_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movzbl %sil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
49 changes: 30 additions & 19 deletions llvm/test/CodeGen/X86/atomicrmw-cond-sub-clamp.ll
@@ -16,12 +16,13 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; CHECK-32-NEXT: je .LBB0_4
; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-32-NEXT: movb %al, %ch
; CHECK-32-NEXT: subb %cl, %ch
; CHECK-32-NEXT: movb %al, %ah
; CHECK-32-NEXT: subb %cl, %ah
; CHECK-32-NEXT: jae .LBB0_3
; CHECK-32-NEXT: jb .LBB0_3
; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
; CHECK-32-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-32-NEXT: movb %al, %ah
; CHECK-32-NEXT: movb %ch, %ah
; CHECK-32-NEXT: jmp .LBB0_3
; CHECK-32-NEXT: .LBB0_4: # %atomicrmw.end
; CHECK-32-NEXT: retl
@@ -67,8 +68,7 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; CHECK-32-NEXT: movl %eax, %esi
; CHECK-32-NEXT: subw %cx, %si
; CHECK-32-NEXT: jae .LBB1_3
; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
; CHECK-32-NEXT: # in Loop: Header=BB1_1 Depth=1
; CHECK-32-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
; CHECK-32-NEXT: movl %eax, %esi
; CHECK-32-NEXT: jmp .LBB1_3
; CHECK-32-NEXT: .LBB1_4: # %atomicrmw.end
@@ -100,9 +100,12 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; CHECK-32-LABEL: atomicrmw_usub_cond_i32:
; CHECK-32: # %bb.0:
; CHECK-32-NEXT: pushl %esi
; CHECK-32-NEXT: pushl %edi
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
; CHECK-32-NEXT: .cfi_offset %esi, -8
; CHECK-32-NEXT: pushl %esi
; CHECK-32-NEXT: .cfi_def_cfa_offset 12
; CHECK-32-NEXT: .cfi_offset %esi, -12
; CHECK-32-NEXT: .cfi_offset %edi, -8
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-32-NEXT: movl (%edx), %eax
@@ -114,15 +117,18 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; CHECK-32-NEXT: je .LBB2_4
; CHECK-32-NEXT: .LBB2_1: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-32-NEXT: movl %eax, %edi
; CHECK-32-NEXT: subl %ecx, %edi
; CHECK-32-NEXT: movl %eax, %esi
; CHECK-32-NEXT: subl %ecx, %esi
; CHECK-32-NEXT: jae .LBB2_3
; CHECK-32-NEXT: jb .LBB2_3
; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
; CHECK-32-NEXT: # in Loop: Header=BB2_1 Depth=1
; CHECK-32-NEXT: movl %eax, %esi
; CHECK-32-NEXT: movl %edi, %esi
; CHECK-32-NEXT: jmp .LBB2_3
; CHECK-32-NEXT: .LBB2_4: # %atomicrmw.end
; CHECK-32-NEXT: popl %esi
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
; CHECK-32-NEXT: popl %edi
; CHECK-32-NEXT: .cfi_def_cfa_offset 4
; CHECK-32-NEXT: retl
;
@@ -158,30 +164,35 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; CHECK-32-NEXT: .cfi_offset %edi, -16
; CHECK-32-NEXT: .cfi_offset %ebx, -12
; CHECK-32-NEXT: .cfi_offset %ebp, -8
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-32-NEXT: movl (%ebp), %eax
; CHECK-32-NEXT: movl 4(%ebp), %edx
; CHECK-32-NEXT: movl 4(%ebp), %ecx
; CHECK-32-NEXT: jmp .LBB3_1
; CHECK-32-NEXT: .p2align 4
; CHECK-32-NEXT: .LBB3_3: # %atomicrmw.start
; CHECK-32-NEXT: # in Loop: Header=BB3_1 Depth=1
; CHECK-32-NEXT: movl %ecx, %edx
; CHECK-32-NEXT: movl %edi, %ecx
; CHECK-32-NEXT: movl %esi, %ebx
; CHECK-32-NEXT: lock cmpxchg8b (%ebp)
; CHECK-32-NEXT: movl %edx, %ecx
; CHECK-32-NEXT: je .LBB3_4
; CHECK-32-NEXT: .LBB3_1: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-32-NEXT: movl %eax, %ebx
; CHECK-32-NEXT: subl %edi, %ebx
; CHECK-32-NEXT: movl %edx, %ecx
; CHECK-32-NEXT: sbbl %esi, %ecx
; CHECK-32-NEXT: jae .LBB3_3
; CHECK-32-NEXT: subl {{[0-9]+}}(%esp), %ebx
; CHECK-32-NEXT: movl %ecx, %edx
; CHECK-32-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; CHECK-32-NEXT: movl %ecx, %edi
; CHECK-32-NEXT: movl %eax, %esi
; CHECK-32-NEXT: jb .LBB3_3
; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
; CHECK-32-NEXT: # in Loop: Header=BB3_1 Depth=1
; CHECK-32-NEXT: movl %edx, %ecx
; CHECK-32-NEXT: movl %eax, %ebx
; CHECK-32-NEXT: movl %edx, %edi
; CHECK-32-NEXT: movl %ebx, %esi
; CHECK-32-NEXT: jmp .LBB3_3
; CHECK-32-NEXT: .LBB3_4: # %atomicrmw.end
; CHECK-32-NEXT: movl %ecx, %edx
; CHECK-32-NEXT: popl %esi
; CHECK-32-NEXT: .cfi_def_cfa_offset 16
; CHECK-32-NEXT: popl %edi
70 changes: 46 additions & 24 deletions llvm/test/CodeGen/X86/bmi.ll
@@ -1948,27 +1948,35 @@ define void @pr42118_i64(i64 %x) {
define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X86-LABEL: blsi_cflag_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne .LBB59_1
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: jb .LBB59_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
; X86-NEXT: .LBB59_1:
; X86-NEXT: blsil %eax, %eax
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi_cflag_32:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: negl %edi
; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
;
; EGPR-LABEL: blsi_cflag_32:
; EGPR: # %bb.0:
; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
; EGPR-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: negl %eax # encoding: [0xf7,0xd8]
; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
; EGPR-NEXT: negl %edi # encoding: [0xf7,0xdf]
; EGPR-NEXT: cmovael %esi, %eax # encoding: [0x0f,0x43,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%tobool = icmp eq i32 %x, 0
%sub = sub nsw i32 0, %x
@@ -1980,40 +1988,54 @@ define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: blsi_cflag_64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: orl %esi, %edi
; X86-NEXT: jne .LBB60_1
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: negl %edi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: setae %bl
; X86-NEXT: jb .LBB60_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jmp .LBB60_3
; X86-NEXT: testb %bl, %bl
; X86-NEXT: jne .LBB60_5
; X86-NEXT: .LBB60_4:
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: jmp .LBB60_6
; X86-NEXT: .LBB60_1:
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: .LBB60_3:
; X86-NEXT: andl %esi, %eax
; X86-NEXT: testb %bl, %bl
; X86-NEXT: je .LBB60_4
; X86-NEXT: .LBB60_5:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB60_6:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: blsi_cflag_64:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: negq %rdi
; X64-NEXT: cmovaeq %rsi, %rax
; X64-NEXT: retq
;
; EGPR-LABEL: blsi_cflag_64:
; EGPR: # %bb.0:
; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
; EGPR-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: negq %rax # encoding: [0x48,0xf7,0xd8]
; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
; EGPR-NEXT: negq %rdi # encoding: [0x48,0xf7,0xdf]
; EGPR-NEXT: cmovaeq %rsi, %rax # encoding: [0x48,0x0f,0x43,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%tobool = icmp eq i64 %x, 0
%sub = sub nsw i64 0, %x