diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index 28f6968ee6caf..443db4391a523 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -154,73 +154,114 @@ void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O, llvm_unreachable("Invalid conversion modifier"); } +void NVPTXInstPrinter::printFTZFlag(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + const int Imm = MO.getImm(); + if (Imm) + O << ".ftz"; +} + void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O, StringRef Modifier) { const MCOperand &MO = MI->getOperand(OpNum); int64_t Imm = MO.getImm(); - if (Modifier == "ftz") { - // FTZ flag - if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG) - O << ".ftz"; - return; - } else if (Modifier == "base") { - switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) { + if (Modifier == "FCmp") { + switch (Imm) { default: return; case NVPTX::PTXCmpMode::EQ: - O << ".eq"; + O << "eq"; return; case NVPTX::PTXCmpMode::NE: - O << ".ne"; + O << "ne"; return; case NVPTX::PTXCmpMode::LT: - O << ".lt"; + O << "lt"; return; case NVPTX::PTXCmpMode::LE: - O << ".le"; + O << "le"; return; case NVPTX::PTXCmpMode::GT: - O << ".gt"; + O << "gt"; return; case NVPTX::PTXCmpMode::GE: - O << ".ge"; - return; - case NVPTX::PTXCmpMode::LO: - O << ".lo"; - return; - case NVPTX::PTXCmpMode::LS: - O << ".ls"; - return; - case NVPTX::PTXCmpMode::HI: - O << ".hi"; - return; - case NVPTX::PTXCmpMode::HS: - O << ".hs"; + O << "ge"; return; case NVPTX::PTXCmpMode::EQU: - O << ".equ"; + O << "equ"; return; case NVPTX::PTXCmpMode::NEU: - O << ".neu"; + O << "neu"; return; case NVPTX::PTXCmpMode::LTU: - O << ".ltu"; + O << "ltu"; return; case NVPTX::PTXCmpMode::LEU: - O << ".leu"; + O << "leu"; return; case NVPTX::PTXCmpMode::GTU: - O << ".gtu"; + O << "gtu"; return; case 
NVPTX::PTXCmpMode::GEU: - O << ".geu"; + O << "geu"; return; case NVPTX::PTXCmpMode::NUM: - O << ".num"; + O << "num"; return; case NVPTX::PTXCmpMode::NotANumber: - O << ".nan"; + O << "nan"; + return; + } + } + if (Modifier == "ICmp") { + switch (Imm) { + default: + llvm_unreachable("Invalid ICmp mode"); + case NVPTX::PTXCmpMode::EQ: + O << "eq"; + return; + case NVPTX::PTXCmpMode::NE: + O << "ne"; + return; + case NVPTX::PTXCmpMode::LT: + case NVPTX::PTXCmpMode::LTU: + O << "lt"; + return; + case NVPTX::PTXCmpMode::LE: + case NVPTX::PTXCmpMode::LEU: + O << "le"; + return; + case NVPTX::PTXCmpMode::GT: + case NVPTX::PTXCmpMode::GTU: + O << "gt"; + return; + case NVPTX::PTXCmpMode::GE: + case NVPTX::PTXCmpMode::GEU: + O << "ge"; + return; + } + } + if (Modifier == "IType") { + switch (Imm) { + default: + llvm_unreachable("Invalid IType"); + case NVPTX::PTXCmpMode::EQ: + case NVPTX::PTXCmpMode::NE: + O << "b"; + return; + case NVPTX::PTXCmpMode::LT: + case NVPTX::PTXCmpMode::LE: + case NVPTX::PTXCmpMode::GT: + case NVPTX::PTXCmpMode::GE: + O << "s"; + return; + case NVPTX::PTXCmpMode::LTU: + case NVPTX::PTXCmpMode::LEU: + case NVPTX::PTXCmpMode::GTU: + case NVPTX::PTXCmpMode::GEU: + O << "u"; return; } } diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index 6189284e8a58c..193c436939f66 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -54,6 +54,7 @@ class NVPTXInstPrinter : public MCInstPrinter { void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O); void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O, StringRef Modifier = {}); + void printFTZFlag(const MCInst *MI, int OpNum, raw_ostream &O); }; } diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index b7fd7090299a9..e7eefe729e9f1 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ 
-14,12 +14,12 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H #define LLVM_LIB_TARGET_NVPTX_NVPTX_H +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" - namespace llvm { class FunctionPass; class MachineFunctionPass; @@ -218,28 +218,21 @@ enum CvtMode { /// PTXCmpMode - Comparison mode enumeration namespace PTXCmpMode { enum CmpMode { - EQ = 0, - NE, - LT, - LE, - GT, - GE, - LO, - LS, - HI, - HS, - EQU, - NEU, - LTU, - LEU, - GTU, - GEU, - NUM, + EQ = ISD::SETEQ, + NE = ISD::SETNE, + LT = ISD::SETLT, + LE = ISD::SETLE, + GT = ISD::SETGT, + GE = ISD::SETGE, + EQU = ISD::SETUEQ, + NEU = ISD::SETUNE, + LTU = ISD::SETULT, + LEU = ISD::SETULE, + GTU = ISD::SETUGT, + GEU = ISD::SETUGE, + NUM = ISD::SETO, // NAN is a MACRO - NotANumber, - - BASE_MASK = 0xFF, - FTZ_FLAG = 0x100 + NotANumber = ISD::SETUO, }; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 5631342ecc13e..429d52fb6f230 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -363,23 +363,29 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { // Map ISD:CONDCODE value to appropriate CmpMode expected by // NVPTXInstPrinter::printCmpMode() -static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) { +SDValue NVPTXDAGToDAGISel::getPTXCmpMode(const CondCodeSDNode &CondCode) { using NVPTX::PTXCmpMode::CmpMode; - unsigned PTXCmpMode = [](ISD::CondCode CC) { + const unsigned PTXCmpMode = [](ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unexpected condition code."); case ISD::SETOEQ: + case ISD::SETEQ: return CmpMode::EQ; case ISD::SETOGT: + case ISD::SETGT: return CmpMode::GT; case ISD::SETOGE: + case ISD::SETGE: return CmpMode::GE; case ISD::SETOLT: + case ISD::SETLT: return CmpMode::LT; case ISD::SETOLE: + case 
ISD::SETLE: return CmpMode::LE; case ISD::SETONE: + case ISD::SETNE: return CmpMode::NE; case ISD::SETO: return CmpMode::NUM; @@ -397,45 +403,29 @@ static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) { return CmpMode::LEU; case ISD::SETUNE: return CmpMode::NEU; - case ISD::SETEQ: - return CmpMode::EQ; - case ISD::SETGT: - return CmpMode::GT; - case ISD::SETGE: - return CmpMode::GE; - case ISD::SETLT: - return CmpMode::LT; - case ISD::SETLE: - return CmpMode::LE; - case ISD::SETNE: - return CmpMode::NE; } }(CondCode.get()); - - if (FTZ) - PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG; - - return PTXCmpMode; + return CurDAG->getTargetConstant(PTXCmpMode, SDLoc(), MVT::i32); } bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) { - unsigned PTXCmpMode = - getPTXCmpMode(*cast(N->getOperand(2)), useF32FTZ()); + SDValue PTXCmpMode = getPTXCmpMode(*cast(N->getOperand(2))); SDLoc DL(N); SDNode *SetP = CurDAG->getMachineNode( - NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0), - N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32)); + NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, + {N->getOperand(0), N->getOperand(1), PTXCmpMode, + CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)}); ReplaceNode(N, SetP); return true; } bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) { - unsigned PTXCmpMode = - getPTXCmpMode(*cast(N->getOperand(2)), useF32FTZ()); + SDValue PTXCmpMode = getPTXCmpMode(*cast(N->getOperand(2))); SDLoc DL(N); SDNode *SetP = CurDAG->getMachineNode( - NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0), - N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32)); + NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, + {N->getOperand(0), N->getOperand(1), PTXCmpMode, + CurDAG->getTargetConstant(useF32FTZ() ? 
1 : 0, DL, MVT::i1)}); ReplaceNode(N, SetP); return true; } @@ -1953,7 +1943,7 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) { llvm_unreachable("Unexpected opcode"); }; - int Opcode = IsVec ? NVPTX::BFMA16x2rrr : NVPTX::BFMA16rrr; + int Opcode = IsVec ? NVPTX::FMA_BF16x2rrr : NVPTX::FMA_BF16rrr; MachineSDNode *FMA = CurDAG->getMachineNode(Opcode, DL, VT, Operands); ReplaceNode(N, FMA); return true; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 0e4dec1adca67..b314c4ccefe8b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -104,12 +104,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { } bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset); + SDValue getPTXCmpMode(const CondCodeSDNode &CondCode); SDValue selectPossiblyImm(SDValue V); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; - static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N); - // Returns the Memory Order and Scope that the PTX memory instruction should // use, and inserts appropriate fence instruction before the memory // instruction, if needed to implement the instructions memory order. Required diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 1a2515b7f66f3..9ef9ce3b7bb8d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -68,48 +68,28 @@ def CvtMode : Operand { let PrintMethod = "printCvtMode"; } +// FTZ flag + +def FTZ : PatLeaf<(i1 1)>; +def NoFTZ : PatLeaf<(i1 0)>; + +def getFTZFlag : SDNodeXFormgetTargetConstant(useF32FTZ() ? 
1 : 0, SDLoc(), MVT::i1); +}]>; + +def FTZFlag : OperandWithDefaultOps { + let PrintMethod = "printFTZFlag"; +} + // Compare modes // These must match the enum in NVPTX.h -def CmpEQ : PatLeaf<(i32 0)>; -def CmpNE : PatLeaf<(i32 1)>; -def CmpLT : PatLeaf<(i32 2)>; -def CmpLE : PatLeaf<(i32 3)>; -def CmpGT : PatLeaf<(i32 4)>; -def CmpGE : PatLeaf<(i32 5)>; -def CmpLO : PatLeaf<(i32 6)>; -def CmpLS : PatLeaf<(i32 7)>; -def CmpHI : PatLeaf<(i32 8)>; -def CmpHS : PatLeaf<(i32 9)>; -def CmpEQU : PatLeaf<(i32 10)>; -def CmpNEU : PatLeaf<(i32 11)>; -def CmpLTU : PatLeaf<(i32 12)>; -def CmpLEU : PatLeaf<(i32 13)>; -def CmpGTU : PatLeaf<(i32 14)>; -def CmpGEU : PatLeaf<(i32 15)>; -def CmpNUM : PatLeaf<(i32 16)>; -def CmpNAN : PatLeaf<(i32 17)>; - -def CmpEQ_FTZ : PatLeaf<(i32 0x100)>; -def CmpNE_FTZ : PatLeaf<(i32 0x101)>; -def CmpLT_FTZ : PatLeaf<(i32 0x102)>; -def CmpLE_FTZ : PatLeaf<(i32 0x103)>; -def CmpGT_FTZ : PatLeaf<(i32 0x104)>; -def CmpGE_FTZ : PatLeaf<(i32 0x105)>; -def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>; -def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>; -def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>; -def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>; -def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>; -def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>; -def CmpNUM_FTZ : PatLeaf<(i32 0x110)>; -def CmpNAN_FTZ : PatLeaf<(i32 0x111)>; +def CmpEQ : PatLeaf<(i32 17)>; +def CmpNE : PatLeaf<(i32 22)>; def CmpMode : Operand { let PrintMethod = "printCmpMode"; } -def VecElement : Operand { - let PrintMethod = "printVecElement"; -} // PRMT modes // These must match the enum in NVPTX.h @@ -152,8 +132,6 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; def hasDotInstructions : Predicate<"Subtarget->hasDotInstructions()">; def hasTcgen05Instructions : Predicate<"Subtarget->hasTcgen05Instructions()">; -def True : Predicate<"true">; - class hasPTX: Predicate<"Subtarget->getPTXVersion() >= " # version>; class hasSM: Predicate<"Subtarget->getSmVersion() >= " # version>; @@ -198,7 +176,7 @@ def RI64 : Operand; // Utility class 
to wrap up information about a register and DAG type for more // convenient iteration and parameterization -class RegTyInfo { ValueType Ty = ty; NVPTXRegClass RC = rc; @@ -206,20 +184,21 @@ class RegTyInfo; -def I16RT : RegTyInfo; -def I32RT : RegTyInfo; -def I64RT : RegTyInfo; +def I1RT : RegTyInfo; +def I16RT : RegTyInfo; +def I32RT : RegTyInfo; +def I64RT : RegTyInfo; -def F32RT : RegTyInfo; -def F64RT : RegTyInfo; -def F16RT : RegTyInfo; -def BF16RT : RegTyInfo; +def F32RT : RegTyInfo; +def F64RT : RegTyInfo; +def F16RT : RegTyInfo; +def BF16RT : RegTyInfo; -def F16X2RT : RegTyInfo; -def BF16X2RT : RegTyInfo; +def F16X2RT : RegTyInfo; +def BF16X2RT : RegTyInfo; // This class provides a basic wrapper around an NVPTXInst that abstracts the @@ -321,76 +300,57 @@ multiclass ADD_SUB_INT_CARRY { // Also defines ftz (flush subnormal inputs and results to sign-preserving // zero) variants for fp32 functions. multiclass FMINIMUMMAXIMUM { + defvar nan_str = !if(NaN, ".NaN", ""); if !not(NaN) then { - def f64rr : + def _f64_rr : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, B64:$b), OpcStr # ".f64", [(set f64:$dst, (OpNode f64:$a, f64:$b))]>; - def f64ri : + def _f64_ri : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, f64imm:$b), OpcStr # ".f64", [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>; } - def f32rr_ftz : - BasicNVPTXInst<(outs B32:$dst), + def _f32_rr : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - OpcStr # ".ftz.f32", - [(set f32:$dst, (OpNode f32:$a, f32:$b))]>, - Requires<[doF32FTZ]>; - def f32ri_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, f32imm:$b), - OpcStr # ".ftz.f32", - [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; - def f32rr : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - OpcStr # ".f32", + (ins FTZFlag:$ftz), + OpcStr # "$ftz" # nan_str # ".f32", [(set f32:$dst, (OpNode f32:$a, f32:$b))]>; - def f32ri : - BasicNVPTXInst<(outs B32:$dst), + def _f32_ri : + BasicFlagsNVPTXInst<(outs B32:$dst), 
(ins B32:$a, f32imm:$b), - OpcStr # ".f32", + (ins FTZFlag:$ftz), + OpcStr # "$ftz" # nan_str # ".f32", [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>; - def f16rr_ftz : - BasicNVPTXInst<(outs B16:$dst), - (ins B16:$a, B16:$b), - OpcStr # ".ftz.f16", - [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, - Requires<[useFP16Math, doF32FTZ]>; - def f16rr : - BasicNVPTXInst<(outs B16:$dst), + def _f16_rr : + BasicFlagsNVPTXInst<(outs B16:$dst), (ins B16:$a, B16:$b), - OpcStr # ".f16", + (ins FTZFlag:$ftz), + OpcStr # "$ftz" # nan_str # ".f16", [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, - Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; + Requires<[useFP16Math]>; - def f16x2rr_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - OpcStr # ".ftz.f16x2", - [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, - Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>; - def f16x2rr : - BasicNVPTXInst<(outs B32:$dst), + def _f16x2_rr : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - OpcStr # ".f16x2", + (ins FTZFlag:$ftz), + OpcStr # "$ftz" # nan_str # ".f16x2", [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; - def bf16rr : + def _bf16_rr : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a, B16:$b), - OpcStr # ".bf16", + OpcStr # nan_str # ".bf16", [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>, Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; - def bf16x2rr : + def _bf16x2_rr : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - OpcStr # ".bf16x2", + OpcStr # nan_str # ".bf16x2", [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; } @@ -415,52 +375,31 @@ multiclass F3 { (ins B64:$a, f64imm:$b), op_str # ".f64", [(set f64:$dst, (op_pat f64:$a, fpimm:$b))]>; - def f32rr_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - op_str # ".ftz.f32", - [(set f32:$dst, (op_pat f32:$a, f32:$b))]>, - Requires<[doF32FTZ]>; - def f32ri_ftz : - BasicNVPTXInst<(outs 
B32:$dst), - (ins B32:$a, f32imm:$b), - op_str # ".ftz.f32", - [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; def f32rr : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - op_str # ".f32", + (ins FTZFlag:$ftz), + op_str # "$ftz.f32", [(set f32:$dst, (op_pat f32:$a, f32:$b))]>; def f32ri : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, f32imm:$b), - op_str # ".f32", + (ins FTZFlag:$ftz), + op_str # "$ftz.f32", [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>; - def f16rr_ftz : - BasicNVPTXInst<(outs B16:$dst), - (ins B16:$a, B16:$b), - op_str # ".ftz.f16", - [(set f16:$dst, (op_pat f16:$a, f16:$b))]>, - Requires<[useFP16Math, doF32FTZ]>; def f16rr : - BasicNVPTXInst<(outs B16:$dst), + BasicFlagsNVPTXInst<(outs B16:$dst), (ins B16:$a, B16:$b), - op_str # ".f16", + (ins FTZFlag:$ftz), + op_str # "$ftz.f16", [(set f16:$dst, (op_pat f16:$a, f16:$b))]>, Requires<[useFP16Math]>; - - def f16x2rr_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - op_str # ".ftz.f16x2", - [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>, - Requires<[useFP16Math, doF32FTZ]>; def f16x2rr : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - op_str # ".f16x2", + (ins FTZFlag:$ftz), + op_str # "$ftz.f16x2", [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math]>; def bf16rr : @@ -493,15 +432,12 @@ multiclass F3_fma_component { // instructions: .f64, .f32, and .ftz.f32 (flush // subnormal inputs and results to zero). 
multiclass F2 { - def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a), + def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a), OpcStr # ".f64", [(set f64:$dst, (OpNode f64:$a))]>; - def f32_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), - OpcStr # ".ftz.f32", - [(set f32:$dst, (OpNode f32:$a))]>, - Requires<[doF32FTZ]>; - def f32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), - OpcStr # ".f32", + def f32 : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a), + (ins FTZFlag:$ftz), + OpcStr # "$ftz.f32", [(set f32:$dst, (OpNode f32:$a))]>; } @@ -514,37 +450,19 @@ multiclass F2_Support_Half { OpcStr # ".bf16x2", [(set v2bf16:$dst, (OpNode v2bf16:$a))]>, Requires<[hasSM<80>, hasPTX<70>]>; - def f16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), - OpcStr # ".ftz.f16", - [(set f16:$dst, (OpNode f16:$a))]>, - Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>; - def f16x2_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), - OpcStr # ".ftz.f16x2", - [(set v2f16:$dst, (OpNode v2f16:$a))]>, - Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>; - def f16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), - OpcStr # ".f16", + def f16 : BasicFlagsNVPTXInst<(outs B16:$dst), (ins B16:$a), + (ins FTZFlag:$ftz), + OpcStr # "$ftz.f16", [(set f16:$dst, (OpNode f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>]>; - def f16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), - OpcStr # ".f16x2", + def f16x2 : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a), + (ins FTZFlag:$ftz), + OpcStr # "$ftz.f16x2", [(set v2f16:$dst, (OpNode v2f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>]>; } -// Variant where only .ftz.bf16 is supported. 
-multiclass F2_Support_Half_BF { - def bf16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), - OpcStr # ".ftz.bf16", - [(set bf16:$dst, (OpNode bf16:$a))]>, - Requires<[hasSM<90>, hasPTX<78>]>; - def bf16x2_ftz: BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), - OpcStr # ".ftz.bf16x2", - [(set v2bf16:$dst, (OpNode v2bf16:$a))]>, - Requires<[hasSM<90>, hasPTX<78>]>; -} - //===----------------------------------------------------------------------===// // NVPTX Instructions. //===----------------------------------------------------------------------===// @@ -992,48 +910,38 @@ def SHL2MUL16 : SDNodeXForm; // Convert "sign/zero-extend, then shift left by an immediate" to mul.wide. -def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)), - (MULWIDES64Imm $a, (SHL2MUL32 $b))>, - Requires<[doMulWide]>; -def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)), - (MULWIDEU64Imm $a, (SHL2MUL32 $b))>, - Requires<[doMulWide]>; - -def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)), - (MULWIDES32Imm $a, (SHL2MUL16 $b))>, - Requires<[doMulWide]>; -def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)), - (MULWIDEU32Imm $a, (SHL2MUL16 $b))>, - Requires<[doMulWide]>; - -// Convert "sign/zero-extend then multiply" to mul.wide. 
-def : Pat<(mul (sext i32:$a), (sext i32:$b)), - (MULWIDES64 $a, $b)>, - Requires<[doMulWide]>; -def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)), - (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>, - Requires<[doMulWide]>; - -def : Pat<(mul (zext i32:$a), (zext i32:$b)), - (MULWIDEU64 $a, $b)>, - Requires<[doMulWide]>; -def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)), - (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>, - Requires<[doMulWide]>; - -def : Pat<(mul (sext i16:$a), (sext i16:$b)), - (MULWIDES32 $a, $b)>, - Requires<[doMulWide]>; -def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)), - (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>, - Requires<[doMulWide]>; - -def : Pat<(mul (zext i16:$a), (zext i16:$b)), - (MULWIDEU32 $a, $b)>, - Requires<[doMulWide]>; -def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)), - (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>, - Requires<[doMulWide]>; +let Predicates = [doMulWide] in { + def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)), + (MULWIDES64Imm $a, (SHL2MUL32 $b))>; + def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)), + (MULWIDEU64Imm $a, (SHL2MUL32 $b))>; + + def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)), + (MULWIDES32Imm $a, (SHL2MUL16 $b))>; + def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)), + (MULWIDEU32Imm $a, (SHL2MUL16 $b))>; + + // Convert "sign/zero-extend then multiply" to mul.wide. 
+ def : Pat<(mul (sext i32:$a), (sext i32:$b)), + (MULWIDES64 $a, $b)>; + def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)), + (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>; + + def : Pat<(mul (zext i32:$a), (zext i32:$b)), + (MULWIDEU64 $a, $b)>; + def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)), + (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>; + + def : Pat<(mul (sext i16:$a), (sext i16:$b)), + (MULWIDES32 $a, $b)>; + def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)), + (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>; + + def : Pat<(mul (zext i16:$a), (zext i16:$b)), + (MULWIDEU32 $a, $b)>; + def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)), + (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>; +} // // Integer multiply-add @@ -1101,10 +1009,10 @@ defm FADD : F3_fma_component<"add", fadd>; defm FSUB : F3_fma_component<"sub", fsub>; defm FMUL : F3_fma_component<"mul", fmul>; -defm FMIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>; -defm FMAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>; -defm FMINNAN : FMINIMUMMAXIMUM<"min.NaN", /* NaN */ true, fminimum>; -defm FMAXNAN : FMINIMUMMAXIMUM<"max.NaN", /* NaN */ true, fmaximum>; +defm MIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>; +defm MAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>; +defm MIN_NAN : FMINIMUMMAXIMUM<"min", /* NaN */ true, fminimum>; +defm MAX_NAN : FMINIMUMMAXIMUM<"max", /* NaN */ true, fmaximum>; defm FABS : F2<"abs", fabs>; defm FNEG : F2<"neg", fneg>; @@ -1113,36 +1021,43 @@ defm FNEG_H: F2_Support_Half<"neg", fneg>; defm FSQRT : F2<"sqrt.rn", fsqrt>; -defm FEXP2_H: F2_Support_Half_BF<"ex2.approx", fexp2>; - // // F16 NEG // -class FNEG_F16_F16X2 : - BasicNVPTXInst<(outs RC:$dst), (ins RC:$src), - OpcStr, - [(set T:$dst, (fneg T:$src))]>, - Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>; -def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, B16, doF32FTZ>; -def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, B16, True>; -def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, 
B32, doF32FTZ>; -def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, B32, True>; +class FNEG16 : + BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), (ins FTZFlag:$ftz), + "neg$ftz." # t.Str, + [(set t.Ty:$dst, (fneg t.Ty:$src))]>; + +let Predicates = [useFP16Math, hasPTX<60>, hasSM<53>] in { + def NEG_F16 : FNEG16; + def NEG_F16x2 : FNEG16; +} +let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in { + def NEG_BF16 : FNEG16; + def NEG_BF16x2 : FNEG16; +} // -// BF16 NEG +// EX2 // -class FNEG_BF16_F16X2 : - BasicNVPTXInst<(outs RC:$dst), (ins RC:$src), - OpcStr, - [(set T:$dst, (fneg T:$src))]>, - Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>; -def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, B16, doF32FTZ>; -def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, B16, True>; -def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, B32, doF32FTZ>; -def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, B32, True>; +class FEXP2Inst : + BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), + flags, "ex2.approx" # flag_str # "." 
# t.Str, + [(set t.Ty:$dst, (fexp2 t.Ty:$src))]>; + +def EX2_APPROX_f32 : FEXP2Inst; + +let Predicates = [useFP16Math, hasPTX<70>, hasSM<75>] in { + def EX2_APPROX_f16 : FEXP2Inst; + def EX2_APPROX_f16x2 : FEXP2Inst; +} +let Predicates = [hasPTX<78>, hasSM<90>] in { + def EX2_APPROX_bf16 : FEXP2Inst; + def EX2_APPROX_bf16x2 : FEXP2Inst; +} -// // F64 division // def FRCP64r : @@ -1176,42 +1091,27 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b), }]>; -def FRCP32_approx_r_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$b), - "rcp.approx.ftz.f32", - [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>, - Requires<[doF32FTZ]>; def FRCP32_approx_r : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$b), - "rcp.approx.f32", + (ins FTZFlag:$ftz), + "rcp.approx$ftz.f32", [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>; // // F32 Approximate division // -def FDIV32approxrr_ftz : - BasicNVPTXInst<(outs B32:$dst), +def FDIV32_approx_rr : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - "div.approx.ftz.f32", - [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>, - Requires<[doF32FTZ]>; -def FDIV32approxri_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, f32imm:$b), - "div.approx.ftz.f32", - [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; -def FDIV32approxrr : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - "div.approx.f32", + (ins FTZFlag:$ftz), + "div.approx$ftz.f32", [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>; -def FDIV32approxri : - BasicNVPTXInst<(outs B32:$dst), +def FDIV32_approx_ri : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, f32imm:$b), - "div.approx.f32", + (ins FTZFlag:$ftz), + "div.approx$ftz.f32", [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>; // // F32 Semi-accurate reciprocal @@ -1225,37 +1125,23 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b), }]>; -def : Pat<(fdiv_full f32imm_1, f32:$b), - (FRCP32_approx_r_ftz $b)>, - Requires<[doF32FTZ]>; - def : 
Pat<(fdiv_full f32imm_1, f32:$b), (FRCP32_approx_r $b)>; // // F32 Semi-accurate division // -def FDIV32rr_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - "div.full.ftz.f32", - [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>, - Requires<[doF32FTZ]>; -def FDIV32ri_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, f32imm:$b), - "div.full.ftz.f32", - [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; def FDIV32rr : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - "div.full.f32", + (ins FTZFlag:$ftz), + "div.full$ftz.f32", [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>; def FDIV32ri : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, f32imm:$b), - "div.full.f32", + (ins FTZFlag:$ftz), + "div.full$ftz.f32", [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>; // // F32 Accurate reciprocal @@ -1266,86 +1152,73 @@ def fdiv_ftz : PatFrag<(ops node:$a, node:$b), return getDivF32Level(N) == NVPTX::DivPrecisionLevel::IEEE754; }]>; -def FRCP32r_prec_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$b), - "rcp.rn.ftz.f32", - [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>, - Requires<[doF32FTZ]>; def FRCP32r_prec : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$b), - "rcp.rn.f32", - [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>; + (ins FTZFlag:$ftz), + "rcp.rn$ftz.f32", + [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>; // // F32 Accurate division // -def FDIV32rr_prec_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, B32:$b), - "div.rn.ftz.f32", - [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>, - Requires<[doF32FTZ]>; -def FDIV32ri_prec_ftz : - BasicNVPTXInst<(outs B32:$dst), - (ins B32:$a, f32imm:$b), - "div.rn.ftz.f32", - [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; def FDIV32rr_prec : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), - 
"div.rn.f32", - [(set f32:$dst, (fdiv f32:$a, f32:$b))]>; + (ins FTZFlag:$ftz), + "div.rn$ftz.f32", + [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>; def FDIV32ri_prec : - BasicNVPTXInst<(outs B32:$dst), + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, f32imm:$b), - "div.rn.f32", - [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>; + (ins FTZFlag:$ftz), + "div.rn$ftz.f32", + [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>; + +def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>; +def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>; +def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>; // // FMA // -multiclass FMA Preds = []> { - def rrr : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c), - asmstr, - [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, t.Ty:$c))]>, - Requires; - - if t.SupportsImm then { - def rri : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.RC:$a, t.RC:$b, t.Imm:$c), - asmstr, - [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, fpimm:$c))]>, - Requires; - def rir : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.RC:$a, t.Imm:$b, t.RC:$c), - asmstr, - [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, t.Ty:$c))]>, - Requires; - def rii : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.RC:$a, t.Imm:$b, t.Imm:$c), - asmstr, - [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, fpimm:$c))]>, - Requires; - def iir : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.Imm:$a, t.Imm:$b, t.RC:$c), - asmstr, - [(set t.Ty:$dst, (fma fpimm:$a, fpimm:$b, t.Ty:$c))]>, - Requires; +multiclass FMA preds = []> { + defvar flag_str = !if(allow_ftz, "$ftz", ""); + defvar flag_ops = !if(allow_ftz, (ins FTZFlag:$ftz), (ins)); + defvar op_str = "fma.rn" # flag_str # "." 
# t.Str; + + let Predicates = preds in { + def rrr : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c), + flag_ops, op_str, + [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, t.Ty:$c))]>; + + if t.SupportsImm then { + def rri : BasicFlagsNVPTXInst<(outs t.RC:$dst), + (ins t.RC:$a, t.RC:$b, t.Imm:$c), + flag_ops, op_str, + [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, fpimm:$c))]>; + def rir : BasicFlagsNVPTXInst<(outs t.RC:$dst), + (ins t.RC:$a, t.Imm:$b, t.RC:$c), + flag_ops, op_str, + [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, t.Ty:$c))]>; + def rii : BasicFlagsNVPTXInst<(outs t.RC:$dst), + (ins t.RC:$a, t.Imm:$b, t.Imm:$c), + flag_ops, op_str, + [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, fpimm:$c))]>; + def iir : BasicFlagsNVPTXInst<(outs t.RC:$dst), + (ins t.Imm:$a, t.Imm:$b, t.RC:$c), + flag_ops, op_str, + [(set t.Ty:$dst, (fma fpimm:$a, fpimm:$b, t.Ty:$c))]>; + } } } -defm FMA16_ftz : FMA<"fma.rn.ftz.f16", F16RT, [useFP16Math, doF32FTZ]>; -defm FMA16 : FMA<"fma.rn.f16", F16RT, [useFP16Math]>; -defm FMA16x2_ftz : FMA<"fma.rn.ftz.f16x2", F16X2RT, [useFP16Math, doF32FTZ]>; -defm FMA16x2 : FMA<"fma.rn.f16x2", F16X2RT, [useFP16Math]>; -defm BFMA16 : FMA<"fma.rn.bf16", BF16RT, [hasBF16Math]>; -defm BFMA16x2 : FMA<"fma.rn.bf16x2", BF16X2RT, [hasBF16Math]>; -defm FMA32_ftz : FMA<"fma.rn.ftz.f32", F32RT, [doF32FTZ]>; -defm FMA32 : FMA<"fma.rn.f32", F32RT>; -defm FMA64 : FMA<"fma.rn.f64", F64RT>; +defm FMA_F16 : FMA; +defm FMA_F16x2 : FMA; +defm FMA_BF16 : FMA; +defm FMA_BF16x2 : FMA; +defm FMA_F32 : FMA; +defm FMA_F64 : FMA; // sin/cos @@ -1355,11 +1228,13 @@ class UnaryOpAllowsApproxFn return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs(); }]>; -def SINF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), - "sin.approx.f32", +def SIN_APPROX_f32 : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz), + "sin.approx$ftz.f32", [(set f32:$dst, (UnaryOpAllowsApproxFn f32:$src))]>; -def COSF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), - 
"cos.approx.f32", +def COS_APPROX_f32 : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz), + "cos.approx$ftz.f32", [(set f32:$dst, (UnaryOpAllowsApproxFn f32:$src))]>; //----------------------------------- @@ -1636,53 +1511,80 @@ def : Pat<(i16 (sext_inreg (trunc (srl i64:$s, (i32 imm:$o))), i8)), // FIXME: This doesn't cover versions of set and setp that combine with a // boolean predicate, e.g. setp.eq.and.b16. +def cond2cc : SDNodeXForm; -let hasSideEffects = false in { - multiclass SETP { +multiclass FSETP { + defvar ftz_str = !if(allow_ftz, "$ftz", ""); + defvar op_str = "setp.${cmp:FCmp}" # ftz_str # "." # t.Str; + defvar flags = !con((ins CmpMode:$cmp), !if(allow_ftz, (ins FTZFlag:$ftz), (ins))); + let hasSideEffects = false in { def rr : - BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}." # TypeStr>; + BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.RC:$b), + flags, op_str>; + + if t.SupportsImm then { + def ri : + BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.Imm:$b), + flags, op_str>; + def ir : + BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.Imm:$a, t.RC:$b), + flags, op_str>; + } + } + def : Pat<(i1 (setcc t.Ty:$a, t.Ty:$b, cond:$cc)), + (!cast(NAME # "rr") $a, $b, (cond2cc $cc))>; + if t.SupportsImm then { + def : Pat<(i1 (setcc t.Ty:$a, fpimm:$b, cond:$cc)), + (!cast(NAME # "ri") $a, fpimm:$b, (cond2cc $cc))>; + def : Pat<(i1 (setcc fpimm:$a, t.Ty:$b, cond:$cc)), + (!cast(NAME # "ir") fpimm:$a, $b, (cond2cc $cc))>; + } +} + +multiclass ISETP { + defvar op_str = "setp.${cmp:ICmp}.${cmp:IType}" # t.Size; + let hasSideEffects = false in { + def rr : + BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.RC:$b), + (ins CmpMode:$cmp), op_str>; def ri : - BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}." 
# TypeStr>; + BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.Imm:$b), + (ins CmpMode:$cmp), op_str>; def ir : - BasicFlagsNVPTXInst<(outs B1:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}." # TypeStr>; + BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.Imm:$a, t.RC:$b), + (ins CmpMode:$cmp), op_str>; } + def : Pat<(i1 (setcc t.Ty:$a, t.Ty:$b, cond:$cc)), + (!cast(NAME # "rr") $a, $b, (cond2cc $cc))>; + def : Pat<(i1 (setcc t.Ty:$a, imm:$b, cond:$cc)), + (!cast(NAME # "ri") $a, imm:$b, (cond2cc $cc))>; + def : Pat<(i1 (setcc imm:$a, t.Ty:$b, cond:$cc)), + (!cast(NAME # "ir") imm:$a, $b, (cond2cc $cc))>; } -defm SETP_b16 : SETP<"b16", B16, i16imm>; -defm SETP_s16 : SETP<"s16", B16, i16imm>; -defm SETP_u16 : SETP<"u16", B16, i16imm>; -defm SETP_b32 : SETP<"b32", B32, i32imm>; -defm SETP_s32 : SETP<"s32", B32, i32imm>; -defm SETP_u32 : SETP<"u32", B32, i32imm>; -defm SETP_b64 : SETP<"b64", B64, i64imm>; -defm SETP_s64 : SETP<"s64", B64, i64imm>; -defm SETP_u64 : SETP<"u64", B64, i64imm>; -defm SETP_f32 : SETP<"f32", B32, f32imm>; -defm SETP_f64 : SETP<"f64", B64, f64imm>; -def SETP_f16rr : - BasicFlagsNVPTXInst<(outs B1:$dst), - (ins B16:$a, B16:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}.f16">, - Requires<[useFP16Math]>; +defm SETP_i16 : ISETP; +defm SETP_i32 : ISETP; +defm SETP_i64 : ISETP; + +defm SETP_f32 : FSETP; +defm SETP_f64 : FSETP; +let Predicates = [useFP16Math] in + defm SETP_f16 : FSETP; +let Predicates = [hasBF16Math, hasPTX<78>, hasSM<90>] in + defm SETP_bf16 : FSETP; def SETP_f16x2rr : BasicFlagsNVPTXInst<(outs B1:$p, B1:$q), - (ins B32:$a, B32:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}.f16x2">, + (ins B32:$a, B32:$b), (ins CmpMode:$cmp, FTZFlag:$ftz), + "setp.${cmp:FCmp}$ftz.f16x2">, Requires<[useFP16Math]>; -def SETP_bf16rr : - BasicFlagsNVPTXInst<(outs B1:$dst), - (ins B16:$a, B16:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}.bf16">, - Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>; def 
SETP_bf16x2rr : BasicFlagsNVPTXInst<(outs B1:$p, B1:$q), - (ins B32:$a, B32:$b), (ins CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}.bf16x2">, + (ins B32:$a, B32:$b), (ins CmpMode:$cmp, FTZFlag:$ftz), + "setp.${cmp:FCmp}$ftz.bf16x2">, Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>; //----------------------------------- @@ -1786,209 +1688,36 @@ def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>; // Comparison and Selection //----------------------------------- -multiclass ISET_FORMAT { - // i16 -> pred - def : Pat<(i1 (OpNode i16:$a, i16:$b)), - (setp_16rr $a, $b, Mode)>; - def : Pat<(i1 (OpNode i16:$a, imm:$b)), - (setp_16ri $a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, i16:$b)), - (setp_16ir imm:$a, $b, Mode)>; - // i32 -> pred - def : Pat<(i1 (OpNode i32:$a, i32:$b)), - (setp_32rr $a, $b, Mode)>; - def : Pat<(i1 (OpNode i32:$a, imm:$b)), - (setp_32ri $a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, i32:$b)), - (setp_32ir imm:$a, $b, Mode)>; - // i64 -> pred - def : Pat<(i1 (OpNode i64:$a, i64:$b)), - (setp_64rr $a, $b, Mode)>; - def : Pat<(i1 (OpNode i64:$a, imm:$b)), - (setp_64ri $a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, i64:$b)), - (setp_64ir imm:$a, $b, Mode)>; -} - -multiclass ISET_FORMAT_SIGNED - : ISET_FORMAT { - // TableGen doesn't like empty multiclasses. - def : PatLeaf<(i32 0)>; -} - -multiclass ISET_FORMAT_UNSIGNED - : ISET_FORMAT { - // TableGen doesn't like empty multiclasses. 
- def : PatLeaf<(i32 0)>; -} - -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; +def cond_signed : PatLeaf<(cond), [{ + return isSignedIntSetCC(N->get()); +}]>; + +def cond_not_signed : PatLeaf<(cond), [{ + return !isSignedIntSetCC(N->get()); +}]>; // comparisons of i8 extracted with BFE as i32 // It's faster to do comparison directly on i32 extracted by BFE, // instead of the long conversion and sign extending. -def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), - (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>; -def: Pat<(setgt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), - (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>; -def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), - (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>; -def: Pat<(setge (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), - (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>; -def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), - (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>; -def: Pat<(setlt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), - (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, 
imm:$ob, 8), CmpLT)>; -def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), - (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>; -def: Pat<(setle (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), - (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>; - -def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>; -def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>; -def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>; -def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>; -def: Pat<(setult (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>; -def: Pat<(setult (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>; -def: Pat<(setule (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>; -def: Pat<(setule (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), - (SETP_u32rr 
(BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>; -def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>; -def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>; -def: Pat<(setne (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>; -def: Pat<(setne (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), - (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>; - -// i1 compare -> i32 -def : Pat<(i32 (setne i1:$a, i1:$b)), - (SELP_b32ii -1, 0, (XORb1rr $a, $b))>; -def : Pat<(i32 (setne i1:$a, i1:$b)), - (SELP_b32ii 0, -1, (XORb1rr $a, $b))>; - - - -multiclass FSET_FORMAT { - // f16 -> pred - def : Pat<(i1 (OpNode f16:$a, f16:$b)), - (SETP_f16rr $a, $b, ModeFTZ)>, - Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i1 (OpNode f16:$a, f16:$b)), - (SETP_f16rr $a, $b, Mode)>, - Requires<[useFP16Math]>; - - // bf16 -> pred - def : Pat<(i1 (OpNode bf16:$a, bf16:$b)), - (SETP_bf16rr $a, $b, ModeFTZ)>, - Requires<[hasBF16Math, doF32FTZ]>; - def : Pat<(i1 (OpNode bf16:$a, bf16:$b)), - (SETP_bf16rr $a, $b, Mode)>, - Requires<[hasBF16Math]>; - - // f32 -> pred - def : Pat<(i1 (OpNode f32:$a, f32:$b)), - (SETP_f32rr $a, $b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode f32:$a, f32:$b)), - (SETP_f32rr $a, $b, Mode)>; - def : Pat<(i1 (OpNode f32:$a, fpimm:$b)), - (SETP_f32ri $a, fpimm:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode f32:$a, fpimm:$b)), - (SETP_f32ri $a, fpimm:$b, Mode)>; - def : Pat<(i1 (OpNode fpimm:$a, f32:$b)), - (SETP_f32ir fpimm:$a, $b, 
ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, f32:$b)), - (SETP_f32ir fpimm:$a, $b, Mode)>; - - // f64 -> pred - def : Pat<(i1 (OpNode f64:$a, f64:$b)), - (SETP_f64rr $a, $b, Mode)>; - def : Pat<(i1 (OpNode f64:$a, fpimm:$b)), - (SETP_f64ri $a, fpimm:$b, Mode)>; - def : Pat<(i1 (OpNode fpimm:$a, f64:$b)), - (SETP_f64ir fpimm:$a, $b, Mode)>; -} - -defm FSetOGT : FSET_FORMAT; -defm FSetOLT : FSET_FORMAT; -defm FSetOGE : FSET_FORMAT; -defm FSetOLE : FSET_FORMAT; -defm FSetOEQ : FSET_FORMAT; -defm FSetONE : FSET_FORMAT; - -defm FSetUGT : FSET_FORMAT; -defm FSetULT : FSET_FORMAT; -defm FSetUGE : FSET_FORMAT; -defm FSetULE : FSET_FORMAT; -defm FSetUEQ : FSET_FORMAT; -defm FSetUNE : FSET_FORMAT; - -defm FSetGT : FSET_FORMAT; -defm FSetLT : FSET_FORMAT; -defm FSetGE : FSET_FORMAT; -defm FSetLE : FSET_FORMAT; -defm FSetEQ : FSET_FORMAT; -defm FSetNE : FSET_FORMAT; - -defm FSetNUM : FSET_FORMAT; -defm FSetNAN : FSET_FORMAT; +def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), + (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8)), + cond_signed:$cc), + (SETP_i32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), (cond2cc $cc))>; + +def: Pat<(setcc (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), + (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8)), + cond_signed:$cc), + (SETP_i32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), (cond2cc $cc))>; + +def: Pat<(setcc (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255)), + cond_not_signed:$cc), + (SETP_i32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), (cond2cc $cc))>; + +def: Pat<(setcc (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255)), + cond_not_signed:$cc), + (SETP_i32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), (cond2cc $cc))>; def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, 
i32>]>; @@ -2394,25 +2123,25 @@ def : Pat<(f64 (uint_to_fp i64:$a)), (CVT_f64_u64 $a, CvtRN)>; // f16 -> sint -def : Pat<(i1 (fp_to_sint f16:$a)), (SETP_b16ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_sint f16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_sint f16:$a)), (CVT_s16_f16 $a, CvtRZI)>; def : Pat<(i32 (fp_to_sint f16:$a)), (CVT_s32_f16 $a, CvtRZI)>; def : Pat<(i64 (fp_to_sint f16:$a)), (CVT_s64_f16 $a, CvtRZI)>; // f16 -> uint -def : Pat<(i1 (fp_to_uint f16:$a)), (SETP_b16ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_uint f16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_uint f16:$a)), (CVT_u16_f16 $a, CvtRZI)>; def : Pat<(i32 (fp_to_uint f16:$a)), (CVT_u32_f16 $a, CvtRZI)>; def : Pat<(i64 (fp_to_uint f16:$a)), (CVT_u64_f16 $a, CvtRZI)>; // bf16 -> sint -def : Pat<(i1 (fp_to_sint bf16:$a)), (SETP_b16ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_sint bf16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_sint bf16:$a)), (CVT_s16_bf16 $a, CvtRZI)>; def : Pat<(i32 (fp_to_sint bf16:$a)), (CVT_s32_bf16 $a, CvtRZI)>; def : Pat<(i64 (fp_to_sint bf16:$a)), (CVT_s64_bf16 $a, CvtRZI)>; // bf16 -> uint -def : Pat<(i1 (fp_to_uint bf16:$a)), (SETP_b16ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_uint bf16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_uint bf16:$a)), (CVT_u16_bf16 $a, CvtRZI)>; def : Pat<(i32 (fp_to_uint bf16:$a)), (CVT_u32_bf16 $a, CvtRZI)>; def : Pat<(i64 (fp_to_uint bf16:$a)), (CVT_u64_bf16 $a, CvtRZI)>; @@ -2422,7 +2151,7 @@ let Predicates = [doF32FTZ] in { def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI_FTZ)>; def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI_FTZ)>; } -def : Pat<(i1 (fp_to_sint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_sint f32:$a)), (SETP_i32ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 $a, CvtRZI)>; def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI)>; def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI)>; @@ -2433,19 +2162,19 @@ 
let Predicates = [doF32FTZ] in { def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI_FTZ)>; def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI_FTZ)>; } -def : Pat<(i1 (fp_to_uint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_uint f32:$a)), (SETP_i32ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 $a, CvtRZI)>; def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI)>; def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI)>; // f64 -> sint -def : Pat<(i1 (fp_to_sint f64:$a)), (SETP_b64ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_sint f64:$a)), (SETP_i64ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_sint f64:$a)), (CVT_s16_f64 $a, CvtRZI)>; def : Pat<(i32 (fp_to_sint f64:$a)), (CVT_s32_f64 $a, CvtRZI)>; def : Pat<(i64 (fp_to_sint f64:$a)), (CVT_s64_f64 $a, CvtRZI)>; // f64 -> uint -def : Pat<(i1 (fp_to_uint f64:$a)), (SETP_b64ri $a, 0, CmpEQ)>; +def : Pat<(i1 (fp_to_uint f64:$a)), (SETP_i64ri $a, 0, CmpEQ)>; def : Pat<(i16 (fp_to_uint f64:$a)), (CVT_u16_f64 $a, CvtRZI)>; def : Pat<(i32 (fp_to_uint f64:$a)), (CVT_u32_f64 $a, CvtRZI)>; def : Pat<(i64 (fp_to_uint f64:$a)), (CVT_u64_f64 $a, CvtRZI)>; @@ -2490,14 +2219,14 @@ def : Pat<(i64 (anyext i32:$a)), (CVT_u64_u32 $a, CvtNONE)>; // truncate i64 def : Pat<(i32 (trunc i64:$a)), (CVT_u32_u64 $a, CvtNONE)>; def : Pat<(i16 (trunc i64:$a)), (CVT_u16_u64 $a, CvtNONE)>; -def : Pat<(i1 (trunc i64:$a)), (SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE)>; +def : Pat<(i1 (trunc i64:$a)), (SETP_i64ri (ANDb64ri $a, 1), 0, CmpNE)>; // truncate i32 def : Pat<(i16 (trunc i32:$a)), (CVT_u16_u32 $a, CvtNONE)>; -def : Pat<(i1 (trunc i32:$a)), (SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE)>; +def : Pat<(i1 (trunc i32:$a)), (SETP_i32ri (ANDb32ri $a, 1), 0, CmpNE)>; // truncate i16 -def : Pat<(i1 (trunc i16:$a)), (SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE)>; +def : Pat<(i1 (trunc i16:$a)), (SETP_i16ri (ANDb16ri $a, 1), 0, CmpNE)>; // sext_inreg def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>; @@ 
-2746,7 +2475,7 @@ let isTerminator=1 in { } def : Pat<(brcond i32:$a, bb:$target), - (CBranch (SETP_u32ri $a, 0, CmpNE), bb:$target)>; + (CBranch (SETP_i32ri $a, 0, CmpNE), bb:$target)>; // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a // conditional branch if the target block is the next block so that the code @@ -2976,31 +2705,18 @@ def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b), return N->getFlags().hasNoSignedZeros() || TM.Options.NoSignedZerosFPMath; }]>; -class NVPTXInst_rrr Preds> - : BasicNVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), Instruction>, - Requires; - -def FMARELU_F16 : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_F16_FTZ : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_BF16 : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_F16X2 : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_F16X2_FTZ : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_BF16X2 : NVPTXInst_rrr, hasSM<80>]>; - -// FTZ -def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)), - (FMARELU_F16_FTZ $a, $b, $c)>, - Requires<[doF32FTZ]>; -def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)), - (FMARELU_F16X2_FTZ $a, $b, $c)>, - Requires<[doF32FTZ]>; - -// NO FTZ -def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)), - (FMARELU_F16 $a, $b, $c)>; -def : Pat<(bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan bf16:$a, bf16:$b, bf16:$c), fpimm_any_zero)), - (FMARELU_BF16 $a, $b, $c)>; -def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)), - (FMARELU_F16X2 $a, $b, $c)>; -def : Pat<(v2bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2bf16:$a, v2bf16:$b, v2bf16:$c), fpimm_positive_zero_v2bf16)), - (FMARELU_BF16X2 $a, $b, $c)>; +class FMARELUInst + : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c), + !if(allow_ftz, (ins 
FTZFlag:$ftz), (ins)), + "fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.Str, + [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>; + +let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in { + def FMARELU_F16 : FMARELUInst; + def FMARELU_F16X2 : FMARELUInst; +} + +let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in { + def FMARELU_BF16 : FMARELUInst; + def FMARELU_BF16X2 : FMARELUInst; +} \ No newline at end of file diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index cc1fd027d8515..d840324ce8238 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1006,12 +1006,14 @@ def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs), // Min Max // -def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", B32, B32, B32, int_nvvm_fmin_f>; -def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32", B32, B32, B32, int_nvvm_fmin_ftz_f>; -def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32", B32, B32, B32, int_nvvm_fmin_nan_f, - [hasPTX<70>, hasSM<80>]>; -def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_f, - [hasPTX<70>, hasSM<80>]>; +def : Pat<(int_nvvm_fmin_f f32:$a, f32:$b), (MIN_f32_rr $a, $b, NoFTZ)>; +def : Pat<(int_nvvm_fmin_ftz_f f32:$a, f32:$b), (MIN_f32_rr $a, $b, FTZ)>; + +let Predicates = [hasPTX<70>, hasSM<80>] in { + def : Pat<(int_nvvm_fmin_nan_f f32:$a, f32:$b), (MIN_NAN_f32_rr $a, $b, NoFTZ)>; + def : Pat<(int_nvvm_fmin_ftz_nan_f f32:$a, f32:$b), (MIN_NAN_f32_rr $a, $b, FTZ)>; +} + def INT_NVVM_FMIN_XORSIGN_ABS_F : F_MATH_2<"min.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; @@ -1025,12 +1027,15 @@ def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; -def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", B32, B32, B32, int_nvvm_fmax_f>; -def 
INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32", B32, B32, B32, int_nvvm_fmax_ftz_f>; -def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32", B32, B32, B32, int_nvvm_fmax_nan_f, - [hasPTX<70>, hasSM<80>]>; -def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_f, - [hasPTX<70>, hasSM<80>]>; + +def : Pat<(int_nvvm_fmax_f f32:$a, f32:$b), (MAX_f32_rr $a, $b, NoFTZ)>; +def : Pat<(int_nvvm_fmax_ftz_f f32:$a, f32:$b), (MAX_f32_rr $a, $b, FTZ)>; + +let Predicates = [hasPTX<70>, hasSM<80>] in { + def : Pat<(int_nvvm_fmax_nan_f f32:$a, f32:$b), (MAX_NAN_f32_rr $a, $b, NoFTZ)>; + def : Pat<(int_nvvm_fmax_ftz_nan_f f32:$a, f32:$b), (MAX_NAN_f32_rr $a, $b, FTZ)>; +} + def INT_NVVM_FMAX_XORSIGN_ABS_F : F_MATH_2<"max.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; @@ -1044,8 +1049,8 @@ def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F : F_MATH_2<"max.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; -def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", B64, B64, B64, int_nvvm_fmin_d>; -def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", B64, B64, B64, int_nvvm_fmax_d>; +def : Pat<(int_nvvm_fmin_d f64:$a, f64:$b), (MIN_f64_rr $a, $b)>; +def : Pat<(int_nvvm_fmax_d f64:$a, f64:$b), (MAX_f64_rr $a, $b)>; // // Min Max f16, f16x2, bf16, bf16x2 @@ -1181,17 +1186,10 @@ def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64", B64, B64, B64, int_nvvm_div_rz_d> def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64", B64, B64, B64, int_nvvm_div_rm_d>; def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64", B64, B64, B64, int_nvvm_div_rp_d>; -def : Pat<(int_nvvm_div_full f32:$a, f32:$b), - (FDIV32rr $a, $b)>; - -def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), - (FDIV32ri $a, f32imm:$b)>; - -def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), - (FDIV32rr_ftz $a, $b)>; - -def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), - (FDIV32ri_ftz $a, f32imm:$b)>; +def : Pat<(int_nvvm_div_full f32:$a, f32:$b), (FDIV32rr $a, $b, 
NoFTZ)>; +def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b, NoFTZ)>; +def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), (FDIV32rr $a, $b, FTZ)>; +def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b, FTZ)>; // // Sad @@ -1288,48 +1286,36 @@ def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>; // Exp2 Log2 // -def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32", - F32RT, F32RT, int_nvvm_ex2_approx_ftz_f>; -def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32", - F32RT, F32RT, int_nvvm_ex2_approx_f>; -def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64", - F64RT, F64RT, int_nvvm_ex2_approx_d>; - -def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16", - F16RT, F16RT, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; -def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2", - F16X2RT, F16X2RT, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; - -def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>; -def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_F $a)>; -def : Pat<(fexp2 f16:$a), (INT_NVVM_EX2_APPROX_F16 $a)>, Requires<[useFP16Math]>; -def : Pat<(fexp2 v2f16:$a), (INT_NVVM_EX2_APPROX_F16X2 $a)>, Requires<[useFP16Math]>; - -def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32", - F32RT, F32RT, int_nvvm_lg2_approx_ftz_f>; -def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32", - F32RT, F32RT, int_nvvm_lg2_approx_f>; -def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64", - F64RT, F64RT, int_nvvm_lg2_approx_d>; - -def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>, - Requires<[doF32FTZ]>; -def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>, - Requires<[doNoF32FTZ]>; +def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>; +def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>; + +let Predicates = [hasPTX<70>, hasSM<75>] in { + def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>; + def : 
Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>; +} + +def LG2_APPROX_f32 : + BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz), + "lg2.approx$ftz.f32", + [(set f32:$dst, (flog2 f32:$src))]>; + +def LG2_APPROX_f64 : + BasicNVPTXInst<(outs B64:$dst), (ins B64:$src), + "lg2.approx.f64", + [(set f64:$dst, (flog2 f64:$src))]>; + +def : Pat<(int_nvvm_lg2_approx_ftz_f f32:$a), (LG2_APPROX_f32 $a, FTZ)>; +def : Pat<(int_nvvm_lg2_approx_f f32:$a), (LG2_APPROX_f32 $a, NoFTZ)>; +def : Pat<(int_nvvm_lg2_approx_d f64:$a), (LG2_APPROX_f64 $a)>; // // Sin Cos // -def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32", - F32RT, F32RT, int_nvvm_sin_approx_ftz_f>; -def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32", - F32RT, F32RT, int_nvvm_sin_approx_f>; - -def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32", - F32RT, F32RT, int_nvvm_cos_approx_ftz_f>; -def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32", - F32RT, F32RT, int_nvvm_cos_approx_f>; +def : Pat<(int_nvvm_sin_approx_ftz_f f32:$a), (SIN_APPROX_f32 $a, FTZ)>; +def : Pat<(int_nvvm_sin_approx_f f32:$a), (SIN_APPROX_f32 $a, NoFTZ)>; +def : Pat<(int_nvvm_cos_approx_ftz_f f32:$a), (COS_APPROX_f32 $a, FTZ)>; +def : Pat<(int_nvvm_cos_approx_f f32:$a), (COS_APPROX_f32 $a, NoFTZ)>; // // Fma @@ -1478,31 +1464,30 @@ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>; // Rsqrt // -def INT_NVVM_RSQRT_APPROX_FTZ_F - : F_MATH_1<"rsqrt.approx.ftz.f32", F32RT, F32RT, int_nvvm_rsqrt_approx_ftz_f>; -def INT_NVVM_RSQRT_APPROX_FTZ_D - : F_MATH_1<"rsqrt.approx.ftz.f64", F64RT, F64RT, int_nvvm_rsqrt_approx_ftz_d>; +foreach t = [F32RT, F64RT] in { + def RSQRT_APPROX_ # t.Ty : + BasicFlagsNVPTXInst<(outs t.RC:$dst), + (ins t.RC:$a), (ins FTZFlag:$ftz), + "rsqrt.approx$ftz.f" # t.Size>; +} + +def : Pat<(int_nvvm_rsqrt_approx_ftz_f f32:$a), (RSQRT_APPROX_f32 $a, FTZ)>; +def : Pat<(int_nvvm_rsqrt_approx_ftz_d f64:$a), (RSQRT_APPROX_f64 $a, FTZ)>; +def : 
Pat<(int_nvvm_rsqrt_approx_f f32:$a), (RSQRT_APPROX_f32 $a, NoFTZ)>; +def : Pat<(int_nvvm_rsqrt_approx_d f64:$a), (RSQRT_APPROX_f64 $a, NoFTZ)>; -def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32", - F32RT, F32RT, int_nvvm_rsqrt_approx_f>; -def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64", - F64RT, F64RT, int_nvvm_rsqrt_approx_d>; // 1.0f / sqrt_approx -> rsqrt_approx -def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)), - (INT_NVVM_RSQRT_APPROX_F $a)>, - Requires<[doRsqrtOpt]>; -def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)), - (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>, - Requires<[doRsqrtOpt]>; - -// same for int_nvvm_sqrt_f when non-precision sqrt is requested -def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)), - (INT_NVVM_RSQRT_APPROX_F $a)>, - Requires<[doRsqrtOpt, doNoF32FTZ]>; -def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)), - (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>, - Requires<[doRsqrtOpt, doF32FTZ]>; +let Predicates = [doRsqrtOpt] in { + def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)), + (RSQRT_APPROX_f32 $a, NoFTZ)>; + def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)), + (RSQRT_APPROX_f32 $a, FTZ)>; + + // same for int_nvvm_sqrt_f when non-precision sqrt is requested + def : Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)), + (RSQRT_APPROX_f32 $a)>; +} // // Add // diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir index 61c3ba2ee54ab..e3b072549bc04 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir @@ -48,8 +48,8 @@ body: | %4 = CVT_f32_f64 %3, 5 %5 = CVT_f32_s32 %2, 5 ; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00 - %6 = FADD_rnf32ri %5, float 6.250000e+00 - %7 = FMUL_rnf32rr %6, %4 + %6 = FADD_rnf32ri %5, float 6.250000e+00, 0 + %7 = FMUL_rnf32rr %6, %4, 0 ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store 
(s32), addrspace 101) Return ... @@ -74,8 +74,8 @@ body: | %4 = CVT_f32_f64 %3, 5 %5 = CVT_f32_s32 %2, 5 ; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000 - %6 = FADD_rnf32ri %5, float 0x7FF8000000000000 - %7 = FMUL_rnf32rr %6, %4 + %6 = FADD_rnf32ri %5, float 0x7FF8000000000000, 0 + %7 = FMUL_rnf32rr %6, %4, 0 ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101) Return ... diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll index 22a7177650ee2..94f49b01e6ea6 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll @@ -71,7 +71,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: and.b32 %r31, %r54, %r3; ; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30; ; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32; -; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54; +; CHECKPTX62-NEXT: setp.ne.b32 %p1, %r6, %r54; ; CHECKPTX62-NEXT: mov.b32 %r54, %r6; ; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1; ; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44 @@ -87,7 +87,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: and.b32 %r36, %r55, %r3; ; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35; ; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37; -; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55; +; CHECKPTX62-NEXT: setp.ne.b32 %p2, %r9, %r55; ; CHECKPTX62-NEXT: mov.b32 %r55, %r9; ; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3; ; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26 @@ -108,7 +108,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: and.b32 %r44, %r56, %r12; ; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43; ; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45; -; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56; +; CHECKPTX62-NEXT: setp.ne.b32 %p3, %r15, %r56; ; CHECKPTX62-NEXT: mov.b32 %r56, %r15; ; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5; 
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8 @@ -129,7 +129,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: and.b32 %r52, %r57, %r18; ; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51; ; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53; -; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57; +; CHECKPTX62-NEXT: setp.ne.b32 %p4, %r21, %r57; ; CHECKPTX62-NEXT: mov.b32 %r57, %r21; ; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7; ; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll index b5a4f94611453..b21bd16d55c2c 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll @@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: and.b32 %r31, %r54, %r3; ; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30; ; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32; -; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54; +; CHECKPTX71-NEXT: setp.ne.b32 %p1, %r6, %r54; ; CHECKPTX71-NEXT: mov.b32 %r54, %r6; ; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1; ; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44 @@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: and.b32 %r36, %r55, %r3; ; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35; ; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37; -; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55; +; CHECKPTX71-NEXT: setp.ne.b32 %p2, %r9, %r55; ; CHECKPTX71-NEXT: mov.b32 %r55, %r9; ; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3; ; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26 @@ -110,7 +110,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: and.b32 %r44, %r56, %r12; ; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43; ; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45; -; CHECKPTX71-NEXT: setp.ne.s32 
%p3, %r15, %r56; +; CHECKPTX71-NEXT: setp.ne.b32 %p3, %r15, %r56; ; CHECKPTX71-NEXT: mov.b32 %r56, %r15; ; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5; ; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8 @@ -132,7 +132,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: and.b32 %r52, %r57, %r18; ; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51; ; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53; -; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57; +; CHECKPTX71-NEXT: setp.ne.b32 %p4, %r21, %r57; ; CHECKPTX71-NEXT: mov.b32 %r57, %r21; ; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7; ; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll index 55ce3dfc46539..04a58cf22cfc5 100644 --- a/llvm/test/CodeGen/NVPTX/atomics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics.ll @@ -453,7 +453,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) { ; CHECK-NEXT: or.b32 %r17, %r16, %r15; ; CHECK-NEXT: membar.sys; ; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r19, %r17; -; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r19; +; CHECK-NEXT: setp.ne.b32 %p1, %r5, %r19; ; CHECK-NEXT: mov.b32 %r19, %r5; ; CHECK-NEXT: @%p1 bra $L__BB24_1; ; CHECK-NEXT: // %bb.2: // %atomicrmw.end diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll index f59f51c3c57d3..a386e4292777b 100644 --- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll @@ -1339,9 +1339,9 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) { ; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2; ; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2; -; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0; +; SM70-NEXT: setp.eq.b16 %p3, %rs1, 0; ; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0; +; SM70-NEXT: setp.eq.b16 %p4, %rs2, 0; ; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; 
SM70-NEXT: cvt.u32.u16 %r5, %rs4; ; SM70-NEXT: shl.b32 %r6, %r5, 16; @@ -1462,9 +1462,9 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2; ; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2; -; SM70-NEXT: setp.eq.s16 %p3, %rs2, 0; +; SM70-NEXT: setp.eq.b16 %p3, %rs2, 0; ; SM70-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; -; SM70-NEXT: setp.eq.s16 %p4, %rs4, 0; +; SM70-NEXT: setp.eq.b16 %p4, %rs4, 0; ; SM70-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; SM70-NEXT: cvt.u32.u16 %r5, %rs6; ; SM70-NEXT: shl.b32 %r6, %r5, 16; @@ -1478,9 +1478,9 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; SM70-NEXT: setp.nan.f32 %p7, %r10, %r8; ; SM70-NEXT: selp.b16 %rs11, 0x7FC0, %rs10, %p7; -; SM70-NEXT: setp.eq.s16 %p8, %rs1, 0; +; SM70-NEXT: setp.eq.b16 %p8, %rs1, 0; ; SM70-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; -; SM70-NEXT: setp.eq.s16 %p9, %rs3, 0; +; SM70-NEXT: setp.eq.b16 %p9, %rs3, 0; ; SM70-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; SM70-NEXT: cvt.u32.u16 %r11, %rs11; ; SM70-NEXT: shl.b32 %r12, %r11, 16; diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir index 4d80d52de8da8..7a2b92af4b4a8 100644 --- a/llvm/test/CodeGen/NVPTX/branch-fold.mir +++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir @@ -58,7 +58,7 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:b64 = ADDi64ri [[ADDi64ri]], 1 - ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_s64ri [[ADDi64ri]], 1, 2 + ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_i64ri [[ADDi64ri]], 1, 20 ; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb4: @@ -77,7 +77,7 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) %5:b64 = ADDi64ri %5, 1 - %4:b1 = SETP_s64ri %5, 1, 2 + %4:b1 = SETP_i64ri %5, 1, 20 CBranch %4, %bb.2 
bb.3.bb4: diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll index 65a077d67e4ba..5bf7a37a00c66 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll @@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB0_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB0_1; ; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end @@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB1_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB1_1; ; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end @@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB2_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; 
SM60-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB2_1; ; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end @@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB3_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB3_1; ; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end @@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB4_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB4_1; ; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end @@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB5_3; ; SM60-NEXT: // 
%bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB5_1; ; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end @@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB6_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB6_1; ; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end @@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB7_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB7_1; ; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end @@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: 
@%p1 bra $L__BB8_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB8_1; ; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end @@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB9_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB9_1; ; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end @@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB10_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB10_1; ; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end @@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: 
setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB11_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB11_1; ; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end @@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB12_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB12_1; ; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end @@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB13_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB13_1; ; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end @@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; 
SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB14_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB14_1; ; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end @@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB15_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB15_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB15_1; ; SM60-NEXT: $L__BB15_3: // %partword.cmpxchg.end @@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB16_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB16_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB16_1; ; SM60-NEXT: $L__BB16_3: // %partword.cmpxchg.end @@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: 
atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB17_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB17_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB17_1; ; SM60-NEXT: $L__BB17_3: // %partword.cmpxchg.end @@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB18_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB18_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB18_1; ; SM60-NEXT: $L__BB18_3: // %partword.cmpxchg.end @@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB19_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB19_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB19_1; ; SM60-NEXT: $L__BB19_3: // %partword.cmpxchg.end @@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; 
SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB20_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB20_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB20_1; ; SM60-NEXT: $L__BB20_3: // %partword.cmpxchg.end @@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB21_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB21_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB21_1; ; SM60-NEXT: $L__BB21_3: // %partword.cmpxchg.end @@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB22_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB22_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB22_1; ; SM60-NEXT: $L__BB22_3: // %partword.cmpxchg.end @@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 
%cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB23_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB23_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB23_1; ; SM60-NEXT: $L__BB23_3: // %partword.cmpxchg.end @@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB24_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB24_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB24_1; ; SM60-NEXT: $L__BB24_3: // %partword.cmpxchg.end @@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB25_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB25_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB25_1; ; SM60-NEXT: $L__BB25_3: // %partword.cmpxchg.end @@ -1237,12 +1237,12 @@ define i8 
@release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB26_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB26_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB26_1; ; SM60-NEXT: $L__BB26_3: // %partword.cmpxchg.end @@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB27_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB27_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB27_1; ; SM60-NEXT: $L__BB27_3: // %partword.cmpxchg.end @@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB28_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB28_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB28_1; ; SM60-NEXT: $L__BB28_3: // 
%partword.cmpxchg.end @@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB29_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB29_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB29_1; ; SM60-NEXT: $L__BB29_3: // %partword.cmpxchg.end @@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB30_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB30_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB30_1; ; SM60-NEXT: $L__BB30_3: // %partword.cmpxchg.end @@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB31_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB31_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; 
SM60-NEXT: @%p2 bra $L__BB31_1; ; SM60-NEXT: $L__BB31_3: // %partword.cmpxchg.end @@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB32_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB32_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB32_1; ; SM60-NEXT: $L__BB32_3: // %partword.cmpxchg.end @@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB33_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB33_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB33_1; ; SM60-NEXT: $L__BB33_3: // %partword.cmpxchg.end @@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB34_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB34_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: 
setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB34_1; ; SM60-NEXT: $L__BB34_3: // %partword.cmpxchg.end @@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB35_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB35_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB35_1; ; SM60-NEXT: $L__BB35_3: // %partword.cmpxchg.end @@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB36_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB36_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB36_1; ; SM60-NEXT: $L__BB36_3: // %partword.cmpxchg.end @@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB37_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB37_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; 
SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB37_1; ; SM60-NEXT: $L__BB37_3: // %partword.cmpxchg.end @@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB38_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB38_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB38_1; ; SM60-NEXT: $L__BB38_3: // %partword.cmpxchg.end @@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB39_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB39_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB39_1; ; SM60-NEXT: $L__BB39_3: // %partword.cmpxchg.end @@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB40_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: 
Header=BB40_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB40_1; ; SM60-NEXT: $L__BB40_3: // %partword.cmpxchg.end @@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB41_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB41_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB41_1; ; SM60-NEXT: $L__BB41_3: // %partword.cmpxchg.end @@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB42_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB42_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB42_1; ; SM60-NEXT: $L__BB42_3: // %partword.cmpxchg.end @@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB43_3; ; SM60-NEXT: // %bb.2: // 
%partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB43_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB43_1; ; SM60-NEXT: $L__BB43_3: // %partword.cmpxchg.end @@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: or.b32 %r17, %r20, %r3; ; SM60-NEXT: or.b32 %r18, %r20, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM60-NEXT: @%p1 bra $L__BB44_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB44_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB44_1; ; SM60-NEXT: $L__BB44_3: // %partword.cmpxchg.end @@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB45_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB45_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB45_1; ; SM60-NEXT: $L__BB45_3: // %partword.cmpxchg.end @@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; 
; SM60-NEXT: @%p1 bra $L__BB46_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB46_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB46_1; ; SM60-NEXT: $L__BB46_3: // %partword.cmpxchg.end @@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB47_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB47_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB47_1; ; SM60-NEXT: $L__BB47_3: // %partword.cmpxchg.end @@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB48_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB48_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB48_1; ; SM60-NEXT: $L__BB48_3: // %partword.cmpxchg.end @@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 
%p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB49_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB49_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB49_1; ; SM60-NEXT: $L__BB49_3: // %partword.cmpxchg.end @@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB50_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB50_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB50_1; ; SM60-NEXT: $L__BB50_3: // %partword.cmpxchg.end @@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB51_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB51_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB51_1; ; SM60-NEXT: $L__BB51_3: // %partword.cmpxchg.end @@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: 
atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB52_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB52_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB52_1; ; SM60-NEXT: $L__BB52_3: // %partword.cmpxchg.end @@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB53_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB53_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB53_1; ; SM60-NEXT: $L__BB53_3: // %partword.cmpxchg.end @@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB54_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB54_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB54_1; ; SM60-NEXT: $L__BB54_3: // %partword.cmpxchg.end @@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, 
%r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB55_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB55_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB55_1; ; SM60-NEXT: $L__BB55_3: // %partword.cmpxchg.end @@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB56_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB56_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB56_1; ; SM60-NEXT: $L__BB56_3: // %partword.cmpxchg.end @@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB57_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB57_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB57_1; ; SM60-NEXT: $L__BB57_3: // %partword.cmpxchg.end @@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr 
addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB58_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB58_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB58_1; ; SM60-NEXT: $L__BB58_3: // %partword.cmpxchg.end @@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB59_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB59_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB59_1; ; SM60-NEXT: $L__BB59_3: // %partword.cmpxchg.end @@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB60_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB60_1; ; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end @@ -2850,12 
+2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB61_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB61_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB61_1; ; SM60-NEXT: $L__BB61_3: // %partword.cmpxchg.end @@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB62_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB62_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB62_1; ; SM60-NEXT: $L__BB62_3: // %partword.cmpxchg.end @@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB63_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB63_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB63_1; ; 
SM60-NEXT: $L__BB63_3: // %partword.cmpxchg.end @@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB64_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB64_1; ; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end @@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB65_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB65_1; ; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end @@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB66_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB66_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: 
mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB66_1; ; SM60-NEXT: $L__BB66_3: // %partword.cmpxchg.end @@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB67_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB67_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB67_1; ; SM60-NEXT: $L__BB67_3: // %partword.cmpxchg.end @@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB68_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB68_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB68_1; ; SM60-NEXT: $L__BB68_3: // %partword.cmpxchg.end @@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB69_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB69_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; 
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB69_1; ; SM60-NEXT: $L__BB69_3: // %partword.cmpxchg.end @@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB70_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB70_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB70_1; ; SM60-NEXT: $L__BB70_3: // %partword.cmpxchg.end @@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB71_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB71_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB71_1; ; SM60-NEXT: $L__BB71_3: // %partword.cmpxchg.end @@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB72_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB72_1 Depth=1 ; SM60-NEXT: and.b32 
%r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB72_1; ; SM60-NEXT: $L__BB72_3: // %partword.cmpxchg.end @@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB73_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB73_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB73_1; ; SM60-NEXT: $L__BB73_3: // %partword.cmpxchg.end @@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB74_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB74_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB74_1; ; SM60-NEXT: $L__BB74_3: // %partword.cmpxchg.end @@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB75_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: 
// in Loop: Header=BB75_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB75_1; ; SM60-NEXT: $L__BB75_3: // %partword.cmpxchg.end @@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB76_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB76_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB76_1; ; SM60-NEXT: $L__BB76_3: // %partword.cmpxchg.end @@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB77_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB77_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB77_1; ; SM60-NEXT: $L__BB77_3: // %partword.cmpxchg.end @@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB78_3; ; 
SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB78_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB78_1; ; SM60-NEXT: $L__BB78_3: // %partword.cmpxchg.end @@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB79_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB79_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB79_1; ; SM60-NEXT: $L__BB79_3: // %partword.cmpxchg.end @@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB80_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB80_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB80_1; ; SM60-NEXT: $L__BB80_3: // %partword.cmpxchg.end @@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: 
setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB81_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB81_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB81_1; ; SM60-NEXT: $L__BB81_3: // %partword.cmpxchg.end @@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB82_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB82_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB82_1; ; SM60-NEXT: $L__BB82_3: // %partword.cmpxchg.end @@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB83_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB83_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB83_1; ; SM60-NEXT: $L__BB83_3: // %partword.cmpxchg.end @@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; 
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB84_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB84_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB84_1; ; SM60-NEXT: $L__BB84_3: // %partword.cmpxchg.end @@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB85_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB85_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB85_1; ; SM60-NEXT: $L__BB85_3: // %partword.cmpxchg.end @@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB86_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB86_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB86_1; ; SM60-NEXT: $L__BB86_3: // %partword.cmpxchg.end @@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, 
%r4; ; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB87_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB87_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB87_1; ; SM60-NEXT: $L__BB87_3: // %partword.cmpxchg.end @@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB88_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB88_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB88_1; ; SM60-NEXT: $L__BB88_3: // %partword.cmpxchg.end @@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: or.b32 %r16, %r19, %r3; ; SM60-NEXT: or.b32 %r17, %r19, %r4; ; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM60-NEXT: @%p1 bra $L__BB89_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB89_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; -; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB89_1; ; SM60-NEXT: $L__BB89_3: // %partword.cmpxchg.end diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll index 
7107fbcf6eb54..dcd215d60ace5 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll @@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB0_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end @@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB1_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB1_1; ; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end @@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB2_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; 
SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB2_1; ; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end @@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB3_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB3_1; ; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end @@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB4_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB4_1; ; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end @@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB5_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: 
Header=BB5_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB5_1; ; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end @@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB6_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB6_1; ; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end @@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB7_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB7_1; ; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end @@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB8_3; ; SM70-NEXT: // 
%bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB8_1; ; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end @@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB9_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB9_1; ; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end @@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB10_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB10_1; ; SM70-NEXT: $L__BB10_3: // %partword.cmpxchg.end @@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 
%p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB11_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB11_1; ; SM70-NEXT: $L__BB11_3: // %partword.cmpxchg.end @@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB12_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB12_1; ; SM70-NEXT: $L__BB12_3: // %partword.cmpxchg.end @@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB13_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB13_1; ; SM70-NEXT: $L__BB13_3: // %partword.cmpxchg.end @@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, 
%r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB14_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB14_1; ; SM70-NEXT: $L__BB14_3: // %partword.cmpxchg.end @@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB15_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB15_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB15_1; ; SM70-NEXT: $L__BB15_3: // %partword.cmpxchg.end @@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB16_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB16_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB16_1; ; SM70-NEXT: $L__BB16_3: // %partword.cmpxchg.end @@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, 
%r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB17_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB17_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB17_1; ; SM70-NEXT: $L__BB17_3: // %partword.cmpxchg.end @@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB18_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB18_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB18_1; ; SM70-NEXT: $L__BB18_3: // %partword.cmpxchg.end @@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB19_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB19_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB19_1; ; SM70-NEXT: $L__BB19_3: // %partword.cmpxchg.end @@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, 
i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB20_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB20_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB20_1; ; SM70-NEXT: $L__BB20_3: // %partword.cmpxchg.end @@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB21_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB21_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB21_1; ; SM70-NEXT: $L__BB21_3: // %partword.cmpxchg.end @@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB22_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB22_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB22_1; ; SM70-NEXT: $L__BB22_3: // %partword.cmpxchg.end @@ -1096,12 +1096,12 
@@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB23_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB23_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB23_1; ; SM70-NEXT: $L__BB23_3: // %partword.cmpxchg.end @@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB24_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB24_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB24_1; ; SM70-NEXT: $L__BB24_3: // %partword.cmpxchg.end @@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB25_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB25_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra 
$L__BB25_1; ; SM70-NEXT: $L__BB25_3: // %partword.cmpxchg.end @@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB26_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB26_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB26_1; ; SM70-NEXT: $L__BB26_3: // %partword.cmpxchg.end @@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB27_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB27_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB27_1; ; SM70-NEXT: $L__BB27_3: // %partword.cmpxchg.end @@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB28_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB28_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: 
setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB28_1; ; SM70-NEXT: $L__BB28_3: // %partword.cmpxchg.end @@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB29_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB29_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB29_1; ; SM70-NEXT: $L__BB29_3: // %partword.cmpxchg.end @@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB30_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB30_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB30_1; ; SM70-NEXT: $L__BB30_3: // %partword.cmpxchg.end @@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB31_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB31_1 Depth=1 ; SM70-NEXT: 
and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB31_1; ; SM70-NEXT: $L__BB31_3: // %partword.cmpxchg.end @@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB32_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB32_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB32_1; ; SM70-NEXT: $L__BB32_3: // %partword.cmpxchg.end @@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB33_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB33_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB33_1; ; SM70-NEXT: $L__BB33_3: // %partword.cmpxchg.end @@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB34_3; ; SM70-NEXT: // %bb.2: // 
%partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB34_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB34_1; ; SM70-NEXT: $L__BB34_3: // %partword.cmpxchg.end @@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB35_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB35_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB35_1; ; SM70-NEXT: $L__BB35_3: // %partword.cmpxchg.end @@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB36_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB36_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB36_1; ; SM70-NEXT: $L__BB36_3: // %partword.cmpxchg.end @@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: 
setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB37_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB37_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB37_1; ; SM70-NEXT: $L__BB37_3: // %partword.cmpxchg.end @@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB38_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB38_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB38_1; ; SM70-NEXT: $L__BB38_3: // %partword.cmpxchg.end @@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB39_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB39_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB39_1; ; SM70-NEXT: $L__BB39_3: // %partword.cmpxchg.end @@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, 
[%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB40_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB40_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB40_1; ; SM70-NEXT: $L__BB40_3: // %partword.cmpxchg.end @@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB41_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB41_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB41_1; ; SM70-NEXT: $L__BB41_3: // %partword.cmpxchg.end @@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB42_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB42_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB42_1; ; SM70-NEXT: $L__BB42_3: // %partword.cmpxchg.end @@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; 
SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB43_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB43_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB43_1; ; SM70-NEXT: $L__BB43_3: // %partword.cmpxchg.end @@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB44_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB44_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB44_1; ; SM70-NEXT: $L__BB44_3: // %partword.cmpxchg.end @@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB45_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB45_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB45_1; ; SM70-NEXT: $L__BB45_3: // %partword.cmpxchg.end @@ -2172,12 +2172,12 @@ define i16 
@monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB46_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB46_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB46_1; ; SM70-NEXT: $L__BB46_3: // %partword.cmpxchg.end @@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB47_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB47_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB47_1; ; SM70-NEXT: $L__BB47_3: // %partword.cmpxchg.end @@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB48_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB48_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB48_1; ; 
SM70-NEXT: $L__BB48_3: // %partword.cmpxchg.end @@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB49_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB49_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB49_1; ; SM70-NEXT: $L__BB49_3: // %partword.cmpxchg.end @@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB50_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB50_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB50_1; ; SM70-NEXT: $L__BB50_3: // %partword.cmpxchg.end @@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB51_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB51_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 
%p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB51_1; ; SM70-NEXT: $L__BB51_3: // %partword.cmpxchg.end @@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB52_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB52_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB52_1; ; SM70-NEXT: $L__BB52_3: // %partword.cmpxchg.end @@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB53_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB53_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB53_1; ; SM70-NEXT: $L__BB53_3: // %partword.cmpxchg.end @@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB54_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB54_1 Depth=1 ; SM70-NEXT: and.b32 
%r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB54_1; ; SM70-NEXT: $L__BB54_3: // %partword.cmpxchg.end @@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB55_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB55_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB55_1; ; SM70-NEXT: $L__BB55_3: // %partword.cmpxchg.end @@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB56_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB56_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB56_1; ; SM70-NEXT: $L__BB56_3: // %partword.cmpxchg.end @@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB57_3; ; SM70-NEXT: // %bb.2: // 
%partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB57_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB57_1; ; SM70-NEXT: $L__BB57_3: // %partword.cmpxchg.end @@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB58_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB58_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB58_1; ; SM70-NEXT: $L__BB58_3: // %partword.cmpxchg.end @@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB59_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB59_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB59_1; ; SM70-NEXT: $L__BB59_3: // %partword.cmpxchg.end @@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: 
setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB60_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB60_1; ; SM70-NEXT: $L__BB60_3: // %partword.cmpxchg.end @@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB61_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB61_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB61_1; ; SM70-NEXT: $L__BB61_3: // %partword.cmpxchg.end @@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB62_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB62_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB62_1; ; SM70-NEXT: $L__BB62_3: // %partword.cmpxchg.end @@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 
%r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB63_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB63_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB63_1; ; SM70-NEXT: $L__BB63_3: // %partword.cmpxchg.end @@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB64_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB64_1; ; SM70-NEXT: $L__BB64_3: // %partword.cmpxchg.end @@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB65_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB65_1; ; SM70-NEXT: $L__BB65_3: // %partword.cmpxchg.end @@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, 
%r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB66_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB66_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB66_1; ; SM70-NEXT: $L__BB66_3: // %partword.cmpxchg.end @@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB67_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB67_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB67_1; ; SM70-NEXT: $L__BB67_3: // %partword.cmpxchg.end @@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB68_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB68_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB68_1; ; SM70-NEXT: $L__BB68_3: // %partword.cmpxchg.end @@ -3215,12 +3215,12 @@ define i16 
@release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB69_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB69_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB69_1; ; SM70-NEXT: $L__BB69_3: // %partword.cmpxchg.end @@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB70_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB70_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB70_1; ; SM70-NEXT: $L__BB70_3: // %partword.cmpxchg.end @@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB71_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB71_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB71_1; ; 
SM70-NEXT: $L__BB71_3: // %partword.cmpxchg.end @@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB72_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB72_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB72_1; ; SM70-NEXT: $L__BB72_3: // %partword.cmpxchg.end @@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB73_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB73_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB73_1; ; SM70-NEXT: $L__BB73_3: // %partword.cmpxchg.end @@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB74_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB74_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 
%p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB74_1; ; SM70-NEXT: $L__BB74_3: // %partword.cmpxchg.end @@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB75_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB75_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB75_1; ; SM70-NEXT: $L__BB75_3: // %partword.cmpxchg.end @@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB76_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB76_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB76_1; ; SM70-NEXT: $L__BB76_3: // %partword.cmpxchg.end @@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB77_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB77_1 Depth=1 ; SM70-NEXT: and.b32 
%r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB77_1; ; SM70-NEXT: $L__BB77_3: // %partword.cmpxchg.end @@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB78_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB78_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB78_1; ; SM70-NEXT: $L__BB78_3: // %partword.cmpxchg.end @@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB79_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB79_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB79_1; ; SM70-NEXT: $L__BB79_3: // %partword.cmpxchg.end @@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB80_3; ; SM70-NEXT: // %bb.2: // 
%partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB80_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB80_1; ; SM70-NEXT: $L__BB80_3: // %partword.cmpxchg.end @@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB81_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB81_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB81_1; ; SM70-NEXT: $L__BB81_3: // %partword.cmpxchg.end @@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB82_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB82_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB82_1; ; SM70-NEXT: $L__BB82_3: // %partword.cmpxchg.end @@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: 
setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB83_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB83_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB83_1; ; SM70-NEXT: $L__BB83_3: // %partword.cmpxchg.end @@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB84_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB84_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB84_1; ; SM70-NEXT: $L__BB84_3: // %partword.cmpxchg.end @@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB85_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB85_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB85_1; ; SM70-NEXT: $L__BB85_3: // %partword.cmpxchg.end @@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 
%r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB86_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB86_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB86_1; ; SM70-NEXT: $L__BB86_3: // %partword.cmpxchg.end @@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB87_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB87_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB87_1; ; SM70-NEXT: $L__BB87_3: // %partword.cmpxchg.end @@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB88_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB88_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB88_1; ; SM70-NEXT: $L__BB88_3: // %partword.cmpxchg.end @@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: or.b32 %r16, %r19, 
%r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB89_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB89_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB89_1; ; SM70-NEXT: $L__BB89_3: // %partword.cmpxchg.end diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll index f289c3cf3d509..fcc6a4cbe7b58 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll @@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB0_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB0_1; ; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end @@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB1_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; 
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB1_1; ; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end @@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB2_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB2_1; ; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end @@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB3_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB3_1; ; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end @@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB4_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM90-NEXT: 
and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB4_1; ; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end @@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB5_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB5_1; ; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end @@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB6_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB6_1; ; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end @@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB7_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure 
; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB7_1; ; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end @@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB8_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB8_1; ; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end @@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB9_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB9_1; ; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end @@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra 
$L__BB10_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB10_1; ; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end @@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB11_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB11_1; ; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end @@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB12_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB12_1; ; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end @@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, 
%r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB13_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB13_1; ; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end @@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB14_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB14_1; ; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end @@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB15_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB15_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB15_1; ; SM90-NEXT: $L__BB15_3: // %partword.cmpxchg.end @@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: 
atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB16_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB16_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB16_1; ; SM90-NEXT: $L__BB16_3: // %partword.cmpxchg.end @@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB17_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB17_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB17_1; ; SM90-NEXT: $L__BB17_3: // %partword.cmpxchg.end @@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB18_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB18_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB18_1; ; SM90-NEXT: $L__BB18_3: // %partword.cmpxchg.end @@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: 
or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB19_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB19_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB19_1; ; SM90-NEXT: $L__BB19_3: // %partword.cmpxchg.end @@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB20_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB20_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB20_1; ; SM90-NEXT: $L__BB20_3: // %partword.cmpxchg.end @@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB21_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB21_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB21_1; ; SM90-NEXT: $L__BB21_3: // %partword.cmpxchg.end @@ -1049,12 +1049,12 @@ define i8 
@release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB22_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB22_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB22_1; ; SM90-NEXT: $L__BB22_3: // %partword.cmpxchg.end @@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB23_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB23_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB23_1; ; SM90-NEXT: $L__BB23_3: // %partword.cmpxchg.end @@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB24_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB24_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB24_1; ; 
SM90-NEXT: $L__BB24_3: // %partword.cmpxchg.end @@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB25_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB25_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB25_1; ; SM90-NEXT: $L__BB25_3: // %partword.cmpxchg.end @@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB26_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB26_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB26_1; ; SM90-NEXT: $L__BB26_3: // %partword.cmpxchg.end @@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB27_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB27_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, 
%r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB27_1; ; SM90-NEXT: $L__BB27_3: // %partword.cmpxchg.end @@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB28_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB28_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB28_1; ; SM90-NEXT: $L__BB28_3: // %partword.cmpxchg.end @@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB29_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB29_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB29_1; ; SM90-NEXT: $L__BB29_3: // %partword.cmpxchg.end @@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB30_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB30_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, 
%r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB30_1; ; SM90-NEXT: $L__BB30_3: // %partword.cmpxchg.end @@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB31_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB31_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB31_1; ; SM90-NEXT: $L__BB31_3: // %partword.cmpxchg.end @@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB32_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB32_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB32_1; ; SM90-NEXT: $L__BB32_3: // %partword.cmpxchg.end @@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB33_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; 
SM90-NEXT: // in Loop: Header=BB33_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB33_1; ; SM90-NEXT: $L__BB33_3: // %partword.cmpxchg.end @@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB34_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB34_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB34_1; ; SM90-NEXT: $L__BB34_3: // %partword.cmpxchg.end @@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB35_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB35_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB35_1; ; SM90-NEXT: $L__BB35_3: // %partword.cmpxchg.end @@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; 
SM90-NEXT: @%p1 bra $L__BB36_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB36_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB36_1; ; SM90-NEXT: $L__BB36_3: // %partword.cmpxchg.end @@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB37_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB37_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB37_1; ; SM90-NEXT: $L__BB37_3: // %partword.cmpxchg.end @@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB38_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB38_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB38_1; ; SM90-NEXT: $L__BB38_3: // %partword.cmpxchg.end @@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; 
SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB39_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB39_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB39_1; ; SM90-NEXT: $L__BB39_3: // %partword.cmpxchg.end @@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB40_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB40_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB40_1; ; SM90-NEXT: $L__BB40_3: // %partword.cmpxchg.end @@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB41_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB41_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB41_1; ; SM90-NEXT: $L__BB41_3: // %partword.cmpxchg.end @@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 
%r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB42_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB42_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB42_1; ; SM90-NEXT: $L__BB42_3: // %partword.cmpxchg.end @@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB43_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB43_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB43_1; ; SM90-NEXT: $L__BB43_3: // %partword.cmpxchg.end @@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: or.b32 %r17, %r20, %r3; ; SM90-NEXT: or.b32 %r18, %r20, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM90-NEXT: @%p1 bra $L__BB44_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB44_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB44_1; ; SM90-NEXT: $L__BB44_3: // %partword.cmpxchg.end @@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr 
%addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB45_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB45_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB45_1; ; SM90-NEXT: $L__BB45_3: // %partword.cmpxchg.end @@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB46_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB46_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB46_1; ; SM90-NEXT: $L__BB46_3: // %partword.cmpxchg.end @@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB47_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB47_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB47_1; ; SM90-NEXT: $L__BB47_3: // 
%partword.cmpxchg.end @@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB48_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB48_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB48_1; ; SM90-NEXT: $L__BB48_3: // %partword.cmpxchg.end @@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB49_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB49_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB49_1; ; SM90-NEXT: $L__BB49_3: // %partword.cmpxchg.end @@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB50_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB50_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: 
mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB50_1; ; SM90-NEXT: $L__BB50_3: // %partword.cmpxchg.end @@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB51_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB51_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB51_1; ; SM90-NEXT: $L__BB51_3: // %partword.cmpxchg.end @@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB52_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB52_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB52_1; ; SM90-NEXT: $L__BB52_3: // %partword.cmpxchg.end @@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB53_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB53_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: 
setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB53_1; ; SM90-NEXT: $L__BB53_3: // %partword.cmpxchg.end @@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB54_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB54_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB54_1; ; SM90-NEXT: $L__BB54_3: // %partword.cmpxchg.end @@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB55_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB55_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB55_1; ; SM90-NEXT: $L__BB55_3: // %partword.cmpxchg.end @@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB56_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // 
in Loop: Header=BB56_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB56_1; ; SM90-NEXT: $L__BB56_3: // %partword.cmpxchg.end @@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB57_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB57_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB57_1; ; SM90-NEXT: $L__BB57_3: // %partword.cmpxchg.end @@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB58_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB58_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB58_1; ; SM90-NEXT: $L__BB58_3: // %partword.cmpxchg.end @@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra 
$L__BB59_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB59_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB59_1; ; SM90-NEXT: $L__BB59_3: // %partword.cmpxchg.end @@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB60_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB60_1; ; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end @@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB61_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB61_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB61_1; ; SM90-NEXT: $L__BB61_3: // %partword.cmpxchg.end @@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 
%p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB62_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB62_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB62_1; ; SM90-NEXT: $L__BB62_3: // %partword.cmpxchg.end @@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB63_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB63_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB63_1; ; SM90-NEXT: $L__BB63_3: // %partword.cmpxchg.end @@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB64_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB64_1; ; SM90-NEXT: $L__BB64_3: // %partword.cmpxchg.end @@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; 
SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB65_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB65_1; ; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end @@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB66_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB66_1; ; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end @@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB67_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB67_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB67_1; ; SM90-NEXT: $L__BB67_3: // %partword.cmpxchg.end @@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 
%ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB68_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB68_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB68_1; ; SM90-NEXT: $L__BB68_3: // %partword.cmpxchg.end @@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB69_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB69_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB69_1; ; SM90-NEXT: $L__BB69_3: // %partword.cmpxchg.end @@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB70_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB70_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB70_1; ; SM90-NEXT: $L__BB70_3: // %partword.cmpxchg.end @@ -3307,12 +3307,12 
@@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB71_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB71_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB71_1; ; SM90-NEXT: $L__BB71_3: // %partword.cmpxchg.end @@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB72_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB72_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB72_1; ; SM90-NEXT: $L__BB72_3: // %partword.cmpxchg.end @@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB73_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB73_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra 
$L__BB73_1; ; SM90-NEXT: $L__BB73_3: // %partword.cmpxchg.end @@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB74_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB74_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB74_1; ; SM90-NEXT: $L__BB74_3: // %partword.cmpxchg.end @@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB75_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB75_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB75_1; ; SM90-NEXT: $L__BB75_3: // %partword.cmpxchg.end @@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB76_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB76_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: 
setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB76_1; ; SM90-NEXT: $L__BB76_3: // %partword.cmpxchg.end @@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB77_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB77_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB77_1; ; SM90-NEXT: $L__BB77_3: // %partword.cmpxchg.end @@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB78_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB78_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB78_1; ; SM90-NEXT: $L__BB78_3: // %partword.cmpxchg.end @@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB79_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB79_1 Depth=1 ; SM90-NEXT: 
and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB79_1; ; SM90-NEXT: $L__BB79_3: // %partword.cmpxchg.end @@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB80_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB80_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB80_1; ; SM90-NEXT: $L__BB80_3: // %partword.cmpxchg.end @@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB81_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB81_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB81_1; ; SM90-NEXT: $L__BB81_3: // %partword.cmpxchg.end @@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB82_3; ; SM90-NEXT: // %bb.2: // 
%partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB82_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB82_1; ; SM90-NEXT: $L__BB82_3: // %partword.cmpxchg.end @@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB83_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB83_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB83_1; ; SM90-NEXT: $L__BB83_3: // %partword.cmpxchg.end @@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB84_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB84_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB84_1; ; SM90-NEXT: $L__BB84_3: // %partword.cmpxchg.end @@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: 
setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB85_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB85_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB85_1; ; SM90-NEXT: $L__BB85_3: // %partword.cmpxchg.end @@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB86_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB86_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB86_1; ; SM90-NEXT: $L__BB86_3: // %partword.cmpxchg.end @@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB87_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB87_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB87_1; ; SM90-NEXT: $L__BB87_3: // %partword.cmpxchg.end @@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.global.cas.b32 
%r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB88_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB88_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB88_1; ; SM90-NEXT: $L__BB88_3: // %partword.cmpxchg.end @@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: or.b32 %r16, %r19, %r3; ; SM90-NEXT: or.b32 %r17, %r19, %r4; ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; -; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM90-NEXT: @%p1 bra $L__BB89_3; ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM90-NEXT: // in Loop: Header=BB89_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; -; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB89_1; ; SM90-NEXT: $L__BB89_3: // %partword.cmpxchg.end diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll index 9eeff9d7c2b75..e3b4d38972325 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll @@ -39,12 +39,12 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: or.b32 %r17, %r20, %r3; ; SM30-NEXT: or.b32 %r18, %r20, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM30-NEXT: @%p1 bra $L__BB0_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB0_1; ; SM30-NEXT: 
$L__BB0_3: // %partword.cmpxchg.end @@ -80,12 +80,12 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB0_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end @@ -165,12 +165,12 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: or.b32 %r17, %r20, %r3; ; SM30-NEXT: or.b32 %r18, %r20, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM30-NEXT: @%p1 bra $L__BB1_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB1_1; ; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end @@ -207,12 +207,12 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB1_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB1_1; ; SM70-NEXT: $L__BB1_3: // 
%partword.cmpxchg.end @@ -295,12 +295,12 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: or.b32 %r17, %r20, %r3; ; SM30-NEXT: or.b32 %r18, %r20, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM30-NEXT: @%p1 bra $L__BB2_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB2_1; ; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end @@ -337,12 +337,12 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB2_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB2_1; ; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end @@ -424,12 +424,12 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: or.b32 %r17, %r20, %r3; ; SM30-NEXT: or.b32 %r18, %r20, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM30-NEXT: @%p1 bra $L__BB3_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB3_1; ; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end @@ -467,12 
+467,12 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB3_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB3_1; ; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end @@ -556,12 +556,12 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: or.b32 %r17, %r20, %r3; ; SM30-NEXT: or.b32 %r18, %r20, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM30-NEXT: @%p1 bra $L__BB4_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB4_1; ; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end @@ -599,12 +599,12 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: or.b32 %r17, %r20, %r3; ; SM70-NEXT: or.b32 %r18, %r20, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB4_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8; ; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB4_1; ; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end @@ -687,12 +687,12 @@ define i16 
@relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: or.b32 %r16, %r19, %r3; ; SM30-NEXT: or.b32 %r17, %r19, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM30-NEXT: @%p1 bra $L__BB5_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB5_1; ; SM30-NEXT: $L__BB5_3: // %partword.cmpxchg.end @@ -727,12 +727,12 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB5_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB5_1; ; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end @@ -810,12 +810,12 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: or.b32 %r16, %r19, %r3; ; SM30-NEXT: or.b32 %r17, %r19, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM30-NEXT: @%p1 bra $L__BB6_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB6_1; ; SM30-NEXT: $L__BB6_3: // %partword.cmpxchg.end @@ -851,12 +851,12 @@ define i16 @acquire_sys_i16(ptr 
%addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB6_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB6_1; ; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end @@ -937,12 +937,12 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: or.b32 %r16, %r19, %r3; ; SM30-NEXT: or.b32 %r17, %r19, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM30-NEXT: @%p1 bra $L__BB7_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB7_1; ; SM30-NEXT: $L__BB7_3: // %partword.cmpxchg.end @@ -978,12 +978,12 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB7_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB7_1; ; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end @@ -1063,12 +1063,12 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 
%cmp, i16 %new) { ; SM30-NEXT: or.b32 %r16, %r19, %r3; ; SM30-NEXT: or.b32 %r17, %r19, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM30-NEXT: @%p1 bra $L__BB8_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB8_1; ; SM30-NEXT: $L__BB8_3: // %partword.cmpxchg.end @@ -1105,12 +1105,12 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB8_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB8_1; ; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end @@ -1193,12 +1193,12 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: or.b32 %r16, %r19, %r3; ; SM30-NEXT: or.b32 %r17, %r19, %r4; ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; -; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM30-NEXT: @%p1 bra $L__BB9_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; -; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB9_1; ; SM30-NEXT: $L__BB9_3: // %partword.cmpxchg.end @@ -1235,12 +1235,12 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 
%new) { ; SM70-NEXT: or.b32 %r16, %r19, %r3; ; SM70-NEXT: or.b32 %r17, %r19, %r4; ; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; -; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17; ; SM70-NEXT: @%p1 bra $L__BB9_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; -; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8; ; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB9_1; ; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll index ee86fe97ef781..b44ae47d623bd 100644 --- a/llvm/test/CodeGen/NVPTX/compare-int.ll +++ b/llvm/test/CodeGen/NVPTX/compare-int.ll @@ -11,7 +11,7 @@ ;;; i64 define i64 @icmp_eq_i64(i64 %a, i64 %b) { -; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} +; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} ; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i64 %a, %b @@ -20,7 +20,7 @@ define i64 @icmp_eq_i64(i64 %a, i64 %b) { } define i64 @icmp_ne_i64(i64 %a, i64 %b) { -; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} +; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} ; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i64 %a, %b @@ -103,7 +103,7 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) { ;;; i32 define i32 @icmp_eq_i32(i32 %a, i32 %b) { -; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} +; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i32 %a, %b @@ -112,7 +112,7 @@ define i32 @icmp_eq_i32(i32 %a, i32 %b) { } define i32 @icmp_ne_i32(i32 %a, i32 %b) { -; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} +; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 
%r{{[0-9]+}} ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i32 %a, %b @@ -196,7 +196,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) { ;;; i16 define i16 @icmp_eq_i16(i16 %a, i16 %b) { -; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} +; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i16 %a, %b @@ -205,7 +205,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) { } define i16 @icmp_ne_i16(i16 %a, i16 %b) { -; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} +; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i16 %a, %b @@ -290,7 +290,7 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) { define i8 @icmp_eq_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} +; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i8 %a, %b @@ -300,7 +300,7 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) { define i8 @icmp_ne_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} +; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i8 %a, %b diff --git a/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll b/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll index 193cf674ecdfc..a1020e68e1bae 100644 --- a/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll +++ b/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll @@ -210,12 +210,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: or.b32 %r39, %r48, %r3; ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r6, 
[%rd1], %r39, %r48; -; CHECK-NEXT: setp.eq.s32 %p1, %r6, %r39; +; CHECK-NEXT: setp.eq.b32 %p1, %r6, %r39; ; CHECK-NEXT: @%p1 bra $L__BB4_3; ; CHECK-NEXT: // %bb.2: // %partword.cmpxchg.failure32 ; CHECK-NEXT: // in Loop: Header=BB4_1 Depth=1 ; CHECK-NEXT: and.b32 %r7, %r6, %r2; -; CHECK-NEXT: setp.ne.s32 %p2, %r48, %r7; +; CHECK-NEXT: setp.ne.b32 %p2, %r48, %r7; ; CHECK-NEXT: mov.b32 %r48, %r7; ; CHECK-NEXT: @%p2 bra $L__BB4_1; ; CHECK-NEXT: $L__BB4_3: // %partword.cmpxchg.end31 @@ -225,12 +225,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: or.b32 %r41, %r49, %r3; ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r10, [%rd1], %r41, %r49; -; CHECK-NEXT: setp.eq.s32 %p3, %r10, %r41; +; CHECK-NEXT: setp.eq.b32 %p3, %r10, %r41; ; CHECK-NEXT: @%p3 bra $L__BB4_6; ; CHECK-NEXT: // %bb.5: // %partword.cmpxchg.failure22 ; CHECK-NEXT: // in Loop: Header=BB4_4 Depth=1 ; CHECK-NEXT: and.b32 %r11, %r10, %r2; -; CHECK-NEXT: setp.ne.s32 %p4, %r49, %r11; +; CHECK-NEXT: setp.ne.b32 %p4, %r49, %r11; ; CHECK-NEXT: mov.b32 %r49, %r11; ; CHECK-NEXT: @%p4 bra $L__BB4_4; ; CHECK-NEXT: $L__BB4_6: // %partword.cmpxchg.end21 @@ -242,12 +242,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: or.b32 %r43, %r50, %r3; ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r14, [%rd1], %r43, %r50; -; CHECK-NEXT: setp.eq.s32 %p5, %r14, %r43; +; CHECK-NEXT: setp.eq.b32 %p5, %r14, %r43; ; CHECK-NEXT: @%p5 bra $L__BB4_9; ; CHECK-NEXT: // %bb.8: // %partword.cmpxchg.failure12 ; CHECK-NEXT: // in Loop: Header=BB4_7 Depth=1 ; CHECK-NEXT: and.b32 %r15, %r14, %r2; -; CHECK-NEXT: setp.ne.s32 %p6, %r50, %r15; +; CHECK-NEXT: setp.ne.b32 %p6, %r50, %r15; ; CHECK-NEXT: mov.b32 %r50, %r15; ; CHECK-NEXT: @%p6 bra $L__BB4_7; ; CHECK-NEXT: $L__BB4_9: // %partword.cmpxchg.end11 @@ -258,12 +258,12 @@ define void 
@test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: or.b32 %r45, %r51, %r3; ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r18, [%rd1], %r45, %r51; -; CHECK-NEXT: setp.eq.s32 %p7, %r18, %r45; +; CHECK-NEXT: setp.eq.b32 %p7, %r18, %r45; ; CHECK-NEXT: @%p7 bra $L__BB4_12; ; CHECK-NEXT: // %bb.11: // %partword.cmpxchg.failure2 ; CHECK-NEXT: // in Loop: Header=BB4_10 Depth=1 ; CHECK-NEXT: and.b32 %r19, %r18, %r2; -; CHECK-NEXT: setp.ne.s32 %p8, %r51, %r19; +; CHECK-NEXT: setp.ne.b32 %p8, %r51, %r19; ; CHECK-NEXT: mov.b32 %r51, %r19; ; CHECK-NEXT: @%p8 bra $L__BB4_10; ; CHECK-NEXT: $L__BB4_12: // %partword.cmpxchg.end1 @@ -275,12 +275,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: or.b32 %r47, %r52, %r3; ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r22, [%rd1], %r47, %r52; -; CHECK-NEXT: setp.eq.s32 %p9, %r22, %r47; +; CHECK-NEXT: setp.eq.b32 %p9, %r22, %r47; ; CHECK-NEXT: @%p9 bra $L__BB4_15; ; CHECK-NEXT: // %bb.14: // %partword.cmpxchg.failure ; CHECK-NEXT: // in Loop: Header=BB4_13 Depth=1 ; CHECK-NEXT: and.b32 %r23, %r22, %r2; -; CHECK-NEXT: setp.ne.s32 %p10, %r52, %r23; +; CHECK-NEXT: setp.ne.b32 %p10, %r52, %r23; ; CHECK-NEXT: mov.b32 %r52, %r23; ; CHECK-NEXT: @%p10 bra $L__BB4_13; ; CHECK-NEXT: $L__BB4_15: // %partword.cmpxchg.end diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll index b1eadf381d3b4..9548b472f93e8 100644 --- a/llvm/test/CodeGen/NVPTX/extractelement.ll +++ b/llvm/test/CodeGen/NVPTX/extractelement.ll @@ -40,7 +40,7 @@ define i1 @test_v2i8_load(ptr %a) { ; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd1]; ; CHECK-NEXT: or.b16 %rs5, %rs1, %rs2; ; CHECK-NEXT: and.b16 %rs6, %rs5, 255; -; CHECK-NEXT: setp.eq.s16 %p1, %rs6, 0; +; CHECK-NEXT: setp.eq.b16 %p1, %rs6, 0; ; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1; ; 
CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll index 13f1c2f30b830..2b7e4184670c7 100644 --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -880,8 +880,9 @@ define half @test_sqrt(half %a) #0 { ; CHECK-LABEL: test_sin( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; -; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]]; -; CHECK: sin.approx.f32 [[RF:%r[0-9]+]], [[AF]]; +; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]]; +; CHECK-NOF16: sin.approx.f32 [[RF:%r[0-9]+]], [[AF]]; +; CHECK-F16-FTZ: sin.approx.ftz.f32 [[RF:%r[0-9]+]], [[AF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; @@ -893,8 +894,9 @@ define half @test_sin(half %a) #0 #1 { ; CHECK-LABEL: test_cos( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; -; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]]; -; CHECK: cos.approx.f32 [[RF:%r[0-9]+]], [[AF]]; +; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]]; +; CHECK-NOF16: cos.approx.f32 [[RF:%r[0-9]+]], [[AF]]; +; CHECK-F16-FTZ: cos.approx.ftz.f32 [[RF:%r[0-9]+]], [[AF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 43a605f2b34d7..093bc20547b85 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -83,7 +83,7 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; ; CHECK-NEXT: ld.param.b32 %r1, [test_extract_i_param_0]; -; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0; +; 
CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0; ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll index bc48d242f88fd..5eda3a1e2dda1 100644 --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -291,6 +291,34 @@ define float @fcos_approx(float %a) #0 { ret float %r } +define float @fsin_approx_ftz(float %a) #0 #1 { +; CHECK-LABEL: fsin_approx_ftz( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [fsin_approx_ftz_param_0]; +; CHECK-NEXT: sin.approx.ftz.f32 %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %r = tail call float @llvm.sin.f32(float %a) + ret float %r +} + +define float @fcos_approx_ftz(float %a) #0 #1 { +; CHECK-LABEL: fcos_approx_ftz( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [fcos_approx_ftz_param_0]; +; CHECK-NEXT: cos.approx.ftz.f32 %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %r = tail call float @llvm.cos.f32(float %a) + ret float %r +} + define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) { ; CHECK-LABEL: repeated_div_recip_allowed( ; CHECK: { diff --git a/llvm/test/CodeGen/NVPTX/i1-select.ll b/llvm/test/CodeGen/NVPTX/i1-select.ll index 6fb5aad4b1eb9..9a051b3fd8bb7 100644 --- a/llvm/test/CodeGen/NVPTX/i1-select.ll +++ b/llvm/test/CodeGen/NVPTX/i1-select.ll @@ -72,11 +72,11 @@ define i32 @test_select_i1_basic(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %fals ; CHECK-NEXT: ld.param.b32 %r1, [test_select_i1_basic_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [test_select_i1_basic_param_1]; ; CHECK-NEXT: or.b32 %r4, %r1, %r2; -; CHECK-NEXT: setp.ne.s32 %p1, %r1, 0; +; CHECK-NEXT: setp.ne.b32 %p1, %r1, 0; ; 
CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_param_2]; -; CHECK-NEXT: setp.eq.s32 %p2, %r5, 0; +; CHECK-NEXT: setp.eq.b32 %p2, %r5, 0; ; CHECK-NEXT: ld.param.b32 %r7, [test_select_i1_basic_param_3]; -; CHECK-NEXT: setp.eq.s32 %p3, %r4, 0; +; CHECK-NEXT: setp.eq.b32 %p3, %r4, 0; ; CHECK-NEXT: ld.param.b32 %r8, [test_select_i1_basic_param_4]; ; CHECK-NEXT: selp.b32 %r9, %r7, %r8, %p2; ; CHECK-NEXT: selp.b32 %r10, %r9, %r8, %p1; @@ -99,12 +99,12 @@ define i32 @test_select_i1_basic_folding(i32 %v1, i32 %v2, i32 %v3, i32 %true, i ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_select_i1_basic_folding_param_0]; -; CHECK-NEXT: setp.eq.s32 %p1, %r1, 0; +; CHECK-NEXT: setp.eq.b32 %p1, %r1, 0; ; CHECK-NEXT: ld.param.b32 %r2, [test_select_i1_basic_folding_param_1]; -; CHECK-NEXT: setp.ne.s32 %p2, %r2, 0; -; CHECK-NEXT: setp.eq.s32 %p3, %r2, 0; +; CHECK-NEXT: setp.ne.b32 %p2, %r2, 0; +; CHECK-NEXT: setp.eq.b32 %p3, %r2, 0; ; CHECK-NEXT: ld.param.b32 %r3, [test_select_i1_basic_folding_param_2]; -; CHECK-NEXT: setp.eq.s32 %p4, %r3, 0; +; CHECK-NEXT: setp.eq.b32 %p4, %r3, 0; ; CHECK-NEXT: ld.param.b32 %r4, [test_select_i1_basic_folding_param_3]; ; CHECK-NEXT: xor.pred %p6, %p1, %p3; ; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_folding_param_4]; diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll index 29408a24213cc..44d85589b5056 100644 --- a/llvm/test/CodeGen/NVPTX/i128.ll +++ b/llvm/test/CodeGen/NVPTX/i128.ll @@ -24,18 +24,18 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: selp.b64 %rd6, %rd54, %rd50, %p2; ; CHECK-NEXT: selp.b64 %rd5, %rd53, %rd49, %p2; ; CHECK-NEXT: or.b64 %rd55, %rd5, %rd6; -; CHECK-NEXT: setp.eq.s64 %p3, %rd55, 0; +; CHECK-NEXT: setp.eq.b64 %p3, %rd55, 0; ; CHECK-NEXT: or.b64 %rd56, %rd3, %rd4; -; CHECK-NEXT: setp.eq.s64 %p4, %rd56, 0; +; CHECK-NEXT: setp.eq.b64 %p4, %rd56, 0; ; CHECK-NEXT: or.pred %p5, %p3, %p4; -; CHECK-NEXT: setp.ne.s64 %p6, %rd6, 0; +; CHECK-NEXT: 
setp.ne.b64 %p6, %rd6, 0; ; CHECK-NEXT: clz.b64 %r1, %rd6; ; CHECK-NEXT: cvt.u64.u32 %rd57, %r1; ; CHECK-NEXT: clz.b64 %r2, %rd5; ; CHECK-NEXT: cvt.u64.u32 %rd58, %r2; ; CHECK-NEXT: add.s64 %rd59, %rd58, 64; ; CHECK-NEXT: selp.b64 %rd60, %rd57, %rd59, %p6; -; CHECK-NEXT: setp.ne.s64 %p7, %rd4, 0; +; CHECK-NEXT: setp.ne.b64 %p7, %rd4, 0; ; CHECK-NEXT: clz.b64 %r3, %rd4; ; CHECK-NEXT: cvt.u64.u32 %rd61, %r3; ; CHECK-NEXT: clz.b64 %r4, %rd3; @@ -46,14 +46,14 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: sub.cc.s64 %rd66, %rd60, %rd64; ; CHECK-NEXT: subc.cc.s64 %rd67, %rd117, 0; ; CHECK-NEXT: setp.gt.u64 %p8, %rd66, 127; -; CHECK-NEXT: setp.eq.s64 %p9, %rd67, 0; +; CHECK-NEXT: setp.eq.b64 %p9, %rd67, 0; ; CHECK-NEXT: and.pred %p10, %p9, %p8; -; CHECK-NEXT: setp.ne.s64 %p11, %rd67, 0; +; CHECK-NEXT: setp.ne.b64 %p11, %rd67, 0; ; CHECK-NEXT: or.pred %p12, %p10, %p11; ; CHECK-NEXT: or.pred %p13, %p5, %p12; ; CHECK-NEXT: xor.b64 %rd68, %rd66, 127; ; CHECK-NEXT: or.b64 %rd69, %rd68, %rd67; -; CHECK-NEXT: setp.eq.s64 %p14, %rd69, 0; +; CHECK-NEXT: setp.eq.b64 %p14, %rd69, 0; ; CHECK-NEXT: selp.b64 %rd126, 0, %rd4, %p13; ; CHECK-NEXT: selp.b64 %rd125, 0, %rd3, %p13; ; CHECK-NEXT: or.pred %p15, %p13, %p14; @@ -62,7 +62,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd119, %rd66, 1; ; CHECK-NEXT: addc.cc.s64 %rd120, %rd67, 0; ; CHECK-NEXT: or.b64 %rd72, %rd119, %rd120; -; CHECK-NEXT: setp.eq.s64 %p16, %rd72, 0; +; CHECK-NEXT: setp.eq.b64 %p16, %rd72, 0; ; CHECK-NEXT: cvt.u32.u64 %r5, %rd66; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; ; CHECK-NEXT: shl.b64 %rd73, %rd4, %r6; @@ -116,7 +116,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd119, %rd119, -1; ; CHECK-NEXT: addc.cc.s64 %rd120, %rd120, -1; ; CHECK-NEXT: or.b64 %rd98, %rd119, %rd120; -; CHECK-NEXT: setp.eq.s64 %p19, %rd98, 0; +; CHECK-NEXT: setp.eq.b64 %p19, %rd98, 0; ; CHECK-NEXT: @%p19 bra $L__BB0_4; ; CHECK-NEXT: bra.uni $L__BB0_2; ; CHECK-NEXT: 
$L__BB0_4: // %udiv-loop-exit @@ -154,18 +154,18 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: ld.param.v2.b64 {%rd41, %rd42}, [urem_i128_param_0]; ; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [urem_i128_param_1]; ; CHECK-NEXT: or.b64 %rd45, %rd3, %rd4; -; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0; +; CHECK-NEXT: setp.eq.b64 %p1, %rd45, 0; ; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42; -; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0; +; CHECK-NEXT: setp.eq.b64 %p2, %rd46, 0; ; CHECK-NEXT: or.pred %p3, %p1, %p2; -; CHECK-NEXT: setp.ne.s64 %p4, %rd4, 0; +; CHECK-NEXT: setp.ne.b64 %p4, %rd4, 0; ; CHECK-NEXT: clz.b64 %r1, %rd4; ; CHECK-NEXT: cvt.u64.u32 %rd47, %r1; ; CHECK-NEXT: clz.b64 %r2, %rd3; ; CHECK-NEXT: cvt.u64.u32 %rd48, %r2; ; CHECK-NEXT: add.s64 %rd49, %rd48, 64; ; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4; -; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0; +; CHECK-NEXT: setp.ne.b64 %p5, %rd42, 0; ; CHECK-NEXT: clz.b64 %r3, %rd42; ; CHECK-NEXT: cvt.u64.u32 %rd51, %r3; ; CHECK-NEXT: clz.b64 %r4, %rd41; @@ -176,14 +176,14 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; ; CHECK-NEXT: subc.cc.s64 %rd57, %rd103, 0; ; CHECK-NEXT: setp.gt.u64 %p6, %rd56, 127; -; CHECK-NEXT: setp.eq.s64 %p7, %rd57, 0; +; CHECK-NEXT: setp.eq.b64 %p7, %rd57, 0; ; CHECK-NEXT: and.pred %p8, %p7, %p6; -; CHECK-NEXT: setp.ne.s64 %p9, %rd57, 0; +; CHECK-NEXT: setp.ne.b64 %p9, %rd57, 0; ; CHECK-NEXT: or.pred %p10, %p8, %p9; ; CHECK-NEXT: or.pred %p11, %p3, %p10; ; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; ; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; -; CHECK-NEXT: setp.eq.s64 %p12, %rd59, 0; +; CHECK-NEXT: setp.eq.b64 %p12, %rd59, 0; ; CHECK-NEXT: selp.b64 %rd112, 0, %rd42, %p11; ; CHECK-NEXT: selp.b64 %rd111, 0, %rd41, %p11; ; CHECK-NEXT: or.pred %p13, %p11, %p12; @@ -192,7 +192,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd105, %rd56, 1; ; CHECK-NEXT: addc.cc.s64 %rd106, %rd57, 0; ; CHECK-NEXT: or.b64 %rd62, %rd105, 
%rd106; -; CHECK-NEXT: setp.eq.s64 %p14, %rd62, 0; +; CHECK-NEXT: setp.eq.b64 %p14, %rd62, 0; ; CHECK-NEXT: cvt.u32.u64 %r5, %rd56; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; ; CHECK-NEXT: shl.b64 %rd63, %rd42, %r6; @@ -246,7 +246,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd105, %rd105, -1; ; CHECK-NEXT: addc.cc.s64 %rd106, %rd106, -1; ; CHECK-NEXT: or.b64 %rd88, %rd105, %rd106; -; CHECK-NEXT: setp.eq.s64 %p17, %rd88, 0; +; CHECK-NEXT: setp.eq.b64 %p17, %rd88, 0; ; CHECK-NEXT: @%p17 bra $L__BB1_4; ; CHECK-NEXT: bra.uni $L__BB1_2; ; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit @@ -326,18 +326,18 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: xor.b64 %rd55, %rd50, %rd46; ; CHECK-NEXT: shr.s64 %rd5, %rd55, 63; ; CHECK-NEXT: or.b64 %rd56, %rd3, %rd4; -; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0; +; CHECK-NEXT: setp.eq.b64 %p3, %rd56, 0; ; CHECK-NEXT: or.b64 %rd57, %rd1, %rd2; -; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0; +; CHECK-NEXT: setp.eq.b64 %p4, %rd57, 0; ; CHECK-NEXT: or.pred %p5, %p3, %p4; -; CHECK-NEXT: setp.ne.s64 %p6, %rd4, 0; +; CHECK-NEXT: setp.ne.b64 %p6, %rd4, 0; ; CHECK-NEXT: clz.b64 %r1, %rd4; ; CHECK-NEXT: cvt.u64.u32 %rd58, %r1; ; CHECK-NEXT: clz.b64 %r2, %rd3; ; CHECK-NEXT: cvt.u64.u32 %rd59, %r2; ; CHECK-NEXT: add.s64 %rd60, %rd59, 64; ; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6; -; CHECK-NEXT: setp.ne.s64 %p7, %rd2, 0; +; CHECK-NEXT: setp.ne.b64 %p7, %rd2, 0; ; CHECK-NEXT: clz.b64 %r3, %rd2; ; CHECK-NEXT: cvt.u64.u32 %rd62, %r3; ; CHECK-NEXT: clz.b64 %r4, %rd1; @@ -348,14 +348,14 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: sub.cc.s64 %rd67, %rd61, %rd65; ; CHECK-NEXT: subc.cc.s64 %rd68, %rd112, 0; ; CHECK-NEXT: setp.gt.u64 %p8, %rd67, 127; -; CHECK-NEXT: setp.eq.s64 %p9, %rd68, 0; +; CHECK-NEXT: setp.eq.b64 %p9, %rd68, 0; ; CHECK-NEXT: and.pred %p10, %p9, %p8; -; CHECK-NEXT: setp.ne.s64 %p11, %rd68, 0; +; CHECK-NEXT: setp.ne.b64 %p11, %rd68, 0; ; CHECK-NEXT: or.pred %p12, %p10, %p11; ; 
CHECK-NEXT: or.pred %p13, %p5, %p12; ; CHECK-NEXT: xor.b64 %rd69, %rd67, 127; ; CHECK-NEXT: or.b64 %rd70, %rd69, %rd68; -; CHECK-NEXT: setp.eq.s64 %p14, %rd70, 0; +; CHECK-NEXT: setp.eq.b64 %p14, %rd70, 0; ; CHECK-NEXT: selp.b64 %rd121, 0, %rd2, %p13; ; CHECK-NEXT: selp.b64 %rd120, 0, %rd1, %p13; ; CHECK-NEXT: or.pred %p15, %p13, %p14; @@ -364,7 +364,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd114, %rd67, 1; ; CHECK-NEXT: addc.cc.s64 %rd115, %rd68, 0; ; CHECK-NEXT: or.b64 %rd73, %rd114, %rd115; -; CHECK-NEXT: setp.eq.s64 %p16, %rd73, 0; +; CHECK-NEXT: setp.eq.b64 %p16, %rd73, 0; ; CHECK-NEXT: cvt.u32.u64 %r5, %rd67; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; ; CHECK-NEXT: shl.b64 %rd74, %rd2, %r6; @@ -418,7 +418,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd114, %rd114, -1; ; CHECK-NEXT: addc.cc.s64 %rd115, %rd115, -1; ; CHECK-NEXT: or.b64 %rd99, %rd114, %rd115; -; CHECK-NEXT: setp.eq.s64 %p19, %rd99, 0; +; CHECK-NEXT: setp.eq.b64 %p19, %rd99, 0; ; CHECK-NEXT: @%p19 bra $L__BB4_4; ; CHECK-NEXT: bra.uni $L__BB4_2; ; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit @@ -450,18 +450,18 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: ld.param.v2.b64 {%rd41, %rd42}, [udiv_i128_param_0]; ; CHECK-NEXT: ld.param.v2.b64 {%rd43, %rd44}, [udiv_i128_param_1]; ; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44; -; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0; +; CHECK-NEXT: setp.eq.b64 %p1, %rd45, 0; ; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42; -; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0; +; CHECK-NEXT: setp.eq.b64 %p2, %rd46, 0; ; CHECK-NEXT: or.pred %p3, %p1, %p2; -; CHECK-NEXT: setp.ne.s64 %p4, %rd44, 0; +; CHECK-NEXT: setp.ne.b64 %p4, %rd44, 0; ; CHECK-NEXT: clz.b64 %r1, %rd44; ; CHECK-NEXT: cvt.u64.u32 %rd47, %r1; ; CHECK-NEXT: clz.b64 %r2, %rd43; ; CHECK-NEXT: cvt.u64.u32 %rd48, %r2; ; CHECK-NEXT: add.s64 %rd49, %rd48, 64; ; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4; -; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0; +; 
CHECK-NEXT: setp.ne.b64 %p5, %rd42, 0; ; CHECK-NEXT: clz.b64 %r3, %rd42; ; CHECK-NEXT: cvt.u64.u32 %rd51, %r3; ; CHECK-NEXT: clz.b64 %r4, %rd41; @@ -472,14 +472,14 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; ; CHECK-NEXT: subc.cc.s64 %rd57, %rd97, 0; ; CHECK-NEXT: setp.gt.u64 %p6, %rd56, 127; -; CHECK-NEXT: setp.eq.s64 %p7, %rd57, 0; +; CHECK-NEXT: setp.eq.b64 %p7, %rd57, 0; ; CHECK-NEXT: and.pred %p8, %p7, %p6; -; CHECK-NEXT: setp.ne.s64 %p9, %rd57, 0; +; CHECK-NEXT: setp.ne.b64 %p9, %rd57, 0; ; CHECK-NEXT: or.pred %p10, %p8, %p9; ; CHECK-NEXT: or.pred %p11, %p3, %p10; ; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; ; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; -; CHECK-NEXT: setp.eq.s64 %p12, %rd59, 0; +; CHECK-NEXT: setp.eq.b64 %p12, %rd59, 0; ; CHECK-NEXT: selp.b64 %rd106, 0, %rd42, %p11; ; CHECK-NEXT: selp.b64 %rd105, 0, %rd41, %p11; ; CHECK-NEXT: or.pred %p13, %p11, %p12; @@ -488,7 +488,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd99, %rd56, 1; ; CHECK-NEXT: addc.cc.s64 %rd100, %rd57, 0; ; CHECK-NEXT: or.b64 %rd62, %rd99, %rd100; -; CHECK-NEXT: setp.eq.s64 %p14, %rd62, 0; +; CHECK-NEXT: setp.eq.b64 %p14, %rd62, 0; ; CHECK-NEXT: cvt.u32.u64 %r5, %rd56; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; ; CHECK-NEXT: shl.b64 %rd63, %rd42, %r6; @@ -542,7 +542,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd99, %rd99, -1; ; CHECK-NEXT: addc.cc.s64 %rd100, %rd100, -1; ; CHECK-NEXT: or.b64 %rd88, %rd99, %rd100; -; CHECK-NEXT: setp.eq.s64 %p17, %rd88, 0; +; CHECK-NEXT: setp.eq.b64 %p17, %rd88, 0; ; CHECK-NEXT: @%p17 bra $L__BB5_4; ; CHECK-NEXT: bra.uni $L__BB5_2; ; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index e89ab7a5605c3..2b7a06c33d948 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -98,7 +98,7 @@ 
define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 { ; COMMON-NEXT: // %bb.0: ; COMMON-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; ; COMMON-NEXT: ld.param.b32 %r1, [test_extract_i_param_0]; -; COMMON-NEXT: setp.eq.s64 %p1, %rd1, 0; +; COMMON-NEXT: setp.eq.b64 %p1, %rd1, 0; ; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; COMMON-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; COMMON-NEXT: cvt.u32.u16 %r2, %rs3; @@ -735,8 +735,8 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x ; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; ; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r4; ; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r3; -; COMMON-NEXT: setp.ne.s16 %p1, %rs3, %rs1; -; COMMON-NEXT: setp.ne.s16 %p2, %rs4, %rs2; +; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1; +; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2; ; COMMON-NEXT: mov.b32 {%rs5, %rs6}, %r2; ; COMMON-NEXT: mov.b32 {%rs7, %rs8}, %r1; ; COMMON-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2; @@ -762,8 +762,8 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b, ; COMMON-NEXT: ld.param.b32 %r5, [test_select_cc_i32_i16_param_2]; ; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r6; ; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r5; -; COMMON-NEXT: setp.ne.s16 %p1, %rs3, %rs1; -; COMMON-NEXT: setp.ne.s16 %p2, %rs4, %rs2; +; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1; +; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2; ; COMMON-NEXT: selp.b32 %r7, %r2, %r4, %p2; ; COMMON-NEXT: selp.b32 %r8, %r1, %r3, %p1; ; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; @@ -786,8 +786,8 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b, ; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i16_i32_param_2]; ; COMMON-NEXT: ld.param.b32 %r2, [test_select_cc_i16_i32_param_1]; ; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_i16_i32_param_0]; -; COMMON-NEXT: setp.ne.s32 %p1, %r3, %r5; -; COMMON-NEXT: setp.ne.s32 %p2, %r4, %r6; +; COMMON-NEXT: setp.ne.b32 %p1, %r3, %r5; +; COMMON-NEXT: setp.ne.b32 %p2, %r4, 
%r6; ; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2; ; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1; ; COMMON-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index fd2e56bb126bb..328da60a1f783 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -300,16 +300,16 @@ define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_umax_param_0]; ; CHECK-NEXT: bfe.u32 %r3, %r2, 0, 8; ; CHECK-NEXT: bfe.u32 %r4, %r1, 0, 8; -; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3; +; CHECK-NEXT: setp.gt.u32 %p1, %r4, %r3; ; CHECK-NEXT: bfe.u32 %r5, %r2, 8, 8; ; CHECK-NEXT: bfe.u32 %r6, %r1, 8, 8; -; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5; +; CHECK-NEXT: setp.gt.u32 %p2, %r6, %r5; ; CHECK-NEXT: bfe.u32 %r7, %r2, 16, 8; ; CHECK-NEXT: bfe.u32 %r8, %r1, 16, 8; -; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7; +; CHECK-NEXT: setp.gt.u32 %p3, %r8, %r7; ; CHECK-NEXT: bfe.u32 %r9, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8; -; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9; +; CHECK-NEXT: setp.gt.u32 %p4, %r10, %r9; ; CHECK-NEXT: selp.b32 %r11, %r10, %r9, %p4; ; CHECK-NEXT: selp.b32 %r12, %r8, %r7, %p3; ; CHECK-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U; @@ -378,16 +378,16 @@ define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_umin_param_0]; ; CHECK-NEXT: bfe.u32 %r3, %r2, 0, 8; ; CHECK-NEXT: bfe.u32 %r4, %r1, 0, 8; -; CHECK-NEXT: setp.ls.u32 %p1, %r4, %r3; +; CHECK-NEXT: setp.le.u32 %p1, %r4, %r3; ; CHECK-NEXT: bfe.u32 %r5, %r2, 8, 8; ; CHECK-NEXT: bfe.u32 %r6, %r1, 8, 8; -; CHECK-NEXT: setp.ls.u32 %p2, %r6, %r5; +; CHECK-NEXT: setp.le.u32 %p2, %r6, %r5; ; CHECK-NEXT: bfe.u32 %r7, %r2, 16, 8; ; CHECK-NEXT: bfe.u32 %r8, %r1, 16, 8; -; CHECK-NEXT: setp.ls.u32 %p3, %r8, %r7; +; CHECK-NEXT: setp.le.u32 %p3, %r8, %r7; ; CHECK-NEXT: bfe.u32 %r9, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8; 
-; CHECK-NEXT: setp.ls.u32 %p4, %r10, %r9; +; CHECK-NEXT: setp.le.u32 %p4, %r10, %r9; ; CHECK-NEXT: selp.b32 %r11, %r10, %r9, %p4; ; CHECK-NEXT: selp.b32 %r12, %r8, %r7, %p3; ; CHECK-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U; @@ -414,16 +414,16 @@ define <4 x i8> @test_eq(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_eq_param_0]; ; CHECK-NEXT: bfe.u32 %r4, %r2, 0, 8; ; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8; -; CHECK-NEXT: setp.eq.u32 %p1, %r5, %r4; +; CHECK-NEXT: setp.eq.b32 %p1, %r5, %r4; ; CHECK-NEXT: bfe.u32 %r6, %r2, 8, 8; ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; -; CHECK-NEXT: setp.eq.u32 %p2, %r7, %r6; +; CHECK-NEXT: setp.eq.b32 %p2, %r7, %r6; ; CHECK-NEXT: bfe.u32 %r8, %r2, 16, 8; ; CHECK-NEXT: bfe.u32 %r9, %r1, 16, 8; -; CHECK-NEXT: setp.eq.u32 %p3, %r9, %r8; +; CHECK-NEXT: setp.eq.b32 %p3, %r9, %r8; ; CHECK-NEXT: bfe.u32 %r10, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r11, %r1, 24, 8; -; CHECK-NEXT: setp.eq.u32 %p4, %r11, %r10; +; CHECK-NEXT: setp.eq.b32 %p4, %r11, %r10; ; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8; ; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4; ; CHECK-NEXT: bfe.u32 %r14, %r3, 16, 8; @@ -454,16 +454,16 @@ define <4 x i8> @test_ne(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_ne_param_0]; ; CHECK-NEXT: bfe.u32 %r4, %r2, 0, 8; ; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8; -; CHECK-NEXT: setp.ne.u32 %p1, %r5, %r4; +; CHECK-NEXT: setp.ne.b32 %p1, %r5, %r4; ; CHECK-NEXT: bfe.u32 %r6, %r2, 8, 8; ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; -; CHECK-NEXT: setp.ne.u32 %p2, %r7, %r6; +; CHECK-NEXT: setp.ne.b32 %p2, %r7, %r6; ; CHECK-NEXT: bfe.u32 %r8, %r2, 16, 8; ; CHECK-NEXT: bfe.u32 %r9, %r1, 16, 8; -; CHECK-NEXT: setp.ne.u32 %p3, %r9, %r8; +; CHECK-NEXT: setp.ne.b32 %p3, %r9, %r8; ; CHECK-NEXT: bfe.u32 %r10, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r11, %r1, 24, 8; -; CHECK-NEXT: setp.ne.u32 %p4, %r11, %r10; +; CHECK-NEXT: setp.ne.b32 %p4, %r11, %r10; ; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8; ; CHECK-NEXT: selp.b32 
%r13, %r11, %r12, %p4; ; CHECK-NEXT: bfe.u32 %r14, %r3, 16, 8; @@ -920,16 +920,16 @@ define <4 x i8> @test_select_cc(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> ; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; ; CHECK-NEXT: bfe.u32 %r5, %r4, 0, 8; ; CHECK-NEXT: bfe.u32 %r6, %r3, 0, 8; -; CHECK-NEXT: setp.ne.u32 %p1, %r6, %r5; +; CHECK-NEXT: setp.ne.b32 %p1, %r6, %r5; ; CHECK-NEXT: bfe.u32 %r7, %r4, 8, 8; ; CHECK-NEXT: bfe.u32 %r8, %r3, 8, 8; -; CHECK-NEXT: setp.ne.u32 %p2, %r8, %r7; +; CHECK-NEXT: setp.ne.b32 %p2, %r8, %r7; ; CHECK-NEXT: bfe.u32 %r9, %r4, 16, 8; ; CHECK-NEXT: bfe.u32 %r10, %r3, 16, 8; -; CHECK-NEXT: setp.ne.u32 %p3, %r10, %r9; +; CHECK-NEXT: setp.ne.b32 %p3, %r10, %r9; ; CHECK-NEXT: bfe.u32 %r11, %r4, 24, 8; ; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8; -; CHECK-NEXT: setp.ne.u32 %p4, %r12, %r11; +; CHECK-NEXT: setp.ne.b32 %p4, %r12, %r11; ; CHECK-NEXT: bfe.u32 %r13, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r14, %r1, 24, 8; ; CHECK-NEXT: selp.b32 %r15, %r14, %r13, %p4; @@ -965,16 +965,16 @@ define <4 x i32> @test_select_cc_i32_i8(<4 x i32> %a, <4 x i32> %b, ; CHECK-NEXT: ld.param.b32 %r9, [test_select_cc_i32_i8_param_2]; ; CHECK-NEXT: bfe.u32 %r11, %r10, 0, 8; ; CHECK-NEXT: bfe.u32 %r12, %r9, 0, 8; -; CHECK-NEXT: setp.ne.u32 %p1, %r12, %r11; +; CHECK-NEXT: setp.ne.b32 %p1, %r12, %r11; ; CHECK-NEXT: bfe.u32 %r13, %r10, 8, 8; ; CHECK-NEXT: bfe.u32 %r14, %r9, 8, 8; -; CHECK-NEXT: setp.ne.u32 %p2, %r14, %r13; +; CHECK-NEXT: setp.ne.b32 %p2, %r14, %r13; ; CHECK-NEXT: bfe.u32 %r15, %r10, 16, 8; ; CHECK-NEXT: bfe.u32 %r16, %r9, 16, 8; -; CHECK-NEXT: setp.ne.u32 %p3, %r16, %r15; +; CHECK-NEXT: setp.ne.b32 %p3, %r16, %r15; ; CHECK-NEXT: bfe.u32 %r17, %r10, 24, 8; ; CHECK-NEXT: bfe.u32 %r18, %r9, 24, 8; -; CHECK-NEXT: setp.ne.u32 %p4, %r18, %r17; +; CHECK-NEXT: setp.ne.b32 %p4, %r18, %r17; ; CHECK-NEXT: selp.b32 %r19, %r4, %r8, %p4; ; CHECK-NEXT: selp.b32 %r20, %r3, %r7, %p3; ; CHECK-NEXT: selp.b32 %r21, %r2, %r6, %p2; @@ -998,10 +998,10 @@ define <4 x i8> 
@test_select_cc_i8_i32(<4 x i8> %a, <4 x i8> %b, ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [test_select_cc_i8_i32_param_2]; ; CHECK-NEXT: ld.param.b32 %r2, [test_select_cc_i8_i32_param_1]; ; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_i8_i32_param_0]; -; CHECK-NEXT: setp.ne.s32 %p1, %r3, %r7; -; CHECK-NEXT: setp.ne.s32 %p2, %r4, %r8; -; CHECK-NEXT: setp.ne.s32 %p3, %r5, %r9; -; CHECK-NEXT: setp.ne.s32 %p4, %r6, %r10; +; CHECK-NEXT: setp.ne.b32 %p1, %r3, %r7; +; CHECK-NEXT: setp.ne.b32 %p2, %r4, %r8; +; CHECK-NEXT: setp.ne.b32 %p3, %r5, %r9; +; CHECK-NEXT: setp.ne.b32 %p4, %r6, %r10; ; CHECK-NEXT: bfe.u32 %r11, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r12, %r1, 24, 8; ; CHECK-NEXT: selp.b32 %r13, %r12, %r11, %p4; @@ -1421,16 +1421,16 @@ define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ld.b32 %r2, [%rd2]; ; CHECK-NEXT: bfe.u32 %r3, %r2, 0, 8; ; CHECK-NEXT: bfe.u32 %r4, %r1, 0, 8; -; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3; +; CHECK-NEXT: setp.gt.u32 %p1, %r4, %r3; ; CHECK-NEXT: bfe.u32 %r5, %r2, 8, 8; ; CHECK-NEXT: bfe.u32 %r6, %r1, 8, 8; -; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5; +; CHECK-NEXT: setp.gt.u32 %p2, %r6, %r5; ; CHECK-NEXT: bfe.u32 %r7, %r2, 16, 8; ; CHECK-NEXT: bfe.u32 %r8, %r1, 16, 8; -; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7; +; CHECK-NEXT: setp.gt.u32 %p3, %r8, %r7; ; CHECK-NEXT: bfe.u32 %r9, %r2, 24, 8; ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8; -; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9; +; CHECK-NEXT: setp.gt.u32 %p4, %r10, %r9; ; CHECK-NEXT: selp.b32 %r11, -1, 0, %p4; ; CHECK-NEXT: selp.b32 %r12, -1, 0, %p3; ; CHECK-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U; diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll index 5cfdbb7447ad8..307e2c8550914 100644 --- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll +++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll @@ -62,7 +62,7 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) { ; CHECK-NEXT: 
ld.param.b64 %rd2, [test_b128_input_from_select_param_0]; ; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2; ; CHECK-NEXT: ld.global.b8 %rs1, [%rd3]; -; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0; +; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0; ; CHECK-NEXT: selp.b64 %rd4, 24, 42, %p1; ; CHECK-NEXT: mov.b64 %rd5, 0; ; CHECK-NEXT: mov.b128 %rq1, {%rd4, %rd5}; diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll index 6dbf44f38aa2f..037d7df1aee59 100644 --- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll +++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll @@ -16,7 +16,7 @@ define void @test_b128_in_loop() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.global.s32 %rd1, [size]; -; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0; +; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0; ; CHECK-NEXT: @%p1 bra $L__BB0_3; ; CHECK-NEXT: // %bb.1: // %BB1 ; CHECK-NEXT: ld.global.v2.b64 {%rd12, %rd13}, [x]; @@ -36,7 +36,7 @@ define void @test_b128_in_loop() { ; CHECK-NEXT: mov.b128 {%rd12, %rd13}, %rq1; ; CHECK-NEXT: st.global.v2.b64 [x], {%rd12, %rd13}; ; CHECK-NEXT: add.s64 %rd14, %rd14, 1; -; CHECK-NEXT: setp.ne.s64 %p2, %rd1, %rd14; +; CHECK-NEXT: setp.ne.b64 %p2, %rd1, %rd14; ; CHECK-NEXT: @%p2 bra $L__BB0_2; ; CHECK-NEXT: $L__BB0_3: // %BB3 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll index 955befc624c71..a6238352179ca 100644 --- a/llvm/test/CodeGen/NVPTX/jump-table.ll +++ b/llvm/test/CodeGen/NVPTX/jump-table.ll @@ -99,7 +99,7 @@ define i32 @test2(i32 %tmp158) { ; CHECK-NEXT: st.param.b32 [func_retval0], 12; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_5: // %entry -; CHECK-NEXT: setp.eq.s32 %p3, %r1, 1024; +; CHECK-NEXT: setp.eq.b32 %p3, %r1, 1024; ; CHECK-NEXT: @%p3 bra $L__BB1_3; ; CHECK-NEXT: bra.uni $L__BB1_6; ; CHECK-NEXT: $L__BB1_3: // %bb338 diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll index 
d494ee30c2821..b6a00e03a80ab 100644 --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -613,7 +613,7 @@ define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) { ; SM20-NEXT: add.rn.f32 %r9, %r7, %r9; ; SM20-NEXT: add.s64 %rd7, %rd7, 4; ; SM20-NEXT: add.s32 %r8, %r8, -1; -; SM20-NEXT: setp.ne.s32 %p1, %r8, 0; +; SM20-NEXT: setp.ne.b32 %p1, %r8, 0; ; SM20-NEXT: @%p1 bra $L__BB18_1; ; SM20-NEXT: // %bb.2: // %exit ; SM20-NEXT: st.global.b32 [%rd2], %r9; @@ -638,7 +638,7 @@ define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) { ; SM35-NEXT: add.rn.f32 %r9, %r7, %r9; ; SM35-NEXT: add.s64 %rd7, %rd7, 4; ; SM35-NEXT: add.s32 %r8, %r8, -1; -; SM35-NEXT: setp.ne.s32 %p1, %r8, 0; +; SM35-NEXT: setp.ne.b32 %p1, %r8, 0; ; SM35-NEXT: @%p1 bra $L__BB18_1; ; SM35-NEXT: // %bb.2: // %exit ; SM35-NEXT: st.global.b32 [%rd2], %r9; diff --git a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll index 99212fc0dff79..297b2b984cdae 100644 --- a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll +++ b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll @@ -160,7 +160,7 @@ entry: ; PTX-LABEL: .visible .func (.param .b64 func_retval0) memmove_caller( ; PTX: ld.param.b64 %rd[[N:[0-9]+]] -; PTX-DAG: setp.eq.s64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0 +; PTX-DAG: setp.eq.b64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0 ; PTX-DAG: setp.ge.u64 %p[[SRC_GT_THAN_DST:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} ; PTX-NEXT: @%p[[SRC_GT_THAN_DST]] bra $L__BB[[FORWARD_BB:[0-9_]+]] ; -- this is the backwards copying BB diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index dde71b009d564..e9635e9393984 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -621,9 +621,9 @@ define half @minimum_half(half %a, half %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NOF16-NEXT: 
setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768; +; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs1, -32768; ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; +; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs2, -32768; ; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -657,9 +657,9 @@ define half @minimum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs1, -32768; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs2, -32768; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -682,9 +682,9 @@ define float @minimum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: min.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; +; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, -2147483648; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; @@ -727,7 +727,7 @@ define float @minimum_imm1(float %a) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: 
selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; ; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; @@ -768,7 +768,7 @@ define float @minimum_imm2(float %a) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; ; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; @@ -810,9 +810,9 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: min.ftz.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; +; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, -2147483648; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; @@ -826,7 +826,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [minimum_float_ftz_param_0]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [minimum_float_ftz_param_1]; -; CHECK-F16-NEXT: min.NaN.ftz.f32 %r3, %r1, %r2; +; CHECK-F16-NEXT: min.ftz.NaN.f32 %r3, %r1, %r2; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; @@ -837,7 +837,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 
%r1, [minimum_float_ftz_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minimum_float_ftz_param_1]; -; CHECK-SM80-NOF16-NEXT: min.NaN.ftz.f32 %r3, %r1, %r2; +; CHECK-SM80-NOF16-NEXT: min.ftz.NaN.f32 %r3, %r1, %r2; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.minimum.f32(float %a, float %b) @@ -856,9 +856,9 @@ define double @minimum_double(double %a, double %b) { ; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; ; CHECK-NEXT: min.f64 %rd3, %rd1, %rd2; ; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.eq.s64 %p2, %rd1, -9223372036854775808; +; CHECK-NEXT: setp.eq.b64 %p2, %rd1, -9223372036854775808; ; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.eq.s64 %p3, %rd2, -9223372036854775808; +; CHECK-NEXT: setp.eq.b64 %p3, %rd2, -9223372036854775808; ; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; ; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; ; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; @@ -884,9 +884,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs2, -32768; +; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs2, -32768; ; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs4, -32768; +; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs4, -32768; ; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs6; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -897,9 +897,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4; ; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; -; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs1, -32768; +; CHECK-NOF16-NEXT: setp.eq.b16 %p8, 
%rs1, -32768; ; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; -; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs3, -32768; +; CHECK-NOF16-NEXT: setp.eq.b16 %p9, %rs3, -32768; ; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs11; ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000; @@ -933,9 +933,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs2, -32768; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs2, -32768; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs4, -32768; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs4, -32768; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs6; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -946,9 +946,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs1, -32768; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p8, %rs1, -32768; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs3, -32768; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p9, %rs3, -32768; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs11; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000; @@ -1152,9 +1152,9 @@ define half @maximum_half(half %a, half %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 
%p3, %rs1, 0; +; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs1, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; +; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs2, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -1188,9 +1188,9 @@ define half @maximum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs2, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -1291,9 +1291,9 @@ define float @maximum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: max.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; +; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; +; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; @@ -1337,9 +1337,9 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: max.ftz.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; +; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, 
%p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; +; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; @@ -1353,7 +1353,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1]; -; CHECK-F16-NEXT: max.NaN.ftz.f32 %r3, %r1, %r2; +; CHECK-F16-NEXT: max.ftz.NaN.f32 %r3, %r1, %r2; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; @@ -1364,7 +1364,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1]; -; CHECK-SM80-NOF16-NEXT: max.NaN.ftz.f32 %r3, %r1, %r2; +; CHECK-SM80-NOF16-NEXT: max.ftz.NaN.f32 %r3, %r1, %r2; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float %b) @@ -1383,9 +1383,9 @@ define double @maximum_double(double %a, double %b) { ; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; ; CHECK-NEXT: max.f64 %rd3, %rd1, %rd2; ; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.eq.s64 %p2, %rd1, 0; +; CHECK-NEXT: setp.eq.b64 %p2, %rd1, 0; ; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.eq.s64 %p3, %rd2, 0; +; CHECK-NEXT: setp.eq.b64 %p3, %rd2, 0; ; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; ; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; ; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; @@ -1411,9 +1411,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, 
%p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs2, 0; +; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs2, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs4, 0; +; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs4, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs6; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -1424,9 +1424,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4; ; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; -; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs1, 0; +; CHECK-NOF16-NEXT: setp.eq.b16 %p8, %rs1, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; -; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs3, 0; +; CHECK-NOF16-NEXT: setp.eq.b16 %p9, %rs3, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs11; ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000; @@ -1460,9 +1460,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs2, 0; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs2, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs4, 0; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs4, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs6; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -1473,9 +1473,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; -; 
CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs1, 0; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p8, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs3, 0; +; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p9, %rs3, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs11; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000; diff --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll index 0af8190f20d18..f6e6196345fcb 100644 --- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll +++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll @@ -12,8 +12,8 @@ define <2 x i16> @sext_setcc_v2i1_to_v2i16(ptr %p) { ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: ld.param.b64 %rd1, [sext_setcc_v2i1_to_v2i16_param_0]; ; CHECK-NEXT: ld.v2.b16 {%rs1, %rs2}, [%rd1]; -; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0; -; CHECK-NEXT: setp.eq.s16 %p2, %rs2, 0; +; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0; +; CHECK-NEXT: setp.eq.b16 %p2, %rs2, 0; ; CHECK-NEXT: selp.b16 %rs3, -1, 0, %p2; ; CHECK-NEXT: selp.b16 %rs4, -1, 0, %p1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -39,19 +39,19 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) { ; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8; ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; ; CHECK-NEXT: and.b16 %rs2, %rs1, 255; -; CHECK-NEXT: setp.eq.s16 %p1, %rs2, 0; +; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 0; ; CHECK-NEXT: bfe.u32 %r3, %r1, 8, 8; ; CHECK-NEXT: cvt.u16.u32 %rs3, %r3; ; CHECK-NEXT: and.b16 %rs4, %rs3, 255; -; CHECK-NEXT: setp.eq.s16 %p2, %rs4, 0; +; CHECK-NEXT: setp.eq.b16 %p2, %rs4, 0; ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; ; CHECK-NEXT: cvt.u16.u32 %rs5, %r4; ; CHECK-NEXT: and.b16 %rs6, %rs5, 255; -; CHECK-NEXT: setp.eq.s16 %p3, %rs6, 0; +; CHECK-NEXT: setp.eq.b16 %p3, %rs6, 0; ; CHECK-NEXT: bfe.u32 %r5, %r1, 24, 8; ; CHECK-NEXT: cvt.u16.u32 %rs7, %r5; ; CHECK-NEXT: and.b16 %rs8, %rs7, 255; -; CHECK-NEXT: setp.eq.s16 %p4, %rs8, 0; +; 
CHECK-NEXT: setp.eq.b16 %p4, %rs8, 0; ; CHECK-NEXT: selp.b32 %r6, -1, 0, %p4; ; CHECK-NEXT: selp.b32 %r7, -1, 0, %p3; ; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U; diff --git a/llvm/test/CodeGen/NVPTX/tid-range.ll b/llvm/test/CodeGen/NVPTX/tid-range.ll index 019814e47c2b1..3ec33eace6441 100644 --- a/llvm/test/CodeGen/NVPTX/tid-range.ll +++ b/llvm/test/CodeGen/NVPTX/tid-range.ll @@ -12,7 +12,7 @@ define i1 @test1() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: mov.u32 %r1, %tid.x; -; CHECK-NEXT: setp.eq.s32 %p1, %r1, 1; +; CHECK-NEXT: setp.eq.b32 %p1, %r1, 1; ; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret;