Skip to content

Commit f1628f3

Browse files
jrtc27
authored and
memfrob
committed
[SelectionDAG][Mips][PowerPC][RISCV][WebAssembly] Teach computeKnownBits/ComputeNumSignBits about atomics
Unlike normal loads these don't have an extension field, but we know from TargetLowering whether these are sign-extending or zero-extending, and so can optimise away unnecessary extensions. This was noticed on RISC-V, where sign extensions in the calling convention would result in unnecessary explicit extension instructions, but this also fixes some Mips inefficiencies. PowerPC sees churn in the tests as all the zero extensions are only for promoting 32-bit to 64-bit, but these zero extensions are still not optimised away as they should be, likely due to i32 being a legal type. This also simplifies the WebAssembly code somewhat, which currently works around the lack of target-independent combines with some ugly patterns that break once they're optimised away. Re-landed with correct handling in ComputeNumSignBits for Tmp == VTBits, where zero-extending atomics were incorrectly returning 0 rather than the (slightly confusing) required return value of 1. Reviewed By: RKSimon, atanasyan Differential Revision: https://reviews.llvm.org/D101342
1 parent 6ad2a21 commit f1628f3

File tree

8 files changed

+1046
-1090
lines changed

8 files changed

+1046
-1090
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3065,7 +3065,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
30653065
break;
30663066
case ISD::SMULO:
30673067
case ISD::UMULO:
3068-
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
30693068
if (Op.getResNo() != 1)
30703069
break;
30713070
// The boolean result conforms to getBooleanContents.
@@ -3520,6 +3519,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
35203519
Known = KnownBits::smin(Known, Known2);
35213520
break;
35223521
}
3522+
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
3523+
if (Op.getResNo() == 1) {
3524+
// The boolean result conforms to getBooleanContents.
3525+
// If we know the result of a setcc has the top bits zero, use this info.
3526+
// We know that we have an integer-based boolean since these operations
3527+
// are only available for integer.
3528+
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
3529+
TargetLowering::ZeroOrOneBooleanContent &&
3530+
BitWidth > 1)
3531+
Known.Zero.setBitsFrom(1);
3532+
break;
3533+
}
3534+
LLVM_FALLTHROUGH;
3535+
case ISD::ATOMIC_CMP_SWAP:
3536+
case ISD::ATOMIC_SWAP:
3537+
case ISD::ATOMIC_LOAD_ADD:
3538+
case ISD::ATOMIC_LOAD_SUB:
3539+
case ISD::ATOMIC_LOAD_AND:
3540+
case ISD::ATOMIC_LOAD_CLR:
3541+
case ISD::ATOMIC_LOAD_OR:
3542+
case ISD::ATOMIC_LOAD_XOR:
3543+
case ISD::ATOMIC_LOAD_NAND:
3544+
case ISD::ATOMIC_LOAD_MIN:
3545+
case ISD::ATOMIC_LOAD_MAX:
3546+
case ISD::ATOMIC_LOAD_UMIN:
3547+
case ISD::ATOMIC_LOAD_UMAX:
3548+
case ISD::ATOMIC_LOAD: {
3549+
unsigned MemBits =
3550+
cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
3551+
// If we are looking at the loaded value.
3552+
if (Op.getResNo() == 0) {
3553+
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
3554+
Known.Zero.setBitsFrom(MemBits);
3555+
}
3556+
break;
3557+
}
35233558
case ISD::FrameIndex:
35243559
case ISD::TargetFrameIndex:
35253560
TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
@@ -4100,6 +4135,33 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
41004135
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
41014136
return Tmp;
41024137
}
4138+
case ISD::ATOMIC_CMP_SWAP:
4139+
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
4140+
case ISD::ATOMIC_SWAP:
4141+
case ISD::ATOMIC_LOAD_ADD:
4142+
case ISD::ATOMIC_LOAD_SUB:
4143+
case ISD::ATOMIC_LOAD_AND:
4144+
case ISD::ATOMIC_LOAD_CLR:
4145+
case ISD::ATOMIC_LOAD_OR:
4146+
case ISD::ATOMIC_LOAD_XOR:
4147+
case ISD::ATOMIC_LOAD_NAND:
4148+
case ISD::ATOMIC_LOAD_MIN:
4149+
case ISD::ATOMIC_LOAD_MAX:
4150+
case ISD::ATOMIC_LOAD_UMIN:
4151+
case ISD::ATOMIC_LOAD_UMAX:
4152+
case ISD::ATOMIC_LOAD: {
4153+
Tmp = cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
4154+
// If we are looking at the loaded value.
4155+
if (Op.getResNo() == 0) {
4156+
if (Tmp == VTBits)
4157+
return 1; // early-out
4158+
if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
4159+
return VTBits - Tmp + 1;
4160+
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
4161+
return VTBits - Tmp;
4162+
}
4163+
break;
4164+
}
41034165
}
41044166

41054167
// If we are looking at the loaded value of the SDNode.

llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td

Lines changed: 11 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -259,26 +259,20 @@ defm ATOMIC_LOAD32_U_I64 : AtomicLoad<I64, "i64.atomic.load32_u", 0x16>;
259259
// therefore don't have the extension type field. So instead of matching that,
260260
// we match the patterns that the type legalizer expands them to.
261261

262-
// We directly match zext patterns and select the zext atomic loads.
263-
// i32 (zext (i8 (atomic_load_8))) gets legalized to
264-
// i32 (and (i32 (atomic_load_8)), 255)
265-
// These can be selected to a single zero-extending atomic load instruction.
266-
def zext_aload_8_32 :
267-
PatFrag<(ops node:$addr), (and (i32 (atomic_load_8 node:$addr)), 255)>;
268-
def zext_aload_16_32 :
269-
PatFrag<(ops node:$addr), (and (i32 (atomic_load_16 node:$addr)), 65535)>;
270262
// Unlike regular loads, extension to i64 is handled differently than i32.
271263
// i64 (zext (i8 (atomic_load_8))) gets legalized to
272264
// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255)
265+
// Extension to i32 is elided by SelectionDAG as our atomic loads are
266+
// zero-extending.
273267
def zext_aload_8_64 :
274268
PatFrag<(ops node:$addr),
275-
(and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>;
269+
(i64 (zext (i32 (atomic_load_8 node:$addr))))>;
276270
def zext_aload_16_64 :
277271
PatFrag<(ops node:$addr),
278-
(and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>;
272+
(i64 (zext (i32 (atomic_load_16 node:$addr))))>;
279273
def zext_aload_32_64 :
280274
PatFrag<(ops node:$addr),
281-
(zext (i32 (atomic_load node:$addr)))>;
275+
(i64 (zext (i32 (atomic_load_32 node:$addr))))>;
282276

283277
// We don't have single sext atomic load instructions. So for sext loads, we
284278
// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit
@@ -290,8 +284,6 @@ def sext_aload_16_64 :
290284
PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>;
291285

292286
// Select zero-extending loads with no constant offset.
293-
defm : LoadPatNoOffset<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
294-
defm : LoadPatNoOffset<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
295287
defm : LoadPatNoOffset<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
296288
defm : LoadPatNoOffset<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
297289
defm : LoadPatNoOffset<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
@@ -304,10 +296,6 @@ defm : LoadPatNoOffset<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
304296
// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
305297

306298
// Zero-extending loads with constant offset
307-
defm : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, "ATOMIC_LOAD8_U_I32">;
308-
defm : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, "ATOMIC_LOAD16_U_I32">;
309-
defm : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, "ATOMIC_LOAD8_U_I32">;
310-
defm : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, "ATOMIC_LOAD16_U_I32">;
311299
defm : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
312300
defm : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
313301
defm : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, "ATOMIC_LOAD32_U_I64">;
@@ -327,8 +315,6 @@ defm : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
327315
// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64
328316

329317
// Extending loads with just a constant offset
330-
defm : LoadPatOffsetOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
331-
defm : LoadPatOffsetOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
332318
defm : LoadPatOffsetOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
333319
defm : LoadPatOffsetOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
334320
defm : LoadPatOffsetOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
@@ -337,8 +323,6 @@ defm : LoadPatOffsetOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
337323
defm : LoadPatOffsetOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
338324
defm : LoadPatOffsetOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
339325

340-
defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
341-
defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
342326
defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
343327
defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
344328
defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
@@ -651,22 +635,13 @@ defm : BinRMWPattern<atomic_swap_32, atomic_swap_64,
651635
// These are combined patterns of truncating store patterns and zero-extending
652636
// load patterns above.
653637
class zext_bin_rmw_8_32<PatFrag kind> :
654-
PatFrag<(ops node:$addr, node:$val),
655-
(and (i32 (kind node:$addr, node:$val)), 255)>;
656-
class zext_bin_rmw_16_32<PatFrag kind> :
657-
PatFrag<(ops node:$addr, node:$val),
658-
(and (i32 (kind node:$addr, node:$val)), 65535)>;
638+
PatFrag<(ops node:$addr, node:$val), (i32 (kind node:$addr, node:$val))>;
639+
class zext_bin_rmw_16_32<PatFrag kind> : zext_bin_rmw_8_32<kind>;
659640
class zext_bin_rmw_8_64<PatFrag kind> :
660-
PatFrag<(ops node:$addr, node:$val),
661-
(and (i64 (anyext (i32 (kind node:$addr,
662-
(i32 (trunc (i64 node:$val))))))), 255)>;
663-
class zext_bin_rmw_16_64<PatFrag kind> :
664-
PatFrag<(ops node:$addr, node:$val),
665-
(and (i64 (anyext (i32 (kind node:$addr,
666-
(i32 (trunc (i64 node:$val))))))), 65535)>;
667-
class zext_bin_rmw_32_64<PatFrag kind> :
668641
PatFrag<(ops node:$addr, node:$val),
669642
(zext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
643+
class zext_bin_rmw_16_64<PatFrag kind> : zext_bin_rmw_8_64<kind>;
644+
class zext_bin_rmw_32_64<PatFrag kind> : zext_bin_rmw_8_64<kind>;
670645

671646
// Truncating & sign-extending binary RMW patterns.
672647
// These are combined patterns of truncating store patterns and sign-extending
@@ -887,10 +862,8 @@ defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
887862
// additional nodes such as anyext or assertzext depending on operand types.
888863
class zext_ter_rmw_8_32<PatFrag kind> :
889864
PatFrag<(ops node:$addr, node:$exp, node:$new),
890-
(and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>;
891-
class zext_ter_rmw_16_32<PatFrag kind> :
892-
PatFrag<(ops node:$addr, node:$exp, node:$new),
893-
(and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>;
865+
(i32 (kind node:$addr, node:$exp, node:$new))>;
866+
class zext_ter_rmw_16_32<PatFrag kind> : zext_ter_rmw_8_32<kind>;
894867
class zext_ter_rmw_8_64<PatFrag kind> :
895868
PatFrag<(ops node:$addr, node:$exp, node:$new),
896869
(zext (i32 (assertzext (i32 (kind node:$addr,

0 commit comments

Comments
 (0)