Skip to content

Commit 61571ad

Browse files
author
git apple-llvm automerger
committed
Merge commit 'd998f92a002b' from llvm.org/main into next
2 parents 31d4566 + d998f92 commit 61571ad

File tree

14 files changed

+5068
-4996
lines changed

llvm/lib/CodeGen/MachineVerifier.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2584,6 +2584,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
25842584
report("Extra explicit operand on non-variadic instruction", MO, MONum);
25852585
}
25862586

2587+
// Verify earlyClobber def operand
2588+
if (MCID.getOperandConstraint(MONum, MCOI::EARLY_CLOBBER) != -1) {
2589+
if (!MO->isReg())
2590+
report("Early clobber must be a register", MI);
2591+
if (!MO->isEarlyClobber())
2592+
report("Missing earlyClobber flag", MI);
2593+
}
2594+
25872595
switch (MO->getType()) {
25882596
case MachineOperand::MO_Register: {
25892597
// Verify debug flag on debug instructions. Check this first because reg0

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,14 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
116116
if (!DstRC || DstRC != SrcRC)
117117
return false;
118118

119-
return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
120-
RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
119+
if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
120+
!RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
121+
return false;
122+
const MCInstrDesc &MCID = MI.getDesc();
123+
if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
124+
MI.getOperand(0).setIsEarlyClobber(true);
125+
}
126+
return true;
121127
}
122128

123129
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
@@ -602,6 +608,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
602608
I.setDesc(TII.get(Opc));
603609
I.addOperand(*MF, MachineOperand::CreateImm(0));
604610
I.addImplicitDefUseOperands(*MF);
611+
I.getOperand(0).setIsEarlyClobber(true);
605612
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
606613
}
607614

@@ -3787,6 +3794,10 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
37873794
MI.removeOperand(1); // Intrinsic ID
37883795
MI.addOperand(VDst_In); // Readd VDst_In to the end
37893796
MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3797+
const MCInstrDesc &MCID = MI.getDesc();
3798+
if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
3799+
MI.getOperand(0).setIsEarlyClobber(true);
3800+
}
37903801
return true;
37913802
}
37923803

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) {
2727
; GCN: ; %bb.0:
2828
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
2929
; GCN-NEXT: v_mov_b32_e32 v0, 1
30-
; GCN-NEXT: v_mov_b32_e32 v0, v0
30+
; GCN-NEXT: v_mov_b32_e32 v1, v0
3131
; GCN-NEXT: s_mov_b32 s2, -1
3232
; GCN-NEXT: s_mov_b32 s3, 0xf000
3333
; GCN-NEXT: s_waitcnt lgkmcnt(0)
34-
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
34+
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
3535
; GCN-NEXT: s_endpgm
3636
%tmp.0 = call i32 @llvm.amdgcn.set.inactive.i32(i32 1, i32 poison) #0
3737
%tmp = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp.0)
@@ -68,12 +68,12 @@ define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) {
6868
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
6969
; GCN-NEXT: v_mov_b32_e32 v0, 1
7070
; GCN-NEXT: v_mov_b32_e32 v1, 0
71-
; GCN-NEXT: v_mov_b32_e32 v0, v0
72-
; GCN-NEXT: v_mov_b32_e32 v1, v1
71+
; GCN-NEXT: v_mov_b32_e32 v2, v0
72+
; GCN-NEXT: v_mov_b32_e32 v3, v1
7373
; GCN-NEXT: s_mov_b32 s2, -1
7474
; GCN-NEXT: s_mov_b32 s3, 0xf000
7575
; GCN-NEXT: s_waitcnt lgkmcnt(0)
76-
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
76+
; GCN-NEXT: buffer_store_dwordx2 v[2:3], off, s[0:3], 0
7777
; GCN-NEXT: s_endpgm
7878
%tmp.0 = call i64 @llvm.amdgcn.set.inactive.i64(i64 1, i64 poison) #0
7979
%tmp = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %tmp.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,10 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
165165
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
166166
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
167167
; GFX10-NEXT: s_clause 0x1
168-
; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
169-
; GFX10-NEXT: global_load_dword v2, v0, s[6:7]
168+
; GFX10-NEXT: global_load_dword v2, v0, s[2:3]
169+
; GFX10-NEXT: global_load_dword v3, v0, s[6:7]
170170
; GFX10-NEXT: s_waitcnt vmcnt(0)
171-
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v1, v2, 0
171+
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v2, v3, 0
172172
; GFX10-NEXT: v_mov_b32_e32 v2, 0
173173
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
174174
; GFX10-NEXT: s_endpgm
@@ -179,15 +179,15 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
179179
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
180180
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
181181
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
182-
; GFX11-NEXT: v_mov_b32_e32 v2, 0
183-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
182+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
184183
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
185184
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
186185
; GFX11-NEXT: s_clause 0x1
187-
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
188-
; GFX11-NEXT: global_load_b32 v0, v0, s[4:5]
186+
; GFX11-NEXT: global_load_b32 v2, v0, s[2:3]
187+
; GFX11-NEXT: global_load_b32 v3, v0, s[4:5]
189188
; GFX11-NEXT: s_waitcnt vmcnt(0)
190-
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, 0
189+
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, 0
190+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
191191
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
192192
; GFX11-NEXT: s_endpgm
193193
%tid = call i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)