diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 540756653dd22..664a511efd973 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -841,6 +841,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { case Sgpr128: case Vgpr128: return LLT::scalar(128); + case SgprP0: case VgprP0: return LLT::pointer(0, 64); case SgprP1: @@ -855,6 +856,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { case SgprP5: case VgprP5: return LLT::pointer(5, 32); + case SgprP8: + return LLT::pointer(8, 128); case SgprV2S16: case VgprV2S16: case UniInVgprV2S16: @@ -940,10 +943,12 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case Sgpr32_WF: case Sgpr64: case Sgpr128: + case SgprP0: case SgprP1: case SgprP3: case SgprP4: case SgprP5: + case SgprP8: case SgprPtr32: case SgprPtr64: case SgprPtr128: @@ -1022,10 +1027,12 @@ void RegBankLegalizeHelper::applyMappingDst( case Sgpr32: case Sgpr64: case Sgpr128: + case SgprP0: case SgprP1: case SgprP3: case SgprP4: case SgprP5: + case SgprP8: case SgprV2S16: case SgprV2S32: case SgprV4S32: @@ -1163,10 +1170,12 @@ void RegBankLegalizeHelper::applyMappingSrc( case Sgpr32: case Sgpr64: case Sgpr128: + case SgprP0: case SgprP1: case SgprP3: case SgprP4: case SgprP5: + case SgprP8: case SgprV2S16: case SgprV2S32: case SgprV4S32: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index a67b12a22589c..6c04fabdbbde7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -66,6 +66,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::pointer(4, 64); case P5: return MRI.getType(Reg) == LLT::pointer(5, 32); + case P8: + return MRI.getType(Reg) == LLT::pointer(8, 128); case Ptr32: return isAnyPtr(MRI.getType(Reg), 32); case Ptr64: @@ -108,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg); case UniP5: return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg); + case UniP8: + return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg); case UniPtr32: return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg); case UniPtr64: @@ -903,6 +907,15 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}}); + addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}}); + + addRulesForGOpcs({G_GLOBAL_VALUE}) + .Any({{UniP0}, {{SgprP0}, {}}}) + .Any({{UniP1}, {{SgprP1}, {}}}) + .Any({{UniP3}, {{SgprP3}, {}}}) + .Any({{UniP4}, {{SgprP4}, {}}}) + .Any({{UniP8}, {{SgprP8}, {}}}); + bool hasSALUFloat = ST->hasSALUFloatInsts(); addRulesForGOpcs({G_FADD}, Standard) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 93e0efda77fdd..fb392d7ae332b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -63,6 +63,7 @@ enum UniformityLLTOpPredicateID { P3, P4, P5, + P8, Ptr32, Ptr64, Ptr128, @@ -72,6 +73,7 @@ enum UniformityLLTOpPredicateID { UniP3, UniP4, UniP5, + UniP8, UniPtr32, UniPtr64, UniPtr128, @@ -134,10 +136,12 @@ enum RegBankLLTMappingApplyID { Sgpr32, Sgpr64, Sgpr128, + SgprP0, SgprP1, SgprP3, SgprP4, SgprP5, + SgprP8, SgprPtr32, SgprPtr64, SgprPtr128, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll new file mode 100644 index 0000000000000..cf9524b860fd2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s + +@flat = external global i32, align 4 +@global = external addrspace(1) global i32, align 4 +@lds = addrspace(3) global i32 poison, align 4 +@constant = external addrspace(4) constant i32, align 4 +@buf = external addrspace(8) global i8 + +define ptr @global_value_as0_external() { +; GCN-LABEL: global_value_as0_external: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, flat@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, flat@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_setpc_b64 s[30:31] + ret ptr @flat +} + +define ptr addrspace(1) @global_value_as1_external() { +; GCN-LABEL: global_value_as1_external: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, global@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, global@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_setpc_b64 s[30:31] + ret ptr addrspace(1) @global +} + +define ptr addrspace(4) @global_value_as4_external() { +; GCN-LABEL: global_value_as4_external: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, constant@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, constant@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_setpc_b64 s[30:31] + ret ptr addrspace(4) @constant +} + +define amdgpu_kernel void @global_value_as3_lds_kernel(ptr addrspace(1) %out) { +; GCN-LABEL: global_value_as3_lds_kernel: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: global_store_dword v0, v0, s[0:1] +; GCN-NEXT: s_endpgm + %addr = ptrtoint ptr addrspace(3) @lds to i32 + store i32 %addr, ptr addrspace(1) %out + ret void +} + +define void @global_value_as8_buffer_store(i32 %val) { +; GCN-LABEL: global_value_as8_buffer_store: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_getpc_b64 s[8:9] +; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %val, ptr addrspace(8) @buf, i32 0, i32 0, i32 0) + ret void +} + +define i32 @global_value_as8_buffer_load(i32 %offset) { +; GCN-LABEL: global_value_as8_buffer_load: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_getpc_b64 s[8:9] +; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) @buf, i32 %offset, i32 0, i32 0) + ret i32 %val +} + +declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) #0 +declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg) #1 + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll index 82886ab9e7d55..e1ac8ba5e6db4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s ; FIXME: Merge with DAG test @lds.external = external unnamed_addr addrspace(3) global [0 x i32] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll index cabb37c330b4a..3396eaedf359e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -1,8 +1,8 @@ -; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s -; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s ; CHECK: error: lds: unsupported initializer for address space diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir index a50c7fe0748b8..fc86dd884fac0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s +# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- |