Skip to content

Commit e1a3aa0

Browse files
vsemenov368 authored and pszymich committed
Fix untyped load 2D intrinsic whole GRF allocation in VC
(cherry picked from commit 8acdb71)
1 parent 3c12558 commit e1a3aa0

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLoadStoreLegalization.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -488,7 +488,7 @@ Value *GenXLoadStoreLegalization::extendUntypedBlockLoad2d(CallInst &CI) const {
488488

489489
if (WidthBytes % ST->getGRFByteSize()) {
490490
SmallVector<Type *, 2> OverloadedTypes;
491-
auto NewNumElements = ST->getGRFByteSize() / ElemSizeBytes;
491+
auto NewNumElements = alignTo(NumElements, ST->getGRFByteSize() / ElemSizeBytes);
492492
auto *VTy = IGCLLVM::FixedVectorType::get(ETy, NewNumElements);
493493
OverloadedTypes.push_back(VTy);
494494
const auto CacheControlIndex =

IGC/VectorCompiler/test/GenXLoadStoreLegalization/LSC/load_2d_ugm.ll

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8,21 +8,22 @@
88

99
; RUN: %opt %use_old_pass_manager% -GenXLoadStoreLegalization -march=genx64 -mcpu=Xe2 -mtriple=spir64-unknown-unknown -S %s | FileCheck %s
1010

11-
declare <6 x i32> @llvm.vc.internal.lsc.load.block.2d.ugm.v6i32.v2i8(i1, i8, <2 x i8>, i8, i16, i16, i64, i32, i32, i32, i32, i32, i32, i32, <6 x i32>)
1211
declare <32 x i16> @llvm.vc.internal.lsc.load.block.2d.ugm.v32i16.v2i8(i1, i8, <2 x i8>, i8, i16, i16, i64, i32, i32, i32, i32, i32, i32, i32, <32 x i16>)
1312
declare <16 x i8> @llvm.vc.internal.lsc.load.2d.ugm.desc.v16i8.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <16 x i8>)
1413
declare <16 x i32> @llvm.vc.internal.lsc.load.2d.ugm.desc.v16i32.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <16 x i32>)
1514
declare <65 x i8> @llvm.vc.internal.lsc.load.2d.ugm.desc.transpose.v65i8.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <65 x i8>)
15+
declare <80 x i16> @llvm.vc.internal.lsc.load.block.2d.ugm.vnni.v80i16.v2i8(i1, i8, <2 x i8>, i8, i16, i16, i64, i32, i32, i32, i32, i32, i32, i32, <80 x i16>)
1616

1717
; CHECK-LABEL: @test_load(
18-
define <6 x i32> @test_load(i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y, <6 x i32> %passthru) {
19-
; CHECK: [[WRREG:%[^ ]+]] = call <16 x i32> @llvm.genx.wrregioni.v16i32.v6i32.i16.i1(<16 x i32> undef, <6 x i32> %passthru, i32 1, i32 1, i32 0, i16 0, i32 undef, i1 true)
20-
; CHECK: [[LOAD:%[^ ]+]] = call <16 x i32> @llvm.vc.internal.lsc.load.block.2d.ugm.v16i32.v2i8(i1 true, i8 3, <2 x i8> <i8 1, i8 2>, i8 1, i16 2, i16 3, i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y, i32 0, i32 0, <16 x i32> [[WRREG]])
21-
; CHECK: [[RDREG:%[^ ]+]] = call <6 x i32> @llvm.genx.rdregioni.v6i32.v16i32.i16(<16 x i32> [[LOAD]], i32 1, i32 1, i32 0, i16 0, i32 undef)
22-
; CHECK: ret <6 x i32> [[RDREG]]
18+
define <80 x i16> @test_load(i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y, <80 x i16> %passthru) {
2319

24-
%load = call <6 x i32> @llvm.vc.internal.lsc.load.block.2d.ugm.v6i32.v2i8(i1 true, i8 3, <2 x i8> <i8 1, i8 2>, i8 1, i16 2, i16 3, i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y, i32 0, i32 0, <6 x i32> %passthru)
25-
ret <6 x i32> %load
20+
; CHECK: [[WRREG:%[^ ]+]] = call <96 x i16> @llvm.genx.wrregioni.v96i16.v80i16.i16.i1(<96 x i16> undef, <80 x i16> %passthru, i32 1, i32 1, i32 0, i16 0, i32 undef, i1 true)
21+
; CHECK: [[LOAD:%[^ ]+]] = call <96 x i16> @llvm.vc.internal.lsc.load.block.2d.ugm.vnni.v96i16.v2i8(i1 true, i8 2, <2 x i8> zeroinitializer, i8 1, i16 6, i16 10, i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y, i32 0, i32 0, <96 x i16> [[WRREG]])
22+
; CHECK: [[RDREG:%[^ ]+]] = call <80 x i16> @llvm.genx.rdregioni.v80i16.v96i16.i16(<96 x i16> [[LOAD]], i32 1, i32 1, i32 0, i16 0, i32 undef)
23+
; CHECK: ret <80 x i16> [[RDREG]]
24+
25+
%load = call <80 x i16> @llvm.vc.internal.lsc.load.block.2d.ugm.vnni.v80i16.v2i8(i1 true, i8 2, <2 x i8> zeroinitializer, i8 1, i16 6, i16 10, i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y, i32 0, i32 0, <80 x i16> %passthru)
26+
ret <80 x i16> %load
2627
}
2728

2829
; CHECK-LABEL: @test_load_whole_grf(
@@ -56,8 +57,8 @@ define <16 x i32> @test_load_desc_whole_grf(<16 x i32> %addr) {
5657

5758
; CHECK-LABEL: test_load_desc_transpose_undef(
5859
define <65 x i8> @test_load_desc_transpose_undef(<16 x i32> %addr) {
59-
; CHECK: [[LOAD:%[^ ]+]] = call <64 x i8> @llvm.vc.internal.lsc.load.2d.ugm.desc.transpose.v64i8.v2i8(i1 true, <2 x i8> <i8 2, i8 2>, i8 1, i16 13, i16 5, <16 x i32> %addr, i32 0, i32 0, <64 x i8> undef)
60-
; CHECK: [[RDREG:%[^ ]+]] = call <65 x i8> @llvm.genx.rdregioni.v65i8.v64i8.i16(<64 x i8> [[LOAD]], i32 1, i32 1, i32 0, i16 0, i32 undef)
60+
; CHECK: [[LOAD:%[^ ]+]] = call <128 x i8> @llvm.vc.internal.lsc.load.2d.ugm.desc.transpose.v128i8.v2i8(i1 true, <2 x i8> <i8 2, i8 2>, i8 1, i16 13, i16 5, <16 x i32> %addr, i32 0, i32 0, <128 x i8> undef)
61+
; CHECK: [[RDREG:%[^ ]+]] = call <65 x i8> @llvm.genx.rdregioni.v65i8.v128i8.i16(<128 x i8> %1, i32 1, i32 1, i32 0, i16 0, i32 undef)
6162
; CHECK: ret <65 x i8> [[RDREG]]
6263

6364
%load = call <65 x i8> @llvm.vc.internal.lsc.load.2d.ugm.desc.transpose.v65i8.v2i8(i1 true, <2 x i8> <i8 2, i8 2>, i8 1, i16 13, i16 5, <16 x i32> %addr, i32 0, i32 0, <65 x i8> undef)

0 commit comments

Comments (0)