Skip to content

Commit 137f19c

Browse files
bowenxue-inteligcbot
authored andcommitted
WaveShuffleIndex Sinking Optimization
Hoists and combines identical BinaryOperator instructions that follow WaveShuffleIndex instructions with a constant lane/channel, and sinks the WaveShuffleIndex instructions to the point of divergence. Uses the distributive property of instructions to allow some BinaryOperators to be hoisted above other, unhoistable BinaryOperators.
1 parent ac93a93 commit 137f19c

File tree

11 files changed

+1044
-0
lines changed

11 files changed

+1044
-0
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ SPDX-License-Identifier: MIT
101101
#include "Compiler/Optimizer/SynchronizationObjectCoalescing.hpp"
102102
#include "Compiler/Optimizer/BarrierControlFlowOptimization.hpp"
103103
#include "Compiler/Optimizer/RuntimeValueVectorExtractPass.h"
104+
#include "Compiler/Optimizer/WaveShuffleIndexSinking.hpp"
104105
#include "Compiler/MetaDataApi/PurgeMetaDataUtils.hpp"
105106
#include "Compiler/HandleLoadStoreInstructions.hpp"
106107
#include "Compiler/CustomSafeOptPass.hpp"
@@ -1424,6 +1425,10 @@ void OptimizeIR(CodeGenContext* const pContext)
14241425
}
14251426

14261427
mpm.add(createIGCInstructionCombiningPass());
1428+
if( IGC_IS_FLAG_ENABLED( EnableWaveShuffleIndexSinking ) )
1429+
{
1430+
mpm.add( createWaveShuffleIndexSinking() );
1431+
}
14271432
mpm.add(new FCmpPaternMatch());
14281433
mpm.add(llvm::createDeadCodeEliminationPass()); // this should be done both before/after constant propagation
14291434

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,3 +273,4 @@ void initializeCollectLoopCountPass(llvm::PassRegistry&);
273273
void initializeRemoveLoopDependencyPass(llvm::PassRegistry&);
274274
void initializeResourceLoopUnrollPass(llvm::PassRegistry&);
275275
void initializeInjectPrintfPass(llvm::PassRegistry&);
276+
void initializeWaveShuffleIndexSinkingPass(llvm::PassRegistry&);

IGC/Compiler/Optimizer/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ set(IGC_BUILD__SRC__Optimizer
3333
"${CMAKE_CURRENT_SOURCE_DIR}/ValueTracker.cpp"
3434
"${CMAKE_CURRENT_SOURCE_DIR}/RuntimeValueVectorExtractPass.cpp"
3535
"${CMAKE_CURRENT_SOURCE_DIR}/BarrierControlFlowOptimization.cpp"
36+
"${CMAKE_CURRENT_SOURCE_DIR}/WaveShuffleIndexSinking.cpp"
3637
)
3738

3839
set(IGC_BUILD__SRC__Compiler_Optimizer
@@ -59,6 +60,7 @@ set(IGC_BUILD__HDR__Optimizer
5960
"${CMAKE_CURRENT_SOURCE_DIR}/ValueTracker.h"
6061
"${CMAKE_CURRENT_SOURCE_DIR}/RuntimeValueVectorExtractPass.h"
6162
"${CMAKE_CURRENT_SOURCE_DIR}/BarrierControlFlowOptimization.hpp"
63+
"${CMAKE_CURRENT_SOURCE_DIR}/WaveShuffleIndexSinking.hpp"
6264
)
6365

6466
set(IGC_BUILD__HDR__Optimizer

IGC/Compiler/Optimizer/WaveShuffleIndexSinking.cpp

Lines changed: 727 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#pragma once
10+
11+
#include "common/LLVMWarningsPush.hpp"
12+
#include <llvm/Pass.h>
13+
#include "common/LLVMWarningsPop.hpp"
14+
15+
namespace IGC
16+
{
17+
llvm::FunctionPass* createWaveShuffleIndexSinking();
18+
} // namespace IGC
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; Verifies that four WaveShuffleIndex instructions with the same source and a constant channel get subsequent instructions checked and hoisted
13+
; Each WaveShuffleIndex instruction is in turn fed into an add, and then a shl
14+
; The second operand of the add is not a constant, so the add is considered an anchor instruction
15+
; The second operand of the shl is a constant, so the shl is considered a hoistable instruction
16+
; Due to distributive properties, the shl is allowed to be hoisted above the add, and afterwards, above all the WaveShuffleIndex instructions
17+
; Since there are 4 WaveShuffleIndex instructions in the ShuffleGroup, we can trade a shl on the source of the WaveShuffleIndex and a shl on the second operand of the add for removing all 4 shl instructions operating on the result of each add
18+
; This changes the number of instructions from 4 * WSI + 4 * add + 4 * shl to shl (for %a) + shl (for %b) + 4 * WSI + 4 * add, reducing the total number of instructions by 2 while preserving functionality
19+
; ------------------------------------------------
20+
21+
define void @test_wave_shuffle_index_sinking(i32* %dst0, i32* %dst1, i32* %dst2, i32* %dst3, i32 %a, i32 %b) {
22+
; CHECK: [[HOISTED:%.*]] = shl i32 %a, 2
23+
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 0, i32 0)
24+
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
25+
; CHECK: [[ANCHOR_HOISTED:%.*]] = shl i32 %b, 2
26+
; CHECK-NEXT: [[ANCHOR0:%.*]] = add i32 [[WS0]], [[ANCHOR_HOISTED]]
27+
%add0 = add i32 %ws0, %b
28+
%shl0 = shl i32 %add0, 2
29+
; CHECK: store i32 [[ANCHOR0]], i32* %dst0
30+
store i32 %shl0, i32* %dst0
31+
; CHECK: [[WS1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 1, i32 0)
32+
%ws1 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
33+
; CHECK: [[ANCHOR1:%.*]] = add i32 [[WS1]], [[ANCHOR_HOISTED]]
34+
%add1 = add i32 %ws1, %b
35+
%shl1 = shl i32 %add1, 2
36+
; CHECK: store i32 [[ANCHOR1]], i32* %dst1
37+
store i32 %shl1, i32* %dst1
38+
; CHECK: [[WS2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 2, i32 0)
39+
%ws2 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
40+
; CHECK: [[ANCHOR2:%.*]] = add i32 [[WS2]], [[ANCHOR_HOISTED]]
41+
%add2 = add i32 %ws2, %b
42+
%shl2 = shl i32 %add2, 2
43+
; CHECK: store i32 [[ANCHOR2]], i32* %dst2
44+
store i32 %shl2, i32* %dst2
45+
; CHECK: [[WS3:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 3, i32 0)
46+
%ws3 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 3, i32 0)
47+
; CHECK: [[ANCHOR3:%.*]] = add i32 [[WS3]], [[ANCHOR_HOISTED]]
48+
%add3 = add i32 %ws3, %b
49+
%shl3 = shl i32 %add3, 2
50+
; CHECK: store i32 [[ANCHOR3]], i32* %dst3
51+
store i32 %shl3, i32* %dst3
52+
ret void
53+
}
54+
55+
; Function Attrs: convergent nounwind readnone
56+
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #0
57+
58+
attributes #0 = { convergent nounwind readnone }
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; This test does not primarily demonstrate the benefits/profitability of the optimization
13+
; This test focuses on the auxiliary functionality of splitting and merging various WaveShuffleIndex instructions
14+
; Only the 4 combinations of N/N, N/Y, Y/N, and Y/Y for Split/Merge need to be considered, Sink is for informational purposes only
15+
;
16+
; Test Scenarios
17+
; %ws0: Split: N, Sink: Y, Merge: N
18+
; %ws1: Split: Y, Sink: Y/Y, Merge: N (profitable to sink both paths)
19+
; %ws2: Split: Y, Sink: Y/N, Merge: N (profitable to sink one path)
20+
; %ws3: Split: Y, Sink: N/N, Merge: Y (nothing to group and sink with)
21+
; %ws4: Split: N, Sink: N, Merge: Y
22+
; %ws5: Split: N, Sink: N, Merge: Y
23+
; ------------------------------------------------
24+
25+
define void @test_split_sink_merge(i32 %a, i32 %b, i32 %c, i32 %d) {
26+
; CHECK: [[USE1_WS0_WS1C1_HOISTED:%.*]] = shl i32 %a, 2
27+
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[USE1_WS0_WS1C1_HOISTED]], i32 0, i32 0)
28+
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
29+
%use1_ws0 = shl i32 %ws0, 2
30+
; CHECK: add i32 [[WS0]], %c
31+
%anchor1_ws0 = add i32 %use1_ws0, %c
32+
; CHECK: [[WS1C1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[USE1_WS0_WS1C1_HOISTED]], i32 1, i32 0)
33+
%ws1 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
34+
%use1_ws1 = shl i32 %ws1, 2
35+
; CHECK: add i32 [[WS1C1]], %c
36+
%anchor1_ws1 = add i32 %use1_ws1, %c
37+
; CHECK: [[USE2_WS1C2_WS2C1_HOISTED:%.*]] = shl i32 %a, 3
38+
; CHECK: [[WS1C2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[USE2_WS1C2_WS2C1_HOISTED]], i32 1, i32 0)
39+
%use2_ws1 = shl i32 %ws1, 3
40+
; CHECK: add i32 [[WS1C2]], %d
41+
%anchor2_ws1 = add i32 %use2_ws1, %d
42+
; CHECK: [[WS2C1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[USE2_WS1C2_WS2C1_HOISTED]], i32 2, i32 0)
43+
%ws2 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
44+
%use1_ws2 = shl i32 %ws2, 3
45+
; CHECK: add i32 [[WS2C1]], %d
46+
%anchor1_ws2 = add i32 %use1_ws2, %d
47+
; CHECK: [[WS2C2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
48+
; CHECK: [[USE2_WS2C2_NOT_HOISTED:%.*]] = shl i32 [[WS2C2]], 4
49+
%use2_ws2 = shl i32 %ws2, 4
50+
; CHECK: add i32 [[USE2_WS2C2_NOT_HOISTED]], %d
51+
%anchor2_ws2 = add i32 %use2_ws2, %d
52+
; CHECK: [[WS3:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %b, i32 0, i32 0)
53+
%ws3 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %b, i32 0, i32 0)
54+
; CHECK-NEXT: add i32 [[WS3]], %c
55+
%use1_ws3 = add i32 %ws3, %c
56+
; CHECK-NEXT: add i32 [[WS3]], %d
57+
%use2_ws3 = add i32 %ws3, %d
58+
; CHECK: [[WS4:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %b, i32 1, i32 0)
59+
%ws4 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %b, i32 1, i32 0)
60+
; CHECK-NEXT: {{%.*}} = add i32 [[WS4]], %c
61+
%use1_ws4 = add i32 %ws4, %c
62+
; CHECK-NOT: call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %b, i32 1, i32 0)
63+
%ws5 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %b, i32 1, i32 0)
64+
; CHECK-NEXT: {{%.*}} = add i32 [[WS4]], %d
65+
%use1_ws5 = add i32 %ws5, %d
66+
ret void
67+
}
68+
69+
; Function Attrs: convergent nounwind readnone
70+
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #0
71+
72+
attributes #0 = { convergent nounwind readnone }
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; A new WaveShuffleIndex instruction may match with more hoistable instructions when compared to some individual WaveShuffleIndex insts in an existing ShuffleGroup
13+
; However, the group's maximal list of hoistable instructions is already established, and can only be reduced as more WaveShuffleIndex insts attempt to join
14+
; The partial group with the additional hoistable instructions will be hoisted in the next iteration
15+
; The pass is configured to run iteratively up to a maximum of the value specified by the WaveShuffleIndexSinkingMaxIterations regkey (default: 3)
16+
; Rerunning ensures that any potential hoistable instructions that were not added to a ShuffleGroup that are still profitable to merge will get merged eventually
17+
; The ShuffleGroup consisting of %ws0 and %ws1 has an established maximal InstChain, even though %ws1 and %ws2 could have formed a ShuffleGroup with a longer InstChain
18+
; ashr gets hoisted in next iteration when ShuffleGroup containing %ws1 and %ws2 gets constructed since %ws0 has no more suitable instructions in order to join the ShuffleGroup
19+
; Note: Hoisted instructions are to demonstrate functionality, InstCombine would reduce the shl by 2 and ashr by 1 to a single shl by 1
20+
; ------------------------------------------------
21+
22+
define void @test_wave_shuffle_index_sinking(i32* %dst0, i32* %dst1, i32* %dst2, i32* %dst3, i32 %a, i32 %b, i32 %c) {
23+
; CHECK: [[HOISTED_I1:%.*]] = shl i32 %a, 2
24+
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I1]], i32 0, i32 0)
25+
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
26+
; CHECK: [[ANCHOR1_HOISTED_I1:%.*]] = shl i32 %b, 2
27+
; CHECK-NEXT: [[ANCHOR1_WS0:%.*]] = add i32 [[WS0]], [[ANCHOR1_HOISTED_I1]]
28+
%add0 = add i32 %ws0, %b
29+
%shl0 = shl i32 %add0, 2
30+
; CHECK: store i32 [[ANCHOR1_WS0]], i32* %dst0
31+
store i32 %shl0, i32* %dst0
32+
; CHECK: [[HOISTED_I2:%.*]] = ashr i32 [[HOISTED_I1]], 1
33+
; CHECK: [[WS1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I2]], i32 1, i32 0)
34+
%ws1 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
35+
; CHECK: [[ANCHOR1_HOISTED_I2:%.*]] = ashr i32 [[ANCHOR1_HOISTED_I1]], 1
36+
; CHECK: [[ANCHOR1_WS1:%.*]] = add i32 [[WS1]], [[ANCHOR1_HOISTED_I2]]
37+
%add1 = add i32 %ws1, %b
38+
%shl1 = shl i32 %add1, 2
39+
; CHECK: [[ANCHOR2_HOISTED_I2:%.*]] = ashr i32 %c, 1
40+
; CHECK-NEXT: [[ANCHOR2_WS1:%.*]] = mul i32 [[ANCHOR1_WS1]], [[ANCHOR2_HOISTED_I2]]
41+
%mul1 = mul i32 %shl1, %c
42+
%ashr1 = ashr i32 %mul1, 1
43+
; CHECK: store i32 [[ANCHOR2_WS1]], i32* %dst1
44+
store i32 %ashr1, i32* %dst1
45+
; CHECK: [[WS2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I2]], i32 2, i32 0)
46+
%ws2 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
47+
; CHECK: [[ANCHOR1_WS2:%.*]] = add i32 [[WS2]], [[ANCHOR1_HOISTED_I2]]
48+
%add2 = add i32 %ws2, %b
49+
%shl2 = shl i32 %add2, 2
50+
; CHECK: [[ANCHOR2_WS2:%.*]] = mul i32 [[ANCHOR1_WS2]], [[ANCHOR2_HOISTED_I2]]
51+
%mul2 = mul i32 %shl2, %c
52+
%ashr2 = ashr i32 %mul2, 1
53+
; CHECK: store i32 [[ANCHOR2_WS2]], i32* %dst2
54+
store i32 %ashr2, i32* %dst2
55+
; CHECK: [[WS3:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I2]], i32 3, i32 0)
56+
%ws3 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 3, i32 0)
57+
; CHECK: [[ANCHOR1_WS3:%.*]] = add i32 [[WS3]], [[ANCHOR1_HOISTED_I2]]
58+
%add3 = add i32 %ws3, %b
59+
%shl3 = shl i32 %add3, 2
60+
; CHECK: [[ANCHOR2_WS3:%.*]] = mul i32 [[ANCHOR1_WS3]], [[ANCHOR2_HOISTED_I2]]
61+
%mul3 = mul i32 %shl3, %c
62+
%ashr3 = ashr i32 %mul3, 1
63+
; CHECK: store i32 [[ANCHOR2_WS3]], i32* %dst3
64+
store i32 %ashr3, i32* %dst3
65+
ret void
66+
}
67+
68+
; Function Attrs: convergent nounwind readnone
69+
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #0
70+
71+
attributes #0 = { convergent nounwind readnone }
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; An initial ShuffleGroup's InstChain may get reduced to encompass a wider number of WaveShuffleIndex that have fewer similar instructions
13+
; The pass is configured to run iteratively up to a maximum of the value specified by the WaveShuffleIndexSinkingMaxIterations regkey (default: 3)
14+
; Rerunning ensures that any potential hoistable instructions that were kicked out of a ShuffleGroup that are still profitable to merge will get merged eventually
15+
; ShuffleGroup consisting of %ws0, %ws1, and %ws2 gets trimmed in first iteration in order to accommodate %ws3
16+
; ashr gets hoisted in next iteration when ShuffleGroup (%ws0, %ws1, %ws2) gets reconstructed and %ws3 has no more suitable instructions in order to join the ShuffleGroup
17+
; Note: Hoisted instructions are to demonstrate functionality, InstCombine would reduce the shl by 2 and ashr by 1 to a single shl by 1
18+
; ------------------------------------------------
19+
20+
define void @test_wave_shuffle_index_sinking(i32* %dst0, i32* %dst1, i32* %dst2, i32* %dst3, i32 %a, i32 %b, i32 %c) {
21+
; CHECK: [[HOISTED_I1:%.*]] = shl i32 %a, 2
22+
; CHECK: [[HOISTED_I2:%.*]] = ashr i32 [[HOISTED_I1]], 1
23+
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I2]], i32 0, i32 0)
24+
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
25+
; CHECK: [[ANCHOR1_HOISTED_I1:%.*]] = shl i32 %b, 2
26+
; CHECK: [[ANCHOR1_HOISTED_I2:%.*]] = ashr i32 [[ANCHOR1_HOISTED_I1]], 1
27+
; CHECK-NEXT: [[ANCHOR1_WS0:%.*]] = add i32 [[WS0]], [[ANCHOR1_HOISTED_I2]]
28+
%add0 = add i32 %ws0, %b
29+
%shl0 = shl i32 %add0, 2
30+
; CHECK: [[ANCHOR2_HOISTED_I2:%.*]] = ashr i32 %c, 1
31+
; CHECK-NEXT: [[ANCHOR2_WS0:%.*]] = mul i32 [[ANCHOR1_WS0]], [[ANCHOR2_HOISTED_I2]]
32+
%mul0 = mul i32 %shl0, %c
33+
%ashr0 = ashr i32 %mul0, 1
34+
; CHECK: store i32 [[ANCHOR2_WS0]], i32* %dst0
35+
store i32 %ashr0, i32* %dst0
36+
; CHECK: [[WS1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I2]], i32 1, i32 0)
37+
%ws1 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
38+
; CHECK: [[ANCHOR1_WS1:%.*]] = add i32 [[WS1]], [[ANCHOR1_HOISTED_I2]]
39+
%add1 = add i32 %ws1, %b
40+
%shl1 = shl i32 %add1, 2
41+
; CHECK: [[ANCHOR2_WS1:%.*]] = mul i32 [[ANCHOR1_WS1]], [[ANCHOR2_HOISTED_I2]]
42+
%mul1 = mul i32 %shl1, %c
43+
%ashr1 = ashr i32 %mul1, 1
44+
; CHECK: store i32 [[ANCHOR2_WS1]], i32* %dst1
45+
store i32 %ashr1, i32* %dst1
46+
; CHECK: [[WS2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I2]], i32 2, i32 0)
47+
%ws2 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
48+
; CHECK: [[ANCHOR1_WS2:%.*]] = add i32 [[WS2]], [[ANCHOR1_HOISTED_I2]]
49+
%add2 = add i32 %ws2, %b
50+
%shl2 = shl i32 %add2, 2
51+
; CHECK: [[ANCHOR2_WS2:%.*]] = mul i32 [[ANCHOR1_WS2]], [[ANCHOR2_HOISTED_I2]]
52+
%mul2 = mul i32 %shl2, %c
53+
%ashr2 = ashr i32 %mul2, 1
54+
; CHECK: store i32 [[ANCHOR2_WS2]], i32* %dst2
55+
store i32 %ashr2, i32* %dst2
56+
; CHECK: [[WS3:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED_I1]], i32 3, i32 0)
57+
%ws3 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 3, i32 0)
58+
; CHECK: [[ANCHOR1_WS3:%.*]] = add i32 [[WS3]], [[ANCHOR1_HOISTED_I1]]
59+
%add3 = add i32 %ws3, %b
60+
%shl3 = shl i32 %add3, 2
61+
; CHECK: store i32 [[ANCHOR1_WS3]], i32* %dst3
62+
store i32 %shl3, i32* %dst3
63+
ret void
64+
}
65+
66+
; Function Attrs: convergent nounwind readnone
67+
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #0
68+
69+
attributes #0 = { convergent nounwind readnone }

IGC/GenISAIntrinsics/GenIntrinsicInst.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,25 @@ class WavePrefixIntrinsic : public GenIntrinsicInst
12051205
}
12061206
};
12071207

1208+
// Typed view over a call to the GenISA_WaveShuffleIndex intrinsic.
// Gives named access to the call's first two operands and plugs into
// LLVM-style isa<>/cast<>/dyn_cast<> through the classof() overloads below.
class WaveShuffleIndexIntrinsic : public GenIntrinsicInst
1209+
{
1210+
public:
1211+
// Operand 0: the value being shuffled.
Value* getSrc() const { return getOperand( 0 ); }
1212+
// Operand 1: the channel (lane) operand of the shuffle.
Value* getChannel() const { return getOperand( 1 ); }
1213+
1214+
// Replaces operand 0 (the shuffled value) with `src`.
void setSrc( Value* src ) { setOperand( 0, src ); }
1215+
1216+
// Methods to support type inquiry through isa, cast, and dyn_cast:
1217+
// True when the intrinsic call's ID is GenISA_WaveShuffleIndex.
static inline bool classof( const GenIntrinsicInst* I )
1218+
{
1219+
return I->getIntrinsicID() == GenISAIntrinsic::GenISA_WaveShuffleIndex;
1220+
}
1221+
// True when V is a GenIntrinsicInst that also passes the ID check above.
static inline bool classof( const Value* V )
1222+
{
1223+
// isa<> must succeed first so that the cast<> on the right is valid.
return isa<GenIntrinsicInst>( V ) && classof( cast<GenIntrinsicInst>( V ) );
1224+
}
1225+
};
1226+
12081227
class QuadPrefixIntrinsic : public GenIntrinsicInst
12091228
{
12101229
public:

0 commit comments

Comments
 (0)