Skip to content

Commit 0f35d3d

Browse files
MiloszSkobejko, pszymich
authored and committed
WaveShuffleIndexSinking Pass shuffleGroup by basic block
The WaveShuffleIndexSinking pass did not consider program control flow, so it could produce invalid IR in which an instruction does not dominate all of its users. It merged instructions globally across the function, so a user could execute on a path where the merged instruction had never been defined, leading to incorrect calculations. Now, shuffle groups are formed by channel only among instructions within the same basic block. (cherry picked from commit 8201e09)
1 parent 89370c3 commit 0f35d3d

File tree

2 files changed

+55
-7
lines changed

2 files changed

+55
-7
lines changed

IGC/Compiler/Optimizer/WaveShuffleIndexSinking.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ namespace IGC
367367
static bool isHoistable( BinaryOperator* inst );
368368
static bool isHoistableOverAnchor( BinaryOperator* instToHoist, BinaryOperator* anchorInst );
369369
Function& F;
370-
DenseMap<Value*, SmallVector<ShuffleGroup, 4>> ShuffleGroupMap;
370+
DenseMap<std::pair<BasicBlock*, Value*>, SmallVector<ShuffleGroup, 4>> ShuffleGroupMap;
371371
DenseSet<WaveShuffleIndexIntrinsic*> Visited;
372372
};
373373

@@ -451,7 +451,7 @@ bool WaveShuffleIndexSinkingImpl::splitWaveShuffleIndexes()
451451
bool WaveShuffleIndexSinkingImpl::mergeWaveShuffleIndexes()
452452
{
453453
// Map from (BasicBlock, Source) to (Map from Lane to list of duplicate instructions)
454-
DenseMap<Value*, DenseMap<ConstantInt*, SmallVector<WaveShuffleIndexIntrinsic*>>> mergeMap;
454+
DenseMap<std::pair<BasicBlock*, Value*>, DenseMap<ConstantInt*, SmallVector<WaveShuffleIndexIntrinsic*>>> mergeMap;
455455
for( auto& BB : F )
456456
{
457457
for( auto& I : BB )
@@ -460,7 +460,7 @@ bool WaveShuffleIndexSinkingImpl::mergeWaveShuffleIndexes()
460460
{
461461
if( auto* constantChannel = dyn_cast<ConstantInt>( waveShuffleInst->getChannel() ) )
462462
{
463-
mergeMap[ waveShuffleInst->getSrc() ][ constantChannel ].push_back( waveShuffleInst );
463+
mergeMap[ {&BB, waveShuffleInst->getSrc()} ][ constantChannel ].push_back( waveShuffleInst );
464464
}
465465
}
466466
}
@@ -505,11 +505,13 @@ void WaveShuffleIndexSinkingImpl::gatherShuffleGroups()
505505
// Save compute and do not re-process/ create a new ShuffleGroup
506506
continue;
507507
}
508-
if( ShuffleGroupMap.count( waveShuffleInst->getSrc() ) )
508+
509+
std::pair<BasicBlock*, Value*> bbShuffleGroup = { &BB, waveShuffleInst->getSrc() };
510+
if ( ShuffleGroupMap.count( bbShuffleGroup ) )
509511
{
510512
// Found existing group(s) with the same source in this basic block, try to match with one of the groups
511513
bool match = false;
512-
for( auto& shuffleGroup : ShuffleGroupMap[ waveShuffleInst->getSrc() ] )
514+
for (auto& shuffleGroup : ShuffleGroupMap[ bbShuffleGroup ] )
513515
{
514516
if( shuffleGroup.match( waveShuffleInst ) )
515517
{
@@ -521,13 +523,13 @@ void WaveShuffleIndexSinkingImpl::gatherShuffleGroups()
521523
// create new ShuffleGroup since no suitable match was found
522524
if( !match )
523525
{
524-
ShuffleGroupMap[ waveShuffleInst->getSrc() ].emplace_back( waveShuffleInst );
526+
ShuffleGroupMap[ {&BB, waveShuffleInst->getSrc()} ].emplace_back( waveShuffleInst );
525527
}
526528
}
527529
else
528530
{
529531
// create new ShuffleGroup for broadcast operations
530-
ShuffleGroupMap[ waveShuffleInst->getSrc() ].emplace_back( waveShuffleInst );
532+
ShuffleGroupMap[ {&BB, waveShuffleInst->getSrc()} ].emplace_back( waveShuffleInst );
531533
}
532534
}
533535
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt --typed-pointers -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; This test checks that identical WaveShuffleIndex instructions are not merged when they are
13+
; in separate basic blocks. Merging them could produce invalid LLVM IR, because the merged
14+
; instruction would not dominate all of its users.
15+
; ------------------------------------------------
16+
17+
define void @test_compare_cases(i32* %dst0, i32* %dst1, i1 %condition) {
18+
entry:
19+
br i1 %condition, label %bb0, label %bb1
20+
21+
bb0:
22+
; CHECK: [[SHUFFLE0:%.*]] = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
23+
%simdShuffle0 = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
24+
%and0 = and i8 %simdShuffle0, 1
25+
%cmp0 = icmp eq i8 %and0, 0
26+
%zxt0 = zext i1 %cmp0 to i32
27+
store i32 %zxt0, i32* %dst0
28+
br label %exit
29+
30+
bb1:
31+
; CHECK: [[SHUFFLE1:%.*]] = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
32+
%simdShuffle1 = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
33+
%and1 = and i8 %simdShuffle1, 1
34+
%cmp1 = icmp eq i8 %and1, 0
35+
%zxt1 = zext i1 %cmp1 to i32
36+
store i32 %zxt1, i32* %dst1
37+
br label %exit
38+
39+
exit:
40+
ret void
41+
}
42+
43+
; Function Attrs: convergent nounwind readnone
44+
declare i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8, i32, i32) #0
45+
46+
attributes #0 = { convergent nounwind readnone }

0 commit comments

Comments
 (0)