Skip to content

Commit 60ae9c9

Browse files
[SLP]Do not consider non-profitable loads slices
If all slices are small and end up in strided, scatter, or gather load states, it is better not to consider these candidates for vectorization and instead to try to vectorize the whole bunch as gathered loads. Reviewers: hiraditya, RKSimon, HanKuanChen Reviewed By: RKSimon, HanKuanChen Pull Request: #149209
1 parent fcabb53 commit 60ae9c9

File tree

2 files changed

+28
-25
lines changed

2 files changed

+28
-25
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11693,6 +11693,7 @@ void BoUpSLP::transformNodes() {
1169311693
if (StartIdx + VF > End)
1169411694
continue;
1169511695
SmallVector<std::pair<unsigned, unsigned>> Slices;
11696+
bool AllStrided = true;
1169611697
for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) {
1169711698
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
1169811699
// If any instruction is vectorized already - do not try again.
@@ -11743,6 +11744,9 @@ void BoUpSLP::transformNodes() {
1174311744
SmallVector<Value *> PointerOps;
1174411745
LoadsState Res =
1174511746
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps);
11747+
AllStrided &= Res == LoadsState::StridedVectorize ||
11748+
Res == LoadsState::ScatterVectorize ||
11749+
Res == LoadsState::Gather;
1174611750
// Do not vectorize gathers.
1174711751
if (Res == LoadsState::ScatterVectorize ||
1174811752
Res == LoadsState::Gather) {
@@ -11772,6 +11776,11 @@ void BoUpSLP::transformNodes() {
1177211776
}
1177311777
Slices.emplace_back(Cnt, Slice.size());
1177411778
}
11779+
// Do not try to vectorize if all slices are strided or gathered with
11780+
// vector factor 2 and there are more than 2 slices. Better to handle
11781+
// them in gathered loads analysis, may result in better vectorization.
11782+
if (VF == 2 && AllStrided && Slices.size() > 2)
11783+
continue;
1177511784
auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt, unsigned Sz) {
1177611785
E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt);
1177711786
if (StartIdx == Cnt)

llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,50 +14,44 @@ define i32 @test(i32 %s.0) {
1414
; CHECK: [[IF_END3:.*]]:
1515
; CHECK-NEXT: br label %[[IF_END6:.*]]
1616
; CHECK: [[IF_END6]]:
17-
; CHECK-NEXT: [[J_4:%.*]] = phi i32 [ 0, %[[IF_END3]] ], [ [[TMP28:%.*]], %[[O]] ]
18-
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
19-
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
20-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP22:%.*]], %[[O]] ]
17+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP24:%.*]], %[[O]] ]
18+
; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
19+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP29:%.*]], %[[O]] ]
2120
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], zeroinitializer
22-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
24-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
25-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
26-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
27-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
28-
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
29-
; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
21+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[TMP22:%.*]], i32 1
22+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
23+
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison>
24+
; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 1, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
3025
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP30]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
3126
; CHECK-NEXT: br i1 false, label %[[IF_END24:.*]], label %[[IF_THEN11:.*]]
3227
; CHECK: [[IF_THEN11]]:
33-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
34-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
35-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison>
36-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> poison, i32 [[J_4]], i32 0
37-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP15]], <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>
38-
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
28+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison>
29+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 4, i32 5, i32 6, i32 14>
3930
; CHECK-NEXT: br label %[[IF_END24]]
4031
; CHECK: [[IF_THEN18:.*]]:
4132
; CHECK-NEXT: br label %[[T]]
4233
; CHECK: [[T]]:
43-
; CHECK-NEXT: [[TMP34:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
34+
; CHECK-NEXT: [[TMP13:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
4435
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
4536
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
4637
; CHECK: [[IF_END24]]:
47-
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP34]], %[[T]] ]
48-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
38+
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP12]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP13]], %[[T]] ]
4939
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
50-
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
40+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 6, i32 1>
41+
; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 7>
5142
; CHECK-NEXT: br label %[[O]]
5243
; CHECK: [[O]]:
53-
; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
5444
; CHECK-NEXT: [[TMP23]] = phi <4 x i32> [ [[TMP1]], %[[K]] ], [ [[TMP20]], %[[IF_END24]] ]
55-
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ]
45+
; CHECK-NEXT: [[TMP24]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
46+
; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP34]], %[[IF_END24]] ]
47+
; CHECK-NEXT: [[TMP22]] = extractelement <2 x i32> [[TMP24]], i32 1
5648
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
5749
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
5850
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
5951
; CHECK-NEXT: [[TMP33]] = shufflevector <8 x i32> [[TMP26]], <8 x i32> [[TMP32]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
60-
; CHECK-NEXT: [[TMP28]] = extractelement <4 x i32> [[TMP24]], i32 3
52+
; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <2 x i32> [[TMP24]], <2 x i32> poison, <2 x i32> <i32 1, i32 poison>
53+
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> poison, <2 x i32> <i32 poison, i32 3>
54+
; CHECK-NEXT: [[TMP29]] = shufflevector <2 x i32> [[TMP35]], <2 x i32> [[TMP28]], <2 x i32> <i32 0, i32 3>
6155
; CHECK-NEXT: br i1 false, label %[[T]], label %[[IF_END6]]
6256
;
6357
entry:

0 commit comments

Comments
 (0)