Skip to content

Commit a8f2d5b

Browse files
1997alirezaa00917109
authored and committed
[LoopFusion] Detecting loop-carried dependencies using DA info
The loop fusion pass now uses the information provided by DependenceAnalysis (DA) to detect loop-carried dependencies, and fuses the loops when it is legal to do so.
1 parent db18fc4 commit a8f2d5b

File tree

3 files changed

+239
-16
lines changed

3 files changed

+239
-16
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ STATISTIC(OnlySecondCandidateIsGuarded,
100100
"The second candidate is guarded while the first one is not");
101101
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
102102
// Counts preheader instructions that were sunk (the counterpart of
// NumHoistedInsts); the description previously said "hoisted" by copy-paste.
STATISTIC(NumSunkInsts, "Number of sunk preheader instructions.");
103+
STATISTIC(NumDepSafeFused, "Number of fused loops with dependencies "
104+
"proven safe based on the dependence direction");
103105

104106
enum FusionDependenceAnalysisChoice {
105107
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -1349,6 +1351,33 @@ struct LoopFuser {
13491351
<< "\n");
13501352
}
13511353
#endif
1354+
unsigned Levels = DepResult->getLevels();
1355+
unsigned SeparateLevels = DepResult->getSeparateLevels();
1356+
unsigned CurLoopLevel = FC0.L->getLoopDepth();
1357+
1358+
bool OuterEqDir = true;
1359+
for (unsigned II = 1; II <= std::min(CurLoopLevel - 1, Levels); ++II) {
1360+
unsigned Direction = DepResult->getDirection(II, II > Levels);
1361+
if (!(Direction & Dependence::DVEntry::EQ)) {
1362+
// The direction at this outer level excludes '=', so the two accesses
// differ in a loop that encloses CurLoopLevel.
1363+
OuterEqDir = false;
1364+
break;
1365+
}
1366+
}
1367+
if (!OuterEqDir || CurLoopLevel > Levels + SeparateLevels) {
1368+
LLVM_DEBUG(dbgs() << "Safe to fuse with no dependency\n");
1369+
NumDepSafeFused++;
1370+
return true;
1371+
}
1372+
1373+
assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
1374+
unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);
1375+
if (!(CurDir & Dependence::DVEntry::GT)) {
1376+
LLVM_DEBUG(dbgs() << "Safe to fuse with backward loop-carried "
1377+
"dependency\n");
1378+
NumDepSafeFused++;
1379+
return true;
1380+
}
13521381

13531382
if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
13541383
LLVM_DEBUG(
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
; RUN: opt -S -passes=loop-fusion -da-disable-delinearization-checks < %s | FileCheck %s
2+
3+
; The two inner loops have no dependency and can be fused, because their
4+
; accesses differ at the outer loop levels.
5+
6+
; C Code
7+
;
8+
;; for (long int i = 0; i < n; i++) {
9+
;; for (long int j = 0; j < n; j++) {
10+
;; for (long int k = 0; k < n; k++) {
11+
;; A[i][j][k] = i;
12+
;; }
13+
;; for (long int k = 0; k < n; k++) {
14+
;; temp = A[i + 3][j + 2][k + 1];
15+
16+
define void @backward_dep0(i64 %n, ptr %A) nounwind uwtable ssp {
17+
entry:
18+
%cmp10 = icmp sgt i64 %n, 0
19+
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
20+
21+
; CHECK-LABEL: backward_dep
22+
; CHECK-COUNT-1: for.body{{[0-9]+}}:
23+
; CHECK-NOT: for.body{{[0-9]+}}:
24+
25+
for.cond1.preheader.preheader: ; preds = %entry
26+
br label %for.cond1.preheader
27+
28+
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
29+
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
30+
%cmp26 = icmp sgt i64 %n, 0
31+
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
32+
33+
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
34+
br label %for.cond4.preheader
35+
36+
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
37+
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
38+
%cmp51 = icmp sgt i64 %n, 0
39+
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
40+
41+
for.body6.preheader: ; preds = %for.cond4.preheader
42+
br label %for.body6
43+
44+
for.body6: ; preds = %for.body6.preheader, %for.body6
45+
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
46+
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
47+
store i64 %i.011, ptr %arrayidx8, align 8
48+
%inc = add nsw i64 %k.02, 1
49+
%exitcond13 = icmp ne i64 %inc, %n
50+
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
51+
52+
for.cond10.loopexit.loopexit: ; preds = %for.body6
53+
br label %for.cond10.loopexit
54+
55+
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
56+
%cmp113 = icmp sgt i64 %n, 0
57+
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
58+
59+
for.body12.preheader: ; preds = %for.cond10.loopexit
60+
br label %for.body12
61+
62+
for.body12: ; preds = %for.body12.preheader, %for.body12
63+
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
64+
%add = add nsw i64 %k9.05, 1
65+
%add13 = add nsw i64 %j.07, 2
66+
%add14 = add nsw i64 %i.011, 3
67+
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
68+
%0 = load i64, ptr %arrayidx17, align 8
69+
%inc19 = add nsw i64 %k9.05, 1
70+
%exitcond = icmp ne i64 %inc19, %n
71+
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
72+
73+
for.inc21.loopexit: ; preds = %for.body12
74+
br label %for.inc21
75+
76+
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
77+
%inc22 = add nsw i64 %j.07, 1
78+
%exitcond14 = icmp ne i64 %inc22, %n
79+
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
80+
81+
for.inc24.loopexit: ; preds = %for.inc21
82+
br label %for.inc24
83+
84+
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
85+
%inc25 = add nsw i64 %i.011, 1
86+
%exitcond15 = icmp ne i64 %inc25, %n
87+
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
88+
89+
for.end26.loopexit: ; preds = %for.inc24
90+
br label %for.end26
91+
92+
for.end26: ; preds = %for.end26.loopexit, %entry
93+
ret void
94+
}
95+
96+
; The two inner loops have a backward loop-carried dependency, allowing them
97+
; to be fused.
98+
99+
; C Code
100+
;
101+
;; for (long int i = 0; i < n; i++) {
102+
;; for (long int j = 0; j < n; j++) {
103+
;; for (long int k = 0; k < n; k++) {
104+
;; A[i][j][k] = i;
105+
;; }
106+
;; for (long int k = 0; k < n; k++) {
107+
;; temp = A[i][j][k - 1];
108+
109+
define void @backward_dep1(i64 %n, ptr %A) nounwind uwtable ssp {
110+
entry:
111+
%cmp10 = icmp sgt i64 %n, 0
112+
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
113+
114+
; CHECK-LABEL: backward_dep
115+
; CHECK-COUNT-1: for.body{{[0-9]+}}:
116+
; CHECK-NOT: for.body{{[0-9]+}}:
117+
118+
for.cond1.preheader.preheader: ; preds = %entry
119+
br label %for.cond1.preheader
120+
121+
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
122+
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
123+
%cmp26 = icmp sgt i64 %n, 0
124+
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
125+
126+
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
127+
br label %for.cond4.preheader
128+
129+
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
130+
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
131+
%cmp51 = icmp sgt i64 %n, 0
132+
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
133+
134+
for.body6.preheader: ; preds = %for.cond4.preheader
135+
br label %for.body6
136+
137+
for.body6: ; preds = %for.body6.preheader, %for.body6
138+
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
139+
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
140+
store i64 %i.011, ptr %arrayidx8, align 8
141+
%inc = add nsw i64 %k.02, 1
142+
%exitcond13 = icmp ne i64 %inc, %n
143+
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
144+
145+
for.cond10.loopexit.loopexit: ; preds = %for.body6
146+
br label %for.cond10.loopexit
147+
148+
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
149+
%cmp113 = icmp sgt i64 %n, 0
150+
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
151+
152+
for.body12.preheader: ; preds = %for.cond10.loopexit
153+
br label %for.body12
154+
155+
for.body12: ; preds = %for.body12.preheader, %for.body12
156+
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
157+
%add = add nsw i64 %k9.05, -1
158+
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
159+
%0 = load i64, ptr %arrayidx17, align 8
160+
%inc19 = add nsw i64 %k9.05, 1
161+
%exitcond = icmp ne i64 %inc19, %n
162+
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
163+
164+
for.inc21.loopexit: ; preds = %for.body12
165+
br label %for.inc21
166+
167+
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
168+
%inc22 = add nsw i64 %j.07, 1
169+
%exitcond14 = icmp ne i64 %inc22, %n
170+
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
171+
172+
for.inc24.loopexit: ; preds = %for.inc21
173+
br label %for.inc24
174+
175+
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
176+
%inc25 = add nsw i64 %i.011, 1
177+
%exitcond15 = icmp ne i64 %inc25, %n
178+
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
179+
180+
for.end26.loopexit: ; preds = %for.inc24
181+
br label %for.end26
182+
183+
for.end26: ; preds = %for.end26.loopexit, %entry
184+
ret void
185+
}

llvm/test/Transforms/LoopFusion/simple.ll

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -298,42 +298,51 @@ bb23: ; preds = %bb17, %bb
298298
ret void
299299
}
300300

301+
; void forward_dep(int *arg) {
302+
; for (int i = 0; i < 100; i++) {
303+
; int tmp = i - 3;
304+
; int val = tmp * (i + 3) % i;
305+
; arg[i] = val;
306+
; }
307+
;
308+
; for (int j = 0; j < 100; j++) {
309+
; int val = arg[j - 3];
310+
; arg[j] = val * 3;
311+
; }
312+
; }
313+
;
301314
define void @forward_dep(ptr noalias %arg) {
302315
; CHECK-LABEL: @forward_dep(
303-
; CHECK-NEXT: bb:
304-
; CHECK-NEXT: br label [[BB7:%.*]]
316+
; CHECK-NEXT: [[BB:.*]]:
317+
; CHECK-NEXT: br label %[[BB7:.*]]
305318
; CHECK: bb7:
306-
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
307-
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
319+
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
320+
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
321+
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
308322
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
309323
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
310324
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
311325
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
312326
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
313327
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
314-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
328+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]]
315329
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4
316-
; CHECK-NEXT: br label [[BB14]]
330+
; CHECK-NEXT: br label %[[BB14:.*]]
317331
; CHECK: bb14:
318-
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
319-
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
320-
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
321-
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
322-
; CHECK: bb19.preheader:
323-
; CHECK-NEXT: br label [[BB19:%.*]]
324-
; CHECK: bb19:
325-
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
326332
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
327333
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
328334
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
329335
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
330336
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
331337
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4
332-
; CHECK-NEXT: br label [[BB25]]
338+
; CHECK-NEXT: br label %[[BB25]]
333339
; CHECK: bb25:
340+
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
341+
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
342+
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
334343
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
335344
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
336-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
345+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
337346
; CHECK: bb26:
338347
; CHECK-NEXT: ret void
339348
;

0 commit comments

Comments
 (0)