Skip to content

Commit 4b7745a

Browse files
fhahn authored and memfrob committed
[LoopInterchange] Move instructions from preheader to outer loop header.
Instructions defined in the original inner loop preheader may depend on values defined in the outer loop header, but the inner loop header will become the entry block in the loop nest. Move the instructions from the preheader to the outer loop header, so we do not break dominance. We also have to check for unsafe instructions in the preheader. If there are no unsafe instructions, all instructions should be movable. Currently we move all instructions except the terminator and rely on LICM to hoist out invariant instructions later. Fixes PR45743.
1 parent 1d37cae commit 4b7745a

File tree

3 files changed

+167
-3
lines changed

3 files changed

+167
-3
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
625625
containsUnsafeInstructions(OuterLoopLatch))
626626
return false;
627627

628+
// Also make sure the inner loop preheader does not contain any unsafe
629+
// instructions. Note that all instructions in the preheader will be moved to
630+
// the outer loop header when interchanging.
631+
if (InnerLoopPreHeader != OuterLoopHeader &&
632+
containsUnsafeInstructions(InnerLoopPreHeader))
633+
return false;
634+
628635
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
629636
// We have a perfect loop nest.
630637
return true;
@@ -1306,6 +1313,21 @@ bool LoopInterchangeTransform::transform() {
13061313
LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
13071314
}
13081315

1316+
// Instructions in the original inner loop preheader may depend on values
1317+
// defined in the outer loop header. Move them there, because the original
1318+
// inner loop preheader will become the entry into the interchanged loop nest.
1319+
// Currently we move all instructions and rely on LICM to move invariant
1320+
// instructions outside the loop nest.
1321+
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1322+
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1323+
if (InnerLoopPreHeader != OuterLoopHeader) {
1324+
SmallPtrSet<Instruction *, 4> NeedsMoving;
1325+
for (Instruction &I :
1326+
make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1327+
std::prev(InnerLoopPreHeader->end()))))
1328+
I.moveBefore(OuterLoopHeader->getTerminator());
1329+
}
1330+
13091331
Transformed |= adjustLoopLinks();
13101332
if (!Transformed) {
13111333
LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");

llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ define void @lcssa_08(i32 %n, i32 %m) {
2020
; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[N:%.*]], 0
2121
; CHECK-NEXT: br i1 [[CMP24]], label [[INNER_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
2222
; CHECK: outer.preheader:
23-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M:%.*]] to i64
2423
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
2524
; CHECK: outer.header:
2625
; CHECK-NEXT: [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
27-
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M]], 0
26+
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
27+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
2828
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
2929
; CHECK: inner.preheader:
3030
; CHECK-NEXT: [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
@@ -41,8 +41,9 @@ define void @lcssa_08(i32 %n, i32 %m) {
4141
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
4242
; CHECK-NEXT: br label [[INNER_CRIT_EDGE:%.*]]
4343
; CHECK: inner.for.body.split:
44+
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
4445
; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
45-
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT]]
46+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
4647
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
4748
; CHECK: inner.crit_edge:
4849
; CHECK-NEXT: br label [[OUTER_LATCH]]
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -loop-interchange -S %s | FileCheck %s
3+
4+
@global = external local_unnamed_addr global [2 x [10 x i32]], align 16
5+
6+
; We need to move %tmp4 from the inner loop preheader to the outer loop header
7+
; before interchanging.
8+
define void @test1() local_unnamed_addr #0 {
9+
; CHECK-LABEL: @test1(
10+
; CHECK-NEXT: bb:
11+
; CHECK-NEXT: br label [[INNER_PH:%.*]]
12+
; CHECK: outer.header.preheader:
13+
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
14+
; CHECK: outer.header:
15+
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
16+
; CHECK-NEXT: [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED:%.*]], [[OUTER_HEADER_PREHEADER]] ], [ [[RED_NEXT:%.*]], [[OUTER_LATCH]] ]
17+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
18+
; CHECK-NEXT: br label [[INNER_SPLIT1:%.*]]
19+
; CHECK: inner.ph:
20+
; CHECK-NEXT: br label [[INNER:%.*]]
21+
; CHECK: inner:
22+
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[TMP0:%.*]], [[INNER_SPLIT:%.*]] ]
23+
; CHECK-NEXT: [[OUTER_RED]] = phi i32 [ [[RED_NEXT_LCSSA:%.*]], [[INNER_SPLIT]] ], [ 0, [[INNER_PH]] ]
24+
; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]]
25+
; CHECK: inner.split1:
26+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
27+
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
28+
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[INNER_RED]], 20
29+
; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
30+
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
31+
; CHECK-NEXT: br label [[OUTER_LATCH]]
32+
; CHECK: inner.split:
33+
; CHECK-NEXT: [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[OUTER_LATCH]] ]
34+
; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1
35+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 400
36+
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[INNER]]
37+
; CHECK: outer.latch:
38+
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
39+
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
40+
; CHECK-NEXT: br i1 [[EC_2]], label [[INNER_SPLIT]], label [[OUTER_HEADER]]
41+
; CHECK: exit:
42+
; CHECK-NEXT: ret void
43+
;
44+
bb:
45+
br label %outer.header
46+
47+
outer.header: ; preds = %bb11, %bb
48+
%outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
49+
%outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
50+
br label %inner.ph
51+
52+
inner.ph: ; preds = %bb1
53+
%tmp4 = add nsw i64 %outer.iv, 9
54+
br label %inner
55+
56+
inner: ; preds = %bb5, %bb3
57+
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
58+
%inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
59+
%ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
60+
store i32 0, i32* %ptr
61+
%red.next = or i32 %inner.red, 20
62+
%inner.iv.next = add nsw i64 %inner.iv, 1
63+
%ec.1 = icmp eq i64 %inner.iv.next, 400
64+
br i1 %ec.1, label %outer.latch, label %inner
65+
66+
outer.latch: ; preds = %bb5
67+
%red.next.lcssa = phi i32 [ %red.next, %inner ]
68+
%outer.iv.next = add nsw i64 %outer.iv, 1
69+
%ec.2 = icmp eq i64 %outer.iv.next, 400
70+
br i1 %ec.2, label %exit, label %outer.header
71+
72+
exit: ; preds = %bb11
73+
ret void
74+
}
75+
76+
declare void @side_effect()
77+
78+
; Cannot interchange, as the inner loop preheader contains a call to a function
79+
; with side effects.
80+
81+
define void @test2() {
82+
; CHECK-LABEL: @test2(
83+
; CHECK-NEXT: bb:
84+
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
85+
; CHECK: outer.header:
86+
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
87+
; CHECK-NEXT: [[OUTER_RED:%.*]] = phi i32 [ 0, [[BB]] ], [ [[RED_NEXT_LCSSA:%.*]], [[OUTER_LATCH]] ]
88+
; CHECK-NEXT: br label [[INNER_PH:%.*]]
89+
; CHECK: inner.ph:
90+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
91+
; CHECK-NEXT: call void @side_effect()
92+
; CHECK-NEXT: br label [[INNER:%.*]]
93+
; CHECK: inner:
94+
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
95+
; CHECK-NEXT: [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED]], [[INNER_PH]] ], [ [[RED_NEXT:%.*]], [[INNER]] ]
96+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
97+
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
98+
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[INNER_RED]], 20
99+
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
100+
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
101+
; CHECK-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
102+
; CHECK: outer.latch:
103+
; CHECK-NEXT: [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[INNER]] ]
104+
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
105+
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
106+
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
107+
; CHECK: exit:
108+
; CHECK-NEXT: ret void
109+
;
110+
bb:
111+
br label %outer.header
112+
113+
outer.header: ; preds = %bb11, %bb
114+
%outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
115+
%outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
116+
br label %inner.ph
117+
118+
inner.ph: ; preds = %bb1
119+
%tmp4 = add nsw i64 %outer.iv, 9
120+
call void @side_effect()
121+
br label %inner
122+
123+
inner: ; preds = %bb5, %bb3
124+
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
125+
%inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
126+
%ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
127+
store i32 0, i32* %ptr
128+
%red.next = or i32 %inner.red, 20
129+
%inner.iv.next = add nsw i64 %inner.iv, 1
130+
%ec.1 = icmp eq i64 %inner.iv.next, 400
131+
br i1 %ec.1, label %outer.latch, label %inner
132+
133+
outer.latch: ; preds = %bb5
134+
%red.next.lcssa = phi i32 [ %red.next, %inner ]
135+
%outer.iv.next = add nsw i64 %outer.iv, 1
136+
%ec.2 = icmp eq i64 %outer.iv.next, 400
137+
br i1 %ec.2, label %exit, label %outer.header
138+
139+
exit: ; preds = %bb11
140+
ret void
141+
}

0 commit comments

Comments
 (0)