Skip to content

Commit 142c131

Browse files
ecnelisesmemfrob
authored and
memfrob
committed
[PowerPC] Add intrinsic to read or set FPSCR register
This patch introduces two intrinsics: llvm.ppc.setflm and llvm.ppc.readflm. They read from or write to the FPSCR register (floating-point status and control register), which contains the rounding mode and exception status. To ensure program correctness, we need to prevent FP operations from being moved across these intrinsics (the mffs/mtfsf instructions), so they are set as scheduling boundaries here. We can relax this restriction if FPSCR is modeled well in the future. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D84914
1 parent df6507b commit 142c131

File tree

7 files changed

+149
-31
lines changed

7 files changed

+149
-31
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
4747
// eieio instruction
4848
def int_ppc_eieio : Intrinsic<[],[],[]>;
4949

50+
// Get content from current FPSCR register
51+
def int_ppc_readflm : Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
52+
// Set FPSCR register, and return previous content
53+
def int_ppc_setflm : Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
54+
5055
// Intrinsics for [double]word extended forms of divide instructions
5156
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
5257
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12135,6 +12135,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1213512135
.addReg(NewFPSCRReg)
1213612136
.addImm(0)
1213712137
.addImm(0);
12138+
} else if (MI.getOpcode() == PPC::SETFLM) {
12139+
DebugLoc Dl = MI.getDebugLoc();
12140+
12141+
// Result of setflm is previous FPSCR content, so we need to save it first.
12142+
Register OldFPSCRReg = MI.getOperand(0).getReg();
12143+
BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12144+
12145+
// Put bits in 32:63 to FPSCR.
12146+
Register NewFPSCRReg = MI.getOperand(1).getReg();
12147+
BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12148+
.addImm(255)
12149+
.addReg(NewFPSCRReg)
12150+
.addImm(0)
12151+
.addImm(0);
1213812152
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
1213912153
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
1214012154
return emitProbedAlloca(MI, BB);

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,17 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
16441644
return false;
16451645
}
16461646

1647+
bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1648+
const MachineBasicBlock *MBB,
1649+
const MachineFunction &MF) const {
1650+
// Treat MFFS and MTFSF as scheduling boundaries to avoid unexpected code
1651+
// motion across them, since some FP operations may change the content of FPSCR.
1652+
// TODO: Model FPSCR in PPC instruction definitions and remove this workaround.
1653+
if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
1654+
return true;
1655+
// Every other opcode defers to the generic TargetInstrInfo implementation.
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
1656+
}
1657+
16471658
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
16481659
ArrayRef<MachineOperand> Pred) const {
16491660
unsigned OpC = MI.getOpcode();

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
463463
// Predication support.
464464
bool isPredicated(const MachineInstr &MI) const override;
465465

466+
bool isSchedulingBoundary(const MachineInstr &MI,
467+
const MachineBasicBlock *MBB,
468+
const MachineFunction &MF) const override;
469+
466470
bool PredicateInstruction(MachineInstr &MI,
467471
ArrayRef<MachineOperand> Pred) const override;
468472

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1514,6 +1514,9 @@ def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
15141514

15151515
def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
15161516
"#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
1517+
1518+
def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
1519+
"#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
15171520
}
15181521

15191522
let Defs = [LR] in
@@ -3269,7 +3272,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
32693272
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
32703273
(TCRETURNri CTRRC:$dst, imm:$imm)>;
32713274

3272-
3275+
def : Pat<(int_ppc_readflm), (MFFS)>;
32733276

32743277
// Hi and Lo for Darwin Global Addresses.
32753278
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;

llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -87,23 +87,23 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
8787
; CHECK-NEXT: bl __gcc_qsub
8888
; CHECK-NEXT: mffs 0
8989
; CHECK-NEXT: mtfsb1 31
90-
; CHECK-NEXT: lis 3, .LCPI0_1@ha
9190
; CHECK-NEXT: mtfsb0 30
9291
; CHECK-NEXT: fadd 1, 2, 1
9392
; CHECK-NEXT: mtfsf 1, 0
9493
; CHECK-NEXT: fctiwz 0, 1
95-
; CHECK-NEXT: mffs 1
9694
; CHECK-NEXT: stfd 0, 160(1)
95+
; CHECK-NEXT: mffs 0
9796
; CHECK-NEXT: mtfsb1 31
97+
; CHECK-NEXT: lis 3, .LCPI0_1@ha
9898
; CHECK-NEXT: mtfsb0 30
99-
; CHECK-NEXT: fadd 0, 28, 29
100-
; CHECK-NEXT: mtfsf 1, 1
101-
; CHECK-NEXT: lfs 1, .LCPI0_1@l(3)
102-
; CHECK-NEXT: fctiwz 0, 0
103-
; CHECK-NEXT: stfd 0, 152(1)
99+
; CHECK-NEXT: fadd 1, 28, 29
100+
; CHECK-NEXT: mtfsf 1, 0
101+
; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
102+
; CHECK-NEXT: fctiwz 1, 1
103+
; CHECK-NEXT: stfd 1, 152(1)
104104
; CHECK-NEXT: fcmpu 0, 28, 27
105105
; CHECK-NEXT: lwz 3, 164(1)
106-
; CHECK-NEXT: fcmpu 1, 29, 1
106+
; CHECK-NEXT: fcmpu 1, 29, 0
107107
; CHECK-NEXT: lwz 4, 156(1)
108108
; CHECK-NEXT: crandc 20, 6, 0
109109
; CHECK-NEXT: cror 20, 5, 20
@@ -209,25 +209,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
209209
; CHECK-NEXT: bl __gcc_qsub
210210
; CHECK-NEXT: mffs 0
211211
; CHECK-NEXT: mtfsb1 31
212-
; CHECK-NEXT: lis 3, .LCPI0_2@ha
213212
; CHECK-NEXT: mtfsb0 30
214213
; CHECK-NEXT: fadd 1, 2, 1
215214
; CHECK-NEXT: mtfsf 1, 0
216215
; CHECK-NEXT: fctiwz 0, 1
217-
; CHECK-NEXT: mffs 1
218216
; CHECK-NEXT: stfd 0, 32(1)
217+
; CHECK-NEXT: mffs 0
219218
; CHECK-NEXT: mtfsb1 31
220-
; CHECK-NEXT: lfs 0, .LCPI0_2@l(3)
221-
; CHECK-NEXT: lis 3, .LCPI0_3@ha
219+
; CHECK-NEXT: lis 3, .LCPI0_2@ha
220+
; CHECK-NEXT: lfs 2, .LCPI0_2@l(3)
222221
; CHECK-NEXT: mtfsb0 30
223-
; CHECK-NEXT: fadd 2, 28, 29
224-
; CHECK-NEXT: mtfsf 1, 1
225-
; CHECK-NEXT: lfs 1, .LCPI0_3@l(3)
226-
; CHECK-NEXT: fctiwz 2, 2
227-
; CHECK-NEXT: stfd 2, 24(1)
228-
; CHECK-NEXT: fcmpu 0, 30, 0
222+
; CHECK-NEXT: lis 3, .LCPI0_3@ha
223+
; CHECK-NEXT: fadd 1, 28, 29
224+
; CHECK-NEXT: mtfsf 1, 0
225+
; CHECK-NEXT: lfs 0, .LCPI0_3@l(3)
226+
; CHECK-NEXT: fctiwz 1, 1
227+
; CHECK-NEXT: stfd 1, 24(1)
228+
; CHECK-NEXT: fcmpu 0, 30, 2
229229
; CHECK-NEXT: lwz 3, 36(1)
230-
; CHECK-NEXT: fcmpu 1, 31, 1
230+
; CHECK-NEXT: fcmpu 1, 31, 0
231231
; CHECK-NEXT: lwz 4, 28(1)
232232
; CHECK-NEXT: crandc 20, 6, 1
233233
; CHECK-NEXT: cror 20, 4, 20
@@ -264,25 +264,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
264264
; CHECK-NEXT: bl __gcc_qsub
265265
; CHECK-NEXT: mffs 0
266266
; CHECK-NEXT: mtfsb1 31
267-
; CHECK-NEXT: lis 3, .LCPI0_0@ha
268267
; CHECK-NEXT: mtfsb0 30
269268
; CHECK-NEXT: fadd 1, 2, 1
270269
; CHECK-NEXT: mtfsf 1, 0
271270
; CHECK-NEXT: fctiwz 0, 1
272-
; CHECK-NEXT: mffs 1
273271
; CHECK-NEXT: stfd 0, 96(1)
272+
; CHECK-NEXT: mffs 0
274273
; CHECK-NEXT: mtfsb1 31
275-
; CHECK-NEXT: lfs 0, .LCPI0_0@l(3)
276-
; CHECK-NEXT: lis 3, .LCPI0_1@ha
274+
; CHECK-NEXT: lis 3, .LCPI0_0@ha
275+
; CHECK-NEXT: lfs 2, .LCPI0_0@l(3)
277276
; CHECK-NEXT: mtfsb0 30
278-
; CHECK-NEXT: fadd 2, 30, 31
279-
; CHECK-NEXT: mtfsf 1, 1
280-
; CHECK-NEXT: lfs 1, .LCPI0_1@l(3)
281-
; CHECK-NEXT: fctiwz 2, 2
282-
; CHECK-NEXT: stfd 2, 88(1)
283-
; CHECK-NEXT: fcmpu 0, 30, 0
277+
; CHECK-NEXT: lis 3, .LCPI0_1@ha
278+
; CHECK-NEXT: fadd 1, 30, 31
279+
; CHECK-NEXT: mtfsf 1, 0
280+
; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
281+
; CHECK-NEXT: fctiwz 1, 1
282+
; CHECK-NEXT: stfd 1, 88(1)
283+
; CHECK-NEXT: fcmpu 0, 30, 2
284284
; CHECK-NEXT: lwz 3, 100(1)
285-
; CHECK-NEXT: fcmpu 1, 31, 1
285+
; CHECK-NEXT: fcmpu 1, 31, 0
286286
; CHECK-NEXT: lwz 4, 92(1)
287287
; CHECK-NEXT: crandc 20, 6, 0
288288
; CHECK-NEXT: cror 20, 5, 20
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple powerpc64le-unknown-linux | FileCheck %s
3+
; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
4+
; RUN: 2>&1 | FileCheck %s --check-prefix=LOG
5+
6+
define double @in_nostrict(double %a, double %b, double %c, double %d) {
7+
; CHECK-LABEL: in_nostrict:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: mffs 0
10+
; CHECK-NEXT: xsdivdp 1, 1, 2
11+
; CHECK-NEXT: xsadddp 1, 1, 3
12+
; CHECK-NEXT: xsadddp 0, 1, 0
13+
; CHECK-NEXT: mtfsf 255, 4
14+
; CHECK-NEXT: xsdivdp 1, 3, 4
15+
; CHECK-NEXT: xsadddp 1, 1, 2
16+
; CHECK-NEXT: xsadddp 1, 0, 1
17+
; CHECK-NEXT: blr
18+
;
19+
; LOG: *** MI Scheduling ***
20+
; LOG-NEXT: in_nostrict:%bb.0 entry
21+
; LOG: ExitSU: MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
22+
; LOG: *** MI Scheduling ***
23+
; LOG-NEXT: in_nostrict:%bb.0 entry
24+
; LOG: ExitSU: %{{[0-9]+}}:f8rc = MFFS implicit $rm
25+
;
26+
; LOG: *** MI Scheduling ***
27+
; LOG-NEXT: in_nostrict:%bb.0 entry
28+
; LOG: ExitSU: MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
29+
entry:
30+
%0 = tail call double @llvm.ppc.readflm()
31+
%1 = fdiv double %a, %b
32+
%2 = fadd double %1, %c
33+
%3 = fadd double %2, %0
34+
call double @llvm.ppc.setflm(double %d)
35+
%5 = fdiv double %c, %d
36+
%6 = fadd double %5, %b
37+
%7 = fadd double %3, %6
38+
ret double %7
39+
}
40+
41+
define double @in_strict(double %a, double %b, double %c, double %d) #0 {
42+
; CHECK-LABEL: in_strict:
43+
; CHECK: # %bb.0: # %entry
44+
; CHECK-NEXT: mffs 0
45+
; CHECK-NEXT: xsdivdp 1, 1, 2
46+
; CHECK-NEXT: xsadddp 1, 1, 3
47+
; CHECK-NEXT: xsadddp 0, 1, 0
48+
; CHECK-NEXT: mtfsf 255, 4
49+
; CHECK-NEXT: xsdivdp 1, 3, 4
50+
; CHECK-NEXT: xsadddp 1, 1, 2
51+
; CHECK-NEXT: xsadddp 1, 0, 1
52+
; CHECK-NEXT: blr
53+
;
54+
; LOG: ***** MI Scheduling *****
55+
; LOG-NEXT: in_strict:%bb.0 entry
56+
; LOG: ExitSU: MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
57+
; LOG: ***** MI Scheduling *****
58+
; LOG-NEXT: in_strict:%bb.0 entry
59+
; LOG: ExitSU: %{{[0-9]+}}:f8rc = MFFS implicit $rm
60+
;
61+
; LOG: ***** MI Scheduling *****
62+
; LOG-NEXT: in_strict:%bb.0 entry
63+
; LOG: ExitSU: MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
64+
entry:
65+
%0 = tail call double @llvm.ppc.readflm()
66+
%1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
67+
%2 = call double @llvm.experimental.constrained.fadd.f64(double %1, double %c, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
68+
%3 = call double @llvm.experimental.constrained.fadd.f64(double %2, double %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
69+
call double @llvm.ppc.setflm(double %d)
70+
%5 = call double @llvm.experimental.constrained.fdiv.f64(double %c, double %d, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
71+
%6 = call double @llvm.experimental.constrained.fadd.f64(double %5, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
72+
%7 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %6, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
73+
ret double %7
74+
}
75+
76+
declare double @llvm.ppc.readflm()
77+
declare double @llvm.ppc.setflm(double)
78+
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
79+
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
80+
81+
attributes #0 = { strictfp }

0 commit comments

Comments
 (0)