Skip to content

Commit 2b69de9

Browse files
committed
[CIR] [Lowering] [X86_64] Support VAArg for LongDouble
1 parent 94a5b92 commit 2b69de9

File tree

4 files changed

+154
-8
lines changed

4 files changed

+154
-8
lines changed

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ std::unique_ptr<mlir::cir::LowerModule> getLowerModule(mlir::cir::VAArgOp op) {
4747
mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
4848
if (!mo)
4949
return nullptr;
50-
5150
mlir::PatternRewriter rewriter(mo.getContext());
5251
return mlir::cir::createLowerModule(mo, rewriter);
5352
}
@@ -96,7 +95,8 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
9695
// FIXME: return early since X86_64ABIInfo::classify can't handle these types.
9796
// Let's hope LLVM's va_arg instruction can take care of it.
9897
// Remove this when X86_64ABIInfo::classify can take care of every type.
99-
if (!mlir::isa<VoidType, SingleType, DoubleType, BoolType, StructType>(op.getType()))
98+
if (!mlir::isa<VoidType, SingleType, DoubleType, BoolType, StructType,
99+
LongDoubleType>(op.getType()))
100100
return nullptr;
101101

102102
// Assume that va_list type is correct; should be pointer to LLVM type:
@@ -111,7 +111,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
111111
std::unique_ptr<mlir::cir::LowerModule> lowerModule = getLowerModule(op);
112112
if (!lowerModule)
113113
return nullptr;
114-
115114
mlir::Type ty = op.getType();
116115

117116
// FIXME: How should we access the X86AVXABILevel?
@@ -171,7 +170,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
171170
mlir::Block *contBlock = currentBlock->splitBlock(op);
172171
mlir::Block *inRegBlock = builder.createBlock(contBlock);
173172
mlir::Block *inMemBlock = builder.createBlock(contBlock);
174-
175173
builder.setInsertionPointToEnd(currentBlock);
176174
builder.create<BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
177175

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,21 @@ void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi,
166166
Current = Class::SSE;
167167
return;
168168

169+
} else if (isa<LongDoubleType>(Ty)) {
170+
const llvm::fltSemantics *LDF =
171+
&getContext().getTargetInfo().getLongDoubleFormat();
172+
if (LDF == &llvm::APFloat::IEEEquad()) {
173+
Lo = Class::SSE;
174+
Hi = Class::SSEUp;
175+
} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
176+
Lo = Class::X87;
177+
Hi = Class::X87Up;
178+
} else if (LDF == &llvm::APFloat::IEEEdouble()) {
179+
Current = Class::SSE;
180+
} else {
181+
llvm_unreachable("unexpected long double representation!");
182+
}
183+
return;
169184
} else if (isa<BoolType>(Ty)) {
170185
Current = Class::Integer;
171186
} else if (const auto RT = dyn_cast<StructType>(Ty)) {
@@ -268,6 +283,65 @@ void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi,
268283
cir_cconv_unreachable("NYI");
269284
}
270285

286+
ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
287+
unsigned freeIntRegs) const {
288+
// If this is a scalar LLVM value then assume LLVM will pass it in the right
289+
// place naturally.
290+
//
291+
// This assumption is optimistic, as there could be free registers available
292+
// when we need to pass this argument in memory, and LLVM could try to pass
293+
// the argument in the free register. This does not seem to happen currently,
294+
// but this code would be much safer if we could mark the argument with
295+
// 'onstack'. See PR12193.
296+
if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
297+
/*!Ty->isBitIntType()*/) {
298+
// FIXME: Handling enum type?
299+
300+
return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
301+
: ABIArgInfo::getDirect());
302+
}
303+
304+
if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
305+
return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
306+
307+
// Compute the byval alignment. We specify the alignment of the byval in all
308+
// cases so that the mid-level optimizer knows the alignment of the byval.
309+
unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
310+
311+
// Attempt to avoid passing indirect results using byval when possible. This
312+
// is important for good codegen.
313+
//
314+
// We do this by coercing the value into a scalar type which the backend can
315+
// handle naturally (i.e., without using byval).
316+
//
317+
// For simplicity, we currently only do this when we have exhausted all of the
318+
// free integer registers. Doing this when there are free integer registers
319+
// would require more care, as we would have to ensure that the coerced value
320+
// did not claim the unused register. That would require either reordering the
321+
// arguments to the function (so that any subsequent inreg values came first),
322+
// or only doing this optimization when there were no following arguments that
323+
// might be inreg.
324+
//
325+
// We currently expect it to be rare (particularly in well written code) for
326+
// arguments to be passed on the stack when there are still free integer
327+
// registers available (this would typically imply large structs being passed
328+
// by value), so this seems like a fair tradeoff for now.
329+
//
330+
// We can revisit this if the backend grows support for 'onstack' parameter
331+
// attributes. See PR12193.
332+
if (freeIntRegs == 0) {
333+
uint64_t size = getContext().getTypeSize(ty);
334+
335+
// If this type fits in an eightbyte, coerce it into the matching integral
336+
// type, which will end up on the stack (with alignment 8).
337+
if (align == 8 && size <= 64)
338+
return ABIArgInfo::getDirect(
339+
mlir::cir::IntType::get(LT.getMLIRContext(), size, false));
340+
}
341+
342+
return ABIArgInfo::getIndirect(align);
343+
}
344+
271345
/// Return a type that will be passed by the backend in the low 8 bytes of an
272346
/// XMM register, corresponding to the SSE class.
273347
Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
@@ -278,7 +352,7 @@ Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
278352
(unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
279353
Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
280354
if (!T0 || isa<Float64Type>(T0))
281-
return T0; // NOTE(cir): Not sure if this is correct.
355+
return ::mlir::cir::DoubleType::get(LT.getMLIRContext());
282356

283357
Type T1 = {};
284358
unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -296,6 +370,8 @@ Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
296370
return T0;
297371
}
298372

373+
return ::mlir::cir::DoubleType::get(LT.getMLIRContext());
374+
299375
cir_cconv_unreachable("NYI");
300376
}
301377

@@ -538,13 +614,34 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
538614
++neededSSE;
539615
break;
540616
}
617+
// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
618+
// on the stack.
619+
case Class::Memory:
620+
621+
// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
622+
// COMPLEX_X87, it is passed in memory.
623+
case Class::X87:
624+
case Class::ComplexX87:
625+
if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
626+
++neededInt;
627+
return getIndirectResult(Ty, freeIntRegs);
628+
629+
case Class::SSEUp:
630+
case Class::X87Up:
631+
llvm_unreachable("Invalid classification for lo word.");
632+
541633
default:
542634
cir_cconv_assert_or_abort(
543635
!::cir::MissingFeatures::X86ArgTypeClassification(), "NYI");
544636
}
545637

546638
Type HighPart = {};
547639
switch (Hi) {
640+
case Class::Memory:
641+
case Class::X87:
642+
case Class::ComplexX87:
643+
llvm_unreachable("Invalid classification for hi word.");
644+
548645
case Class::NoClass:
549646
break;
550647

@@ -557,8 +654,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
557654
return ABIArgInfo::getDirect(HighPart, 8);
558655
break;
559656

560-
default:
561-
cir_cconv_unreachable("NYI");
657+
// X87Up generally doesn't occur here (long double is passed in
658+
// memory), except in situations involving unions.
659+
case Class::X87Up:
660+
case Class::SSE:
661+
++neededSSE;
662+
HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
663+
664+
if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
665+
return ABIArgInfo::getDirect(HighPart, 8);
666+
break;
667+
668+
// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
669+
// eightbyte is passed in the upper half of the last used SSE
670+
// register. This only happens when 128-bit vectors are passed.
671+
case Class::SSEUp:
672+
llvm_unreachable("NYI && We need to implement GetByteVectorType");
673+
break;
562674
}
563675

564676
// If a high part was specified, merge it together with the low part. It is

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ class X86_64ABIInfo : public ABIInfo {
6868
Type GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, Type SourceTy,
6969
unsigned SourceOffset) const;
7070

71+
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
72+
/// such that the argument will be passed in memory.
73+
///
74+
/// \param freeIntRegs - The number of free integer registers remaining
75+
/// available.
76+
::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
77+
unsigned freeIntRegs) const;
78+
7179
/// The 0.98 ABI revision clarified a lot of ambiguities,
7280
/// unfortunately in ways that were not always consistent with
7381
/// certain previous compilers. In particular, platforms which
@@ -94,4 +102,4 @@ class X86_64ABIInfo : public ABIInfo {
94102
};
95103

96104
} // namespace cir
97-
} // namespace mlir
105+
} // namespace mlir

clang/test/CIR/Lowering/var-arg-x86_64.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,31 @@ double f1(int n, ...) {
3838
// CHECK: [[CONT_BB]]:
3939
// CHECK: [[VA_LIST3:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
4040
// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST3]])
41+
42+
long double f2(int n, ...) {
43+
va_list valist;
44+
va_start(valist, n);
45+
long double res = va_arg(valist, long double);
46+
va_end(valist);
47+
return res;
48+
}
49+
50+
// CHECK: define {{.*}}@f2
51+
// CHECK: [[RESULT:%.+]] = alloca x86_fp80
52+
// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
53+
// CHECK: [[RES:%.+]] = alloca x86_fp80
54+
// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
55+
// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
56+
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
57+
// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
58+
// CHECK: [[OVERFLOW_AREA:%.+]] = load {{.*}}, ptr [[OVERFLOW_AREA_P]]
59+
// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr ptr, ptr [[OVERFLOW_AREA]], i64 8
60+
// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
61+
// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[OVERFLOW_AREA]]
62+
// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
63+
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
64+
// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
65+
// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
66+
// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
67+
// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
68+
// CHECK: ret x86_fp80 [[RETURN_VALUE]]

0 commit comments

Comments
 (0)