Skip to content

Commit 2b69de9

Browse files
committed
[CIR] [Lowering] [X86_64] Support VAArg for LongDouble
1 parent 94a5b92 commit 2b69de9

File tree

4 files changed

+154
-8
lines changed

4 files changed

+154
-8
lines changed

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ std::unique_ptr<mlir::cir::LowerModule> getLowerModule(mlir::cir::VAArgOp op) {
4747
mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
4848
if (!mo)
4949
return nullptr;
50-
5150
mlir::PatternRewriter rewriter(mo.getContext());
5251
return mlir::cir::createLowerModule(mo, rewriter);
5352
}
@@ -96,7 +95,8 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
9695
// FIXME: return early since X86_64ABIInfo::classify can't handle these types.
9796
// Let's hope LLVM's va_arg instruction can take care of it.
9897
// Remove this when X86_64ABIInfo::classify can take care of every type.
99-
if (!mlir::isa<VoidType, SingleType, DoubleType, BoolType, StructType>(op.getType()))
98+
if (!mlir::isa<VoidType, SingleType, DoubleType, BoolType, StructType,
99+
LongDoubleType>(op.getType()))
100100
return nullptr;
101101

102102
// Assume that va_list type is correct; should be pointer to LLVM type:
@@ -111,7 +111,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
111111
std::unique_ptr<mlir::cir::LowerModule> lowerModule = getLowerModule(op);
112112
if (!lowerModule)
113113
return nullptr;
114-
115114
mlir::Type ty = op.getType();
116115

117116
// FIXME: How should we access the X86AVXABILevel?
@@ -171,7 +170,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
171170
mlir::Block *contBlock = currentBlock->splitBlock(op);
172171
mlir::Block *inRegBlock = builder.createBlock(contBlock);
173172
mlir::Block *inMemBlock = builder.createBlock(contBlock);
174-
175173
builder.setInsertionPointToEnd(currentBlock);
176174
builder.create<BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
177175

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,21 @@ void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi,
166166
Current = Class::SSE;
167167
return;
168168

169+
} else if (isa<LongDoubleType>(Ty)) {
170+
const llvm::fltSemantics *LDF =
171+
&getContext().getTargetInfo().getLongDoubleFormat();
172+
if (LDF == &llvm::APFloat::IEEEquad()) {
173+
Lo = Class::SSE;
174+
Hi = Class::SSEUp;
175+
} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
176+
Lo = Class::X87;
177+
Hi = Class::X87Up;
178+
} else if (LDF == &llvm::APFloat::IEEEdouble()) {
179+
Current = Class::SSE;
180+
} else {
181+
llvm_unreachable("unexpected long double representation!");
182+
}
183+
return;
169184
} else if (isa<BoolType>(Ty)) {
170185
Current = Class::Integer;
171186
} else if (const auto RT = dyn_cast<StructType>(Ty)) {
@@ -268,6 +283,65 @@ void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi,
268283
cir_cconv_unreachable("NYI");
269284
}
270285

286+
ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
287+
unsigned freeIntRegs) const {
288+
// If this is a scalar LLVM value then assume LLVM will pass it in the right
289+
// place naturally.
290+
//
291+
// This assumption is optimistic, as there could be free registers available
292+
// when we need to pass this argument in memory, and LLVM could try to pass
293+
// the argument in the free register. This does not seem to happen currently,
294+
// but this code would be much safer if we could mark the argument with
295+
// 'onstack'. See PR12193.
296+
if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
297+
/*!Ty->isBitIntType()*/) {
298+
// FIXME: Handling enum type?
299+
300+
return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
301+
: ABIArgInfo::getDirect());
302+
}
303+
304+
if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
305+
return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
306+
307+
// Compute the byval alignment. We specify the alignment of the byval in all
308+
// cases so that the mid-level optimizer knows the alignment of the byval.
309+
unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
310+
311+
// Attempt to avoid passing indirect results using byval when possible. This
312+
// is important for good codegen.
313+
//
314+
// We do this by coercing the value into a scalar type which the backend can
315+
// handle naturally (i.e., without using byval).
316+
//
317+
// For simplicity, we currently only do this when we have exhausted all of the
318+
// free integer registers. Doing this when there are free integer registers
319+
// would require more care, as we would have to ensure that the coerced value
320+
// did not claim the unused register. That would require either reordering the
321+
// arguments to the function (so that any subsequent inreg values came first),
322+
// or only doing this optimization when there were no following arguments that
323+
// might be inreg.
324+
//
325+
// We currently expect it to be rare (particularly in well written code) for
326+
// arguments to be passed on the stack when there are still free integer
327+
// registers available (this would typically imply large structs being passed
328+
// by value), so this seems like a fair tradeoff for now.
329+
//
330+
// We can revisit this if the backend grows support for 'onstack' parameter
331+
// attributes. See PR12193.
332+
if (freeIntRegs == 0) {
333+
uint64_t size = getContext().getTypeSize(ty);
334+
335+
// If this type fits in an eightbyte, coerce it into the matching integral
336+
// type, which will end up on the stack (with alignment 8).
337+
if (align == 8 && size <= 64)
338+
return ABIArgInfo::getDirect(
339+
mlir::cir::IntType::get(LT.getMLIRContext(), size, false));
340+
}
341+
342+
return ABIArgInfo::getIndirect(align);
343+
}
344+
271345
/// Return a type that will be passed by the backend in the low 8 bytes of an
272346
/// XMM register, corresponding to the SSE class.
273347
Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
@@ -278,7 +352,7 @@ Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
278352
(unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
279353
Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
280354
if (!T0 || isa<Float64Type>(T0))
281-
return T0; // NOTE(cir): Not sure if this is correct.
355+
return ::mlir::cir::DoubleType::get(LT.getMLIRContext());
282356

283357
Type T1 = {};
284358
unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -296,6 +370,8 @@ Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
296370
return T0;
297371
}
298372

373+
return ::mlir::cir::DoubleType::get(LT.getMLIRContext());
374+
299375
cir_cconv_unreachable("NYI");
300376
}
301377

@@ -538,13 +614,34 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
538614
++neededSSE;
539615
break;
540616
}
617+
// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
618+
// on the stack.
619+
case Class::Memory:
620+
621+
// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
622+
// COMPLEX_X87, it is passed in memory.
623+
case Class::X87:
624+
case Class::ComplexX87:
625+
if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
626+
++neededInt;
627+
return getIndirectResult(Ty, freeIntRegs);
628+
629+
case Class::SSEUp:
630+
case Class::X87Up:
631+
llvm_unreachable("Invalid classification for lo word.");
632+
541633
default:
542634
cir_cconv_assert_or_abort(
543635
!::cir::MissingFeatures::X86ArgTypeClassification(), "NYI");
544636
}
545637

546638
Type HighPart = {};
547639
switch (Hi) {
640+
case Class::Memory:
641+
case Class::X87:
642+
case Class::ComplexX87:
643+
llvm_unreachable("Invalid classification for hi word.");
644+
548645
case Class::NoClass:
549646
break;
550647

@@ -557,8 +654,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
557654
return ABIArgInfo::getDirect(HighPart, 8);
558655
break;
559656

560-
default:
561-
cir_cconv_unreachable("NYI");
657+
// X87Up generally doesn't occur here (long double is passed in
658+
// memory), except in situations involving unions.
659+
case Class::X87Up:
660+
case Class::SSE:
661+
++neededSSE;
662+
HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
663+
664+
if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
665+
return ABIArgInfo::getDirect(HighPart, 8);
666+
break;
667+
668+
// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
669+
// eightbyte is passed in the upper half of the last used SSE
670+
// register. This only happens when 128-bit vectors are passed.
671+
case Class::SSEUp:
672+
llvm_unreachable("NYI && We need to implement GetByteVectorType");
673+
break;
562674
}
563675

564676
// If a high part was specified, merge it together with the low part. It is

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ class X86_64ABIInfo : public ABIInfo {
6868
Type GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, Type SourceTy,
6969
unsigned SourceOffset) const;
7070

71+
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
72+
/// such that the argument will be passed in memory.
73+
///
74+
/// \param freeIntRegs - The number of free integer registers remaining
75+
/// available.
76+
::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
77+
unsigned freeIntRegs) const;
78+
7179
/// The 0.98 ABI revision clarified a lot of ambiguities,
7280
/// unfortunately in ways that were not always consistent with
7381
/// certain previous compilers. In particular, platforms which
@@ -94,4 +102,4 @@ class X86_64ABIInfo : public ABIInfo {
94102
};
95103

96104
} // namespace cir
97-
} // namespace mlir
105+
} // namespace mlir

clang/test/CIR/Lowering/var-arg-x86_64.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,31 @@ double f1(int n, ...) {
3838
// CHECK: [[CONT_BB]]:
3939
// CHECK: [[VA_LIST3:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
4040
// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST3]])
41+
42+
long double f2(int n, ...) {
43+
va_list valist;
44+
va_start(valist, n);
45+
long double res = va_arg(valist, long double);
46+
va_end(valist);
47+
return res;
48+
}
49+
50+
// CHECK: define {{.*}}@f2
51+
// CHECK: [[RESULT:%.+]] = alloca x86_fp80
52+
// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
53+
// CHECK: [[RES:%.+]] = alloca x86_fp80
54+
// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
55+
// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
56+
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
57+
// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
58+
// CHECK: [[OVERFLOW_AREA:%.+]] = load {{.*}}, ptr [[OVERFLOW_AREA_P]]
59+
// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr ptr, ptr [[OVERFLOW_AREA]], i64 8
60+
// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
61+
// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[OVERFLOW_AREA]]
62+
// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
63+
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
64+
// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
65+
// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
66+
// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
67+
// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
68+
// CHECK: ret x86_fp80 [[RETURN_VALUE]]

0 commit comments

Comments
 (0)