diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 4ff77bb64cf1c..f1aeeb13dcb57 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1013,14 +1013,6 @@ class TargetInfo : public TransferrableTargetInfo, return ComplexLongDoubleUsesFP2Ret; } - /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used - /// to convert to and from __fp16. - /// FIXME: This function should be removed once all targets stop using the - /// conversion intrinsics. - virtual bool useFP16ConversionIntrinsics() const { - return true; - } - /// Specify if mangling based on address space map should be used or /// not for language specific address spaces bool useAddressSpaceMapMangling() const { diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 2d3b8d2a8d950..2d2d469a5c7d6 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -154,10 +154,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { llvm::APInt getFMVPriority(ArrayRef Features) const override; - bool useFP16ConversionIntrinsics() const override { - return false; - } - void getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; void getTargetDefinesARMV82A(const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 0076f822c02a1..0ec4c50bde8c4 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -268,8 +268,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { llvm::SmallVector getTargetBuiltins() const override; - bool useFP16ConversionIntrinsics() const override { return false; } - void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 43c4718f4735b..00c2918bd2f23 100644 --- 
a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -177,10 +177,6 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { bool setFPMath(StringRef Name) override; - bool useFP16ConversionIntrinsics() const override { - return false; - } - void getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; void getTargetDefinesARMV82A(const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h index 7589b4309ebf5..ca470e126795f 100644 --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -68,7 +68,6 @@ class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo { resetDataLayout(); TheCXXABI.set(TargetCXXABI::GenericItanium); } - bool useFP16ConversionIntrinsics() const override { return false; } void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 31afd3eed96f9..45c0bd2896993 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -105,8 +105,6 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool hasBFloat16Type() const override { return true; } - bool useFP16ConversionIntrinsics() const override { return false; } - bool handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) override; diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 6338a4f2f9036..b928fc800ecaa 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -81,8 +81,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { llvm::SmallVector getTargetBuiltins() const override; - bool useFP16ConversionIntrinsics() const override { return false; } - bool initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, diff --git a/clang/lib/Basic/Targets/RISCV.h 
b/clang/lib/Basic/Targets/RISCV.h index 685735b54a45b..9441204e02bc8 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -113,10 +113,6 @@ class RISCVTargetInfo : public TargetInfo { CallingConvCheckResult checkCallingConvention(CallingConv CC) const override; - bool useFP16ConversionIntrinsics() const override { - return false; - } - bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; bool isValidTuneCPUName(StringRef Name) const override; diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index d5374602caeaa..d05c0f01603d3 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -168,10 +168,6 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { } public: - // SPIR supports the half type and the only llvm intrinsic allowed in SPIR is - // memcpy as per section 3 of the SPIR spec. - bool useFP16ConversionIntrinsics() const override { return false; } - llvm::SmallVector getTargetBuiltins() const override { return {}; } diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index 2bcf75deb0a91..80032da545e53 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -106,8 +106,6 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo { unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override; - bool useFP16ConversionIntrinsics() const override { return false; } - void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 3634330ec6698..ca8e3990cc54d 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -110,7 +110,6 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { StringRef getABI() const override; bool 
setABI(const std::string &Name) override; - bool useFP16ConversionIntrinsics() const override { return !HasFP16; } protected: void getTargetDefines(const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 922e32906cd04..d00f0acdee666 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -325,10 +325,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { return ""; } - bool useFP16ConversionIntrinsics() const override { - return false; - } - void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index ecb65d901de54..f797973683805 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -1793,18 +1793,10 @@ mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value, return cir::IntAttr::get(ty, value.getInt()); } case APValue::Float: { - const llvm::APFloat &init = value.getFloat(); - if (&init.getSemantics() == &llvm::APFloat::IEEEhalf() && - !cgm.getASTContext().getLangOpts().NativeHalfType && - cgm.getASTContext().getTargetInfo().useFP16ConversionIntrinsics()) { - cgm.errorNYI("ConstExprEmitter::tryEmitPrivate half"); - return {}; - } - mlir::Type ty = cgm.convertType(destType); assert(mlir::isa(ty) && "expected floating-point type"); - return cir::FPAttr::get(ty, init); + return cir::FPAttr::get(ty, value.getFloat()); } case APValue::Array: { const ArrayType *arrayTy = cgm.getASTContext().getAsArrayType(destType); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index a06f1f1dc1784..4b2572ce4858d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -924,19 +924,12 @@ class ScalarExprEmitter : public StmtVisitor { if (srcType->isHalfType() && 
!cgf.getContext().getLangOpts().NativeHalfType) { // Cast to FP using the intrinsic if the half type itself isn't supported. - if (mlir::isa(mlirDstType)) { - if (cgf.getContext().getTargetInfo().useFP16ConversionIntrinsics()) - cgf.getCIRGenModule().errorNYI(loc, - "cast via llvm.convert.from.fp16"); - } else { - // Cast to other types through float, using either the intrinsic or - // FPExt, depending on whether the half type itself is supported (as - // opposed to operations on half, available with NativeHalfType). - if (cgf.getContext().getTargetInfo().useFP16ConversionIntrinsics()) - cgf.getCIRGenModule().errorNYI(loc, - "cast via llvm.convert.from.fp16"); - // FIXME(cir): For now lets pretend we shouldn't use the conversion - // intrinsics and insert a cast here unconditionally. + if (!mlir::isa(mlirDstType)) { + // Cast to other types through float by widening the half value with an + // FPExt first (as opposed to operating directly on half, which is + // available with NativeHalfType). The fp16 conversion intrinsics have + // been removed, so the widening cast is emitted here + // unconditionally. src = builder.createCast(cgf.getLoc(loc), cir::CastKind::floating, src, cgf.floatTy); srcType = cgf.getContext().FloatTy; @@ -994,11 +987,6 @@ class ScalarExprEmitter : public StmtVisitor { res = emitScalarCast(src, srcType, dstType, mlirSrcType, mlirDstType, opts); if (mlirDstType != resTy) { - if (cgf.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { - cgf.getCIRGenModule().errorNYI(loc, "cast via llvm.convert.to.fp16"); - } - // FIXME(cir): For now we never use FP16 conversion intrinsics even if - // required by the target. 
Change that once this is implemented res = builder.createCast(cgf.getLoc(loc), cir::CastKind::floating, res, resTy); } diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index cddc849180971..23875d100ac0e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -396,13 +396,7 @@ mlir::Type CIRGenTypes::convertType(QualType type) { resultType = cgm.fP16Ty; break; case BuiltinType::Half: - if (astContext.getLangOpts().NativeHalfType || - !astContext.getTargetInfo().useFP16ConversionIntrinsics()) { - resultType = cgm.fP16Ty; - } else { - cgm.errorNYI(SourceLocation(), "processing of built-in type", type); - resultType = cgm.sInt32Ty; - } + resultType = cgm.fP16Ty; break; case BuiltinType::BFloat16: resultType = cgm.bFloat16Ty; diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 0eec4dba4824a..26cb3f39bfbd0 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -2469,16 +2469,8 @@ ConstantEmitter::tryEmitPrivate(const APValue &Value, QualType DestType, llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType()); return llvm::ConstantStruct::get(STy, Complex); } - case APValue::Float: { - const llvm::APFloat &Init = Value.getFloat(); - if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() && - !CGM.getContext().getLangOpts().NativeHalfType && - CGM.getContext().getTargetInfo().useFP16ConversionIntrinsics()) - return llvm::ConstantInt::get(CGM.getLLVMContext(), - Init.bitcastToAPInt()); - else - return llvm::ConstantFP::get(CGM.getLLVMContext(), Init); - } + case APValue::Float: + return llvm::ConstantFP::get(CGM.getLLVMContext(), Value.getFloat()); case APValue::ComplexFloat: { llvm::Constant *Complex[2]; diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 84421fef9f524..1b3486c86d493 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ 
b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1596,23 +1596,11 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // Cast from half through float if half isn't a native type. if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Cast to FP using the intrinsic if the half type itself isn't supported. - if (DstTy->isFloatingPointTy()) { - if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) - return Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy), - Src); - } else { + if (!DstTy->isFloatingPointTy()) { // Cast to other types through float, using either the intrinsic or FPExt, // depending on whether the half type itself is supported // (as opposed to operations on half, available with NativeHalfType). - if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { - Src = Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, - CGF.CGM.FloatTy), - Src); - } else { - Src = Builder.CreateFPExt(Src, CGF.CGM.FloatTy, "conv"); - } + Src = Builder.CreateFPExt(Src, CGF.CGM.FloatTy, "conv"); SrcType = CGF.getContext().FloatTy; SrcTy = CGF.FloatTy; } @@ -1723,11 +1711,6 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Make sure we cast in a single step if from another FP type. if (SrcTy->isFloatingPointTy()) { - // Use the intrinsic if the half type itself isn't supported - // (as opposed to operations on half, available with NativeHalfType). - if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) - return Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src); // If the half type is supported, just use an fptrunc. 
return Builder.CreateFPTrunc(Src, DstTy); } @@ -1737,14 +1720,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, Res = EmitScalarCast(Src, SrcType, DstType, SrcTy, DstTy, Opts); if (DstTy != ResTy) { - if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { - assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion"); - Res = Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), - Res); - } else { - Res = Builder.CreateFPTrunc(Res, ResTy, "conv"); - } + Res = Builder.CreateFPTrunc(Res, ResTy, "conv"); } if (Opts.EmitImplicitIntegerTruncationChecks) @@ -3399,14 +3375,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Another special case: half FP increment should be done via float - if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { - value = Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, - CGF.CGM.FloatTy), - input, "incdec.conv"); - } else { - value = Builder.CreateFPExt(input, CGF.CGM.FloatTy, "incdec.conv"); - } + value = Builder.CreateFPExt(input, CGF.CGM.FloatTy, "incdec.conv"); } if (value->getType()->isFloatTy()) @@ -3439,14 +3408,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec"); if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { - if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { - value = Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, - CGF.CGM.FloatTy), - value, "incdec.conv"); - } else { - value = Builder.CreateFPTrunc(value, input->getType(), "incdec.conv"); - } + value = Builder.CreateFPTrunc(value, input->getType(), "incdec.conv"); } // Fixed-point types. 
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 4239552d1299e..080082a8dc652 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -471,10 +471,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Half: // Half FP can either be storage-only (lowered to i16) or native. - ResultType = getTypeForFormat( - getLLVMContext(), Context.getFloatTypeSemantics(T), - Context.getLangOpts().NativeHalfType || - !Context.getTargetInfo().useFP16ConversionIntrinsics()); + ResultType = getTypeForFormat(getLLVMContext(), + Context.getFloatTypeSemantics(T), true); break; case BuiltinType::LongDouble: LongDoubleReferenced = true; diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp index a4486965a851a..ee911f5258554 100644 --- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp @@ -375,23 +375,8 @@ static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS, CGF.EmitScalarExpr(E->getArg(1))}); } -static bool EnsureNativeHalfSupport(unsigned BuiltinID, const CallExpr *E, - CodeGenFunction &CGF) { - auto &C = CGF.CGM.getContext(); - if (!C.getLangOpts().NativeHalfType && - C.getTargetInfo().useFP16ConversionIntrinsics()) { - CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getQuotedName(BuiltinID) + - " requires native half type support."); - return false; - } - return true; -} - static Value *MakeHalfType(Function *Intrinsic, unsigned BuiltinID, const CallExpr *E, CodeGenFunction &CGF) { - if (!EnsureNativeHalfSupport(BuiltinID, E, CGF)) - return nullptr; - SmallVector Args; auto *FTy = Intrinsic->getFunctionType(); unsigned ICEArguments = 0; @@ -1069,13 +1054,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, EmitScalarExpr(E->getArg(0))); case NVPTX::BI__nvvm_ldg_h: case NVPTX::BI__nvvm_ldg_h2: - return EnsureNativeHalfSupport(BuiltinID, E, *this) ? 
MakeLdg(*this, E) - : nullptr; + return MakeLdg(*this, E); case NVPTX::BI__nvvm_ldu_h: case NVPTX::BI__nvvm_ldu_h2: - return EnsureNativeHalfSupport(BuiltinID, E, *this) - ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E) - : nullptr; + return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E); case NVPTX::BI__nvvm_cp_async_ca_shared_global_4: return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4, Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ba4b25961d70d..daddeef808f73 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5763,22 +5763,12 @@ bool Sema::BuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs, if (OrigArg->isTypeDependent()) return false; - // Usual Unary Conversions will convert half to float, which we want for - // machines that use fp16 conversion intrinsics. Else, we wnat to leave the - // type how it is, but do normal L->Rvalue conversions. - if (Context.getTargetInfo().useFP16ConversionIntrinsics()) { - ExprResult Res = UsualUnaryConversions(OrigArg); - - if (!Res.isUsable()) - return true; - OrigArg = Res.get(); - } else { - ExprResult Res = DefaultFunctionArrayLvalueConversion(OrigArg); + // We want to leave the type how it is, but do normal L->Rvalue conversions. + ExprResult Res = DefaultFunctionArrayLvalueConversion(OrigArg); + if (!Res.isUsable()) + return true; + OrigArg = Res.get(); - if (!Res.isUsable()) - return true; - OrigArg = Res.get(); - } TheCall->setArg(FPArgNo, OrigArg); QualType VectorResultTy; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e12e4b204afad..d6609651e0ff8 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -15213,8 +15213,7 @@ static ExprResult convertHalfVecBinOp(Sema &S, ExprResult LHS, ExprResult RHS, /// is needed. 
static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx, Expr *E0, Expr *E1 = nullptr) { - if (!OpRequiresConversion || Ctx.getLangOpts().NativeHalfType || - Ctx.getTargetInfo().useFP16ConversionIntrinsics()) + if (!OpRequiresConversion || Ctx.getLangOpts().NativeHalfType) return false; auto HasVectorOfHalfType = [&Ctx](Expr *E) { diff --git a/clang/test/CodeGen/builtin_float.c b/clang/test/CodeGen/builtin_float.c index f79b6bd4ff5e0..1c1ce6195b4be 100644 --- a/clang/test/CodeGen/builtin_float.c +++ b/clang/test/CodeGen/builtin_float.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm -triple x86_64-windows-pc -o - %s | FileCheck %s --check-prefixes=CHECK,FP16 -// RUN: %clang_cc1 -emit-llvm -triple ppc64-be -o - %s -DNO_FP16 | FileCheck %s --check-prefixes=CHECK,NOFP16 +// RUN: %clang_cc1 -emit-llvm -triple x86_64-windows-pc -o - %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple ppc64-be -o - %s -DNO_FP16 | FileCheck %s // test to ensure that these builtins don't do the variadic promotion of float->double. 
void test_floats(float f1, float f2) { @@ -49,8 +49,7 @@ void test_half(__fp16 *H, __fp16 *H2) { // CHECK: fcmp ogt float // CHECK-NEXT: zext i1 (void)__builtin_isinf(*H); - // FP16: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 516) - // NOFP16: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 516) + // CHECK: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 516) } void test_mixed(double d1, float f2) { diff --git a/clang/test/CodeGen/builtin_float_strictfp.c b/clang/test/CodeGen/builtin_float_strictfp.c index b7cf567ccd66f..97c0dfcdb96c9 100644 --- a/clang/test/CodeGen/builtin_float_strictfp.c +++ b/clang/test/CodeGen/builtin_float_strictfp.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm -triple x86_64-windows-pc -ffp-exception-behavior=maytrap -o - %s | FileCheck %s --check-prefixes=CHECK,FP16 -// RUN: %clang_cc1 -emit-llvm -triple ppc64-be -ffp-exception-behavior=maytrap -o - %s | FileCheck %s --check-prefixes=CHECK,NOFP16 +// RUN: %clang_cc1 -emit-llvm -triple x86_64-windows-pc -ffp-exception-behavior=maytrap -o - %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple ppc64-be -ffp-exception-behavior=maytrap -o - %s | FileCheck %s // test to ensure that these builtins don't do the variadic promotion of float->double. 
@@ -11,20 +11,15 @@ // CHECK-LABEL: @test_half void test_half(__fp16 *H, __fp16 *H2) { (void)__builtin_isgreater(*H, *H2); - // FP16: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict") - // FP16: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict") + // CHECK: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict") + // CHECK: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict") // CHECK: call i1 @llvm.experimental.constrained.fcmp.f32(float %{{.*}}, float %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") // CHECK-NEXT: zext i1 (void)__builtin_isinf(*H); - // NOFP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8 - // NOFP16-NEXT: [[IHALF:%.*]] = load i16, ptr [[LDADDR]], align 2 - // NOFP16-NEXT: [[CONV:%.*]] = call float @llvm.convert.from.fp16.f32(i16 [[IHALF]]) - // NOFP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[CONV]], i32 516) - // NOFP16-NEXT: zext i1 [[RES1]] to i32 - // FP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8 - // FP16-NEXT: [[HALF:%.*]] = load half, ptr [[LDADDR]], align 2 - // FP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[HALF]], i32 516) - // FP16-NEXT: zext i1 [[RES1]] to i32 + // CHECK: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8 + // CHECK-NEXT: [[HALF:%.*]] = load half, ptr [[LDADDR]], align 2 + // CHECK-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[HALF]], i32 516) + // CHECK-NEXT: zext i1 [[RES1]] to i32 } // CHECK-LABEL: @test_mixed diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 02932bc07f333..036cc6764b8db 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -21357,104 +21357,6 @@ The :ref:`align ` parameter attribute can be provided for the ``%Ptr`` arguments. 
-Half Precision Floating-Point Intrinsics ----------------------------------------- - -For most target platforms, half precision floating-point is a -storage-only format. This means that it is a dense encoding (in memory) -but does not support computation in the format. - -This means that code must first load the half-precision floating-point -value as an i16, then convert it to float with -:ref:`llvm.convert.from.fp16 `. Computation can -then be performed on the float value (including extending to double -etc). To store the value back to memory, it is first converted to float -if needed, then converted to i16 with -:ref:`llvm.convert.to.fp16 `, then storing as an -i16 value. - -.. _int_convert_to_fp16: - -'``llvm.convert.to.fp16``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -:: - - declare i16 @llvm.convert.to.fp16.f32(float %a) - declare i16 @llvm.convert.to.fp16.f64(double %a) - -Overview: -""""""""" - -The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a -conventional floating-point type to half precision floating-point format. - -Arguments: -"""""""""" - -The intrinsic function contains single argument - the value to be -converted. - -Semantics: -"""""""""" - -The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a -conventional floating-point format to half precision floating-point format. The -return value is an ``i16`` which contains the converted number. - -Examples: -""""""""" - -.. code-block:: llvm - - %res = call i16 @llvm.convert.to.fp16.f32(float %a) - store i16 %res, i16* @x, align 2 - -.. 
_int_convert_from_fp16: - -'``llvm.convert.from.fp16``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -:: - - declare float @llvm.convert.from.fp16.f32(i16 %a) - declare double @llvm.convert.from.fp16.f64(i16 %a) - -Overview: -""""""""" - -The '``llvm.convert.from.fp16``' intrinsic function performs a -conversion from half precision floating-point format to single precision -floating-point format. - -Arguments: -"""""""""" - -The intrinsic function contains single argument - the value to be -converted. - -Semantics: -"""""""""" - -The '``llvm.convert.from.fp16``' intrinsic function performs a -conversion from half single precision floating-point format to single -precision floating-point format. The input half-float value is -represented by an ``i16`` value. - -Examples: -""""""""" - -.. code-block:: llvm - - %a = load i16, ptr @x, align 2 - %res = call float @llvm.convert.from.fp16(i16 %a) - Saturating floating-point to integer conversions ------------------------------------------------ diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 910a50214df2f..09bdebca5b73d 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -76,6 +76,10 @@ Changes to the LLVM IR keep floating point support out of printf if it can be proven unused. * Case values are no longer operands of `SwitchInst`. +* Removed `llvm.convert.to.fp16` and `llvm.convert.from.fp16` + intrinsics. These are equivalent to an `fptrunc` to or an `fpext` from + `half`, combined with a bitcast to or from `i16`. 
+ Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 7eae4fd200500..e85e4c04c67ca 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1941,12 +1941,6 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly]>; def int_pseudoprobe : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly]>; -// Intrinsics to support half precision floating point format -let IntrProperties = [IntrNoMem] in { -def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; -def int_convert_from_fp16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>; -} - // Saturating floating point to integer intrinsics let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in { def int_fptoui_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index c3a3035fcaa9e..171605d53ab74 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1730,8 +1730,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::frexp: case Intrinsic::fptoui_sat: case Intrinsic::fptosi_sat: - case Intrinsic::convert_from_fp16: - case Intrinsic::convert_to_fp16: case Intrinsic::amdgcn_cos: case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: @@ -2458,15 +2456,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, } if (auto *Op = dyn_cast(Operands[0])) { - if (IntrinsicID == Intrinsic::convert_to_fp16) { - APFloat Val(Op->getValueAPF()); - - bool lost = false; - Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); - - return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); - } - APFloat U = Op->getValueAPF(); if (IntrinsicID == 
Intrinsic::wasm_trunc_signed || @@ -3072,21 +3061,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantInt::get(Ty, Op->getValue().popcount()); case Intrinsic::bitreverse: return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); - case Intrinsic::convert_from_fp16: { - APFloat Val(APFloat::IEEEhalf(), Op->getValue()); - - bool lost = false; - APFloat::opStatus status = Val.convert( - Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); - - // Conversion is always precise. - (void)status; - assert(status != APFloat::opInexact && !lost && - "Precision lost during fp16 constfolding"); - - return ConstantFP::get(Ty, Val); - } - case Intrinsic::amdgcn_s_wqm: { uint64_t Val = Op->getZExtValue(); Val |= (Val & 0x5555555555555555ULL) << 1 | diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 12552bce3caaa..7de17979cc925 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2352,18 +2352,6 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } return true; } - case Intrinsic::convert_from_fp16: - // FIXME: This intrinsic should probably be removed from the IR. - MIRBuilder.buildFPExt(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0)), - MachineInstr::copyFlagsFromInstruction(CI)); - return true; - case Intrinsic::convert_to_fp16: - // FIXME: This intrinsic should probably be removed from the IR. 
- MIRBuilder.buildFPTrunc(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0)), - MachineInstr::copyFlagsFromInstruction(CI)); - return true; case Intrinsic::frexp: { ArrayRef VRegs = getOrCreateVRegs(CI); MIRBuilder.buildFFrexp(VRegs[0], VRegs[1], diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6c07ec9964515..156a64982ed6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7114,19 +7114,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } return; } - case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, - DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, - getValue(I.getArgOperand(0)), - DAG.getTargetConstant(0, sdl, - MVT::i32)))); - return; - case Intrinsic::convert_from_fp16: - setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, - TLI.getValueType(DAG.getDataLayout(), I.getType()), - DAG.getNode(ISD::BITCAST, sdl, MVT::f16, - getValue(I.getArgOperand(0))))); - return; case Intrinsic::fptosi_sat: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index c8004ee53c529..7e4a452db6ee3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1245,6 +1245,26 @@ static bool consumeNVVMPtrAddrSpace(StringRef &Name) { Name.consume_front("param"); } +static bool convertIntrinsicValidType(StringRef Name, + const FunctionType *FuncTy) { + Type *HalfTy = Type::getHalfTy(FuncTy->getContext()); + if (Name.starts_with("to.fp16")) { + return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0), + HalfTy) && + CastInst::castIsValid(Instruction::BitCast, HalfTy, + FuncTy->getReturnType()); + } + + if (Name.starts_with("from.fp16")) { + return CastInst::castIsValid(Instruction::BitCast, 
FuncTy->getParamType(0), + HalfTy) && + CastInst::castIsValid(Instruction::FPExt, HalfTy, + FuncTy->getReturnType()); + } + + return false; +} + static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords) { assert(F && "Illegal to upgrade a non-existent Function."); @@ -1311,6 +1331,13 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } case 'c': { if (F->arg_size() == 1) { + if (Name.consume_front("convert.")) { + if (convertIntrinsicValidType(Name, F->getFunctionType())) { + NewFn = nullptr; + return true; + } + } + Intrinsic::ID ID = StringSwitch(Name) .StartsWith("ctlz.", Intrinsic::ctlz) .StartsWith("cttz.", Intrinsic::cttz) @@ -2682,9 +2709,9 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Arg, /*FMFSource=*/nullptr, "ctpop"); Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); } else if (Name == "h2f") { - Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16, - {Builder.getFloatTy()}, CI->getArgOperand(0), - /*FMFSource=*/nullptr, "h2f"); + Value *Cast = + Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy()); + Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy()); } else if (Name.consume_front("bitcast.") && (Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll")) { @@ -4784,6 +4811,23 @@ static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) { CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator()); } +static Value *upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, + Function *F, IRBuilder<> &Builder) { + if (Name.starts_with("to.fp16")) { + Value *Cast = + Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy()); + return Builder.CreateBitCast(Cast, CI->getType()); + } + + if (Name.starts_with("from.fp16")) { + Value *Cast = + Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy()); + return Builder.CreateFPExt(Cast, CI->getType()); + } + + return nullptr; +} + /// 
Upgrade a call to an old intrinsic. All argument and return casting must be /// provided to seamlessly integrate with existing context. void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { @@ -4801,9 +4845,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (!NewFn) { // Get the Function's name. StringRef Name = F->getName(); - - assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'"); - Name = Name.substr(5); + if (!Name.consume_front("llvm.")) + llvm_unreachable("intrinsic doesn't start with 'llvm.'"); bool IsX86 = Name.consume_front("x86."); bool IsNVVM = Name.consume_front("nvvm."); @@ -4827,6 +4870,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder); } else if (IsDbg) { upgradeDbgIntrinsicToDbgRecord(Name, CI); + } else if (Name.consume_front("convert.")) { + Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder); } else { llvm_unreachable("Unknown function for CallBase upgrade."); } @@ -5037,11 +5082,6 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { case Intrinsic::ctpop: NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); break; - - case Intrinsic::convert_from_fp16: - NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); - break; - case Intrinsic::dbg_value: { StringRef Name = F->getName(); Name = Name.substr(5); // Strip llvm. diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index f007886115d35..f88f3ef72e083 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -2621,75 +2621,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // FIXME: Handle more intrinsics. 
switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::convert_from_fp16: - case Intrinsic::convert_to_fp16: { - if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) - return false; - - const Value *Op = II->getArgOperand(0); - Register InputReg = getRegForValue(Op); - if (!InputReg) - return false; - - // F16C only allows converting from float to half and from half to float. - bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16; - if (IsFloatToHalf) { - if (!Op->getType()->isFloatTy()) - return false; - } else { - if (!II->getType()->isFloatTy()) - return false; - } - - Register ResultReg; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16); - if (IsFloatToHalf) { - // 'InputReg' is implicitly promoted from register class FR32 to - // register class VR128 by method 'constrainOperandRegClass' which is - // directly called by 'fastEmitInst_ri'. - // Instruction VCVTPS2PHrr takes an extra immediate operand which is - // used to provide rounding control: use MXCSR.RC, encoded as 0b100. - // It's consistent with the other FP instructions, which are usually - // controlled by MXCSR. - unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr - : X86::VCVTPS2PHrr; - InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4); - - // Move the lower 32-bits of ResultReg to another register of class GR32. - Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr - : X86::VMOVPDI2DIrr; - ResultReg = createResultReg(&X86::GR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) - .addReg(InputReg, RegState::Kill); - - // The result value is in the lower 16-bits of ResultReg. - unsigned RegIdx = X86::sub_16bit; - ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx); - } else { - assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!"); - // Explicitly zero-extend the input to 32-bit. 
- InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg); - - // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr. - InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR, - InputReg); - - unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr - : X86::VCVTPH2PSrr; - InputReg = fastEmitInst_r(Opc, RC, InputReg); - - // The result value is in the lower 32-bits of ResultReg. - // Emit an explicit copy from register class VR128 to register class FR32. - ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(InputReg, RegState::Kill); - } - - updateValueMap(II, ResultReg); - return true; - } + default: + return false; case Intrinsic::frameaddress: { MachineFunction *MF = FuncInfo.MF; if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI()) diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 0f749cf81f39b..75d3aed3867e2 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -118,7 +118,8 @@ define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: trunc i64 [[popc]] to i32 %r6 = call i32 @llvm.nvvm.popc.ll(i64 %b) -; CHECK: call float @llvm.convert.from.fp16.f32(i16 %c) +; CHECK: [[BITCAST_C:%.+]] = bitcast i16 %c to half +; CHECK-NEXT: = fpext half [[BITCAST_C]] to float %r7 = call float @llvm.nvvm.h2f(i16 %c) ret void } diff --git a/llvm/test/Bitcode/autoupgrade-convert-fp16-intrinsics-malformed.ll b/llvm/test/Bitcode/autoupgrade-convert-fp16-intrinsics-malformed.ll new file mode 100644 index 0000000000000..2035cb3aea55a --- /dev/null +++ b/llvm/test/Bitcode/autoupgrade-convert-fp16-intrinsics-malformed.ll @@ -0,0 +1,95 @@ +; RUN: rm -rf %t && split-file %s %t +; RUN: llvm-as < %t/missing-arg.ll | llvm-dis | FileCheck -check-prefix=MISSING-ARG %s +; RUN: llvm-as < 
%t/void-return.ll | llvm-dis | FileCheck -check-prefix=VOID %s +; RUN: llvm-as < %t/bfloat.ll | llvm-dis | FileCheck -check-prefix=BFLOAT %s +; RUN: llvm-as < %t/half.ll | llvm-dis | FileCheck -check-prefix=HALF %s +; RUN: llvm-as < %t/vector.ll | llvm-dis | FileCheck -check-prefix=VECTOR %s + +;--- missing-arg.ll + +define i16 @convert_to_fp16__missing_arg() { + ; MISSING-ARG: %result = call i16 @llvm.convert.to.fp16.f32() + %result = call i16 @llvm.convert.to.fp16.f32() + ret i16 %result +} + +define float @convert_from_fp16__f32_missing_arg() { +; MISSING-ARG: %result = call float @llvm.convert.from.fp16.f32() + %result = call float @llvm.convert.from.fp16.f32() + ret float %result +} + +declare i16 @llvm.convert.to.fp16.f32() +declare float @llvm.convert.from.fp16.f32() + + +;--- void-return.ll + +define void @convert_to_fp16__f32(float %src) { +; VOID: call void @llvm.convert.to.fp16.f32(float %src) + call void @llvm.convert.to.fp16.f32(float %src) + ret void +} + +define void @convert_from_fp16__f32(i16 %src) { +; VOID: call void @llvm.convert.from.fp16.f32(i16 %src) + call void @llvm.convert.from.fp16.f32(i16 %src) + ret void +} + +declare void @llvm.convert.to.fp16.f32(float) +declare void @llvm.convert.from.fp16.f32(i16) + +;--- bfloat.ll + +; Not well formed but the verifier never enforced this. +define i16 @convert_to_fp16__bf16(bfloat %src) { +; BFLOAT: %result = call i16 @llvm.convert.to.fp16.bf16(bfloat %src) + %result = call i16 @llvm.convert.to.fp16.bf16(bfloat %src) + ret i16 %result +} + +; Not well formed but the verifier never enforced this. 
+define bfloat @convert_from_fp16__bf16(i16 %src) { +; BFLOAT: %result = call bfloat @llvm.convert.from.fp16.bf16(i16 %src) + %result = call bfloat @llvm.convert.from.fp16.bf16(i16 %src) + ret bfloat %result +} + +declare i16 @llvm.convert.to.fp16.bf16(bfloat) +declare bfloat @llvm.convert.from.fp16.bf16(i16) + +;--- half.ll + +define i16 @convert_to_fp16__f16(half %src) { +; HALF: %result = call i16 @llvm.convert.to.fp16.f16(half %src) + %result = call i16 @llvm.convert.to.fp16.f16(half %src) + ret i16 %result +} + +define half @convert_from_fp16__f16(i16 %src) { +; HALF: %result = call half @llvm.convert.from.fp16.f16(i16 %src) + %result = call half @llvm.convert.from.fp16.f16(i16 %src) + ret half %result +} + +declare i16 @llvm.convert.to.fp16.f16(half) +declare half @llvm.convert.from.fp16.f16(i16) + +;--- vector.ll + +; These were not declared as supporting vectors. +define <2 x i16> @convert_to_fp16__v2f32(<2 x float> %src) { +; VECTOR: %result = call <2 x i16> @llvm.convert.to.fp16.v2f32(<2 x float> %src) + %result = call <2 x i16> @llvm.convert.to.fp16.v2f32(<2 x float> %src) + ret <2 x i16> %result +} + +define <2 x float> @convert_from_fp16__v2f32(<2 x i16> %src) { +; VECTOR: %result = call <2 x float> @llvm.convert.from.fp16.v2f32(<2 x i16> %src) + %result = call <2 x float> @llvm.convert.from.fp16.v2f32(<2 x i16> %src) + ret <2 x float> %result +} + +declare <2 x i16> @llvm.convert.to.fp16.v2f32(<2 x float>) +declare <2 x float> @llvm.convert.from.fp16.v2f32(<2 x i16>) diff --git a/llvm/test/Bitcode/autoupgrade-convert-fp16-intrinsics.ll b/llvm/test/Bitcode/autoupgrade-convert-fp16-intrinsics.ll new file mode 100644 index 0000000000000..3cc68ed474e41 --- /dev/null +++ b/llvm/test/Bitcode/autoupgrade-convert-fp16-intrinsics.ll @@ -0,0 +1,127 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + + +define i16 @convert_to_fp16__f32(float %src) { +; CHECK-LABEL: define i16 @convert_to_fp16__f32( +; CHECK-SAME: float [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] 
= fptrunc float [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] +; + %result = call i16 @llvm.convert.to.fp16.f32(float %src) + ret i16 %result +} + +define i16 @convert_to_fp16__f64(double %src) { +; CHECK-LABEL: define i16 @convert_to_fp16__f64( +; CHECK-SAME: double [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] +; + %result = call i16 @llvm.convert.to.fp16.f64(double %src) + ret i16 %result +} + +define i16 @convert_to_fp16__fp128(fp128 %src) { +; CHECK-LABEL: define i16 @convert_to_fp16__fp128( +; CHECK-SAME: fp128 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = fptrunc fp128 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] +; + %result = call i16 @llvm.convert.to.fp16.f128(fp128 %src) + ret i16 %result +} + +define i16 @convert_to_fp16__x86_fp80(x86_fp80 %src) { +; CHECK-LABEL: define i16 @convert_to_fp16__x86_fp80( +; CHECK-SAME: x86_fp80 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = fptrunc x86_fp80 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] +; + %result = call i16 @llvm.convert.to.fp16.f80(x86_fp80 %src) + ret i16 %result +} + +define i16 @convert_to_fp16__ppc_fp128(ppc_fp128 %src) { +; CHECK-LABEL: define i16 @convert_to_fp16__ppc_fp128( +; CHECK-SAME: ppc_fp128 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = fptrunc ppc_fp128 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] +; + %result = call i16 @llvm.convert.to.fp16.ppcf128(ppc_fp128 %src) + ret i16 %result +} + +define float @convert_from_fp16__f32(i16 %src) { +; CHECK-LABEL: define float @convert_from_fp16__f32( +; CHECK-SAME: i16 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = fpext half [[TMP1]] to 
float +; CHECK-NEXT: ret float [[TMP2]] +; + %result = call float @llvm.convert.from.fp16.f32(i16 %src) + ret float %result +} + +define double @convert_from_fp16__f64(i16 %src) { +; CHECK-LABEL: define double @convert_from_fp16__f64( +; CHECK-SAME: i16 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = fpext half [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %result = call double @llvm.convert.from.fp16.f64(i16 %src) + ret double %result +} + +define fp128 @convert_from_fp16__fp128(i16 %src) { +; CHECK-LABEL: define fp128 @convert_from_fp16__fp128( +; CHECK-SAME: i16 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = fpext half [[TMP1]] to fp128 +; CHECK-NEXT: ret fp128 [[TMP2]] +; + %result = call fp128 @llvm.convert.from.fp16.f128(i16 %src) + ret fp128 %result +} + +define x86_fp80 @convert_from_fp16__x86_fp80(i16 %src) { +; CHECK-LABEL: define x86_fp80 @convert_from_fp16__x86_fp80( +; CHECK-SAME: i16 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = fpext half [[TMP1]] to x86_fp80 +; CHECK-NEXT: ret x86_fp80 [[TMP2]] +; + %result = call x86_fp80 @llvm.convert.from.fp16.f80(i16 %src) + ret x86_fp80 %result +} + +define ppc_fp128 @convert_from_fp16__ppc_fp128_fp80(i16 %src) { +; CHECK-LABEL: define ppc_fp128 @convert_from_fp16__ppc_fp128_fp80( +; CHECK-SAME: i16 [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[SRC]] to half +; CHECK-NEXT: [[TMP2:%.*]] = fpext half [[TMP1]] to ppc_fp128 +; CHECK-NEXT: ret ppc_fp128 [[TMP2]] +; + %result = call ppc_fp128 @llvm.convert.from.fp16.ppcf128(i16 %src) + ret ppc_fp128 %result +} + + +declare i16 @llvm.convert.to.fp16.f32(float) #0 +declare i16 @llvm.convert.to.fp16.f64(double) #0 +declare i16 @llvm.convert.to.fp16.f128(fp128) #0 +declare i16 @llvm.convert.to.fp16.f80(x86_fp80) #0 +declare i16 @llvm.convert.to.fp16.ppcf128(ppc_fp128) #0 + 
+declare float @llvm.convert.from.fp16.f32(i16) #0 +declare double @llvm.convert.from.fp16.f64(i16) #0 +declare fp128 @llvm.convert.from.fp16.f128(i16) #0 +declare x86_fp80 @llvm.convert.from.fp16.f80(i16) #0 +declare ppc_fp128 @llvm.convert.from.fp16.ppcf128(i16) #0 + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-convert-fp16-intrinsics.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-convert-fp16-intrinsics.ll deleted file mode 100644 index 065a3d8e4dd25..0000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-convert-fp16-intrinsics.ll +++ /dev/null @@ -1,31 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=aarch64-- -mcpu=falkor -mattr=+lse -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s - -define i16 @convert_to_fp16(float %src) { - ; CHECK-LABEL: name: convert_to_fp16 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 - ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 - %cvt = call i16 @llvm.convert.to.fp16.f32(float %src) - ret i16 %cvt -} - -define float @convert_from_fp16(i16 %src) { - ; CHECK-LABEL: name: convert_from_fp16 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK: $s0 = COPY [[FPEXT]](s32) - ; CHECK: RET_ReallyLR implicit $s0 - %cvt = call float @llvm.convert.from.fp16.f32(i16 %src) - ret float %cvt -} - -declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone -declare float @llvm.convert.from.fp16.f32(i16) nounwind 
readnone diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index 52ca22ba00794..94b494c8c08c4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -268,52 +268,3 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> ret <4 x float> %res } - -define i16 @to_half(float %in) { -; CHECK-SD-LABEL: to_half: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-FI-LABEL: to_half: -; CHECK-FI: // %bb.0: -; CHECK-FI-NEXT: fcvt h1, s0 -; CHECK-FI-NEXT: // implicit-def: $w0 -; CHECK-FI-NEXT: fmov s0, w0 -; CHECK-FI-NEXT: fmov s0, s1 -; CHECK-FI-NEXT: fmov w0, s0 -; CHECK-FI-NEXT: // kill: def $w1 killed $w0 -; CHECK-FI-NEXT: ret -; -; CHECK-GI-LABEL: to_half: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret - %res = call i16 @llvm.convert.to.fp16.f32(float %in) - ret i16 %res -} - -define float @from_half(i16 %in) { -; CHECK-SD-LABEL: from_half: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: fcvt s0, h0 -; CHECK-SD-NEXT: ret -; -; CHECK-FI-LABEL: from_half: -; CHECK-FI: // %bb.0: -; CHECK-FI-NEXT: fmov s0, w0 -; CHECK-FI-NEXT: // kill: def $h0 killed $h0 killed $s0 -; CHECK-FI-NEXT: fcvt s0, h0 -; CHECK-FI-NEXT: ret -; -; CHECK-GI-LABEL: from_half: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: ret - %res = call float @llvm.convert.from.fp16.f32(i16 %in) - ret float %res -} diff --git a/llvm/test/CodeGen/AArch64/f16-convert.ll b/llvm/test/CodeGen/AArch64/f16-convert.ll deleted file mode 100644 index 03c7fe2e975ed..0000000000000 --- a/llvm/test/CodeGen/AArch64/f16-convert.ll +++ /dev/null @@ -1,256 +0,0 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s - 
-define float @load0(ptr nocapture readonly %a) nounwind { -; CHECK-LABEL: load0: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0] -; CHECK-NEXT: fcvt s0, [[HREG]] -; CHECK-NEXT: ret - - %tmp = load i16, ptr %a, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - ret float %tmp1 -} - -define double @load1(ptr nocapture readonly %a) nounwind { -; CHECK-LABEL: load1: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0] -; CHECK-NEXT: fcvt d0, [[HREG]] -; CHECK-NEXT: ret - - %tmp = load i16, ptr %a, align 2 - %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) - ret double %conv -} - -define float @load2(ptr nocapture readonly %a, i32 %i) nounwind { -; CHECK-LABEL: load2: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1] -; CHECK-NEXT: fcvt s0, [[HREG]] -; CHECK-NEXT: ret - - %idxprom = sext i32 %i to i64 - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %idxprom - %tmp = load i16, ptr %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - ret float %tmp1 -} - -define double @load3(ptr nocapture readonly %a, i32 %i) nounwind { -; CHECK-LABEL: load3: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1] -; CHECK-NEXT: fcvt d0, [[HREG]] -; CHECK-NEXT: ret - - %idxprom = sext i32 %i to i64 - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %idxprom - %tmp = load i16, ptr %arrayidx, align 2 - %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) - ret double %conv -} - -define float @load4(ptr nocapture readonly %a, i64 %i) nounwind { -; CHECK-LABEL: load4: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1] -; CHECK-NEXT: fcvt s0, [[HREG]] -; CHECK-NEXT: ret - - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %i - %tmp = load i16, ptr %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - ret float %tmp1 -} - -define double @load5(ptr nocapture readonly %a, i64 %i) nounwind { -; CHECK-LABEL: load5: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1] -; CHECK-NEXT: 
fcvt d0, [[HREG]] -; CHECK-NEXT: ret - - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %i - %tmp = load i16, ptr %arrayidx, align 2 - %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) - ret double %conv -} - -define float @load6(ptr nocapture readonly %a) nounwind { -; CHECK-LABEL: load6: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20] -; CHECK-NEXT: fcvt s0, [[HREG]] -; CHECK-NEXT: ret - - %arrayidx = getelementptr inbounds i16, ptr %a, i64 10 - %tmp = load i16, ptr %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - ret float %tmp1 -} - -define double @load7(ptr nocapture readonly %a) nounwind { -; CHECK-LABEL: load7: -; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20] -; CHECK-NEXT: fcvt d0, [[HREG]] -; CHECK-NEXT: ret - - %arrayidx = getelementptr inbounds i16, ptr %a, i64 10 - %tmp = load i16, ptr %arrayidx, align 2 - %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) - ret double %conv -} - -define float @load8(ptr nocapture readonly %a) nounwind { -; CHECK-LABEL: load8: -; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20] -; CHECK-NEXT: fcvt s0, [[HREG]] -; CHECK-NEXT: ret - - %arrayidx = getelementptr inbounds i16, ptr %a, i64 -10 - %tmp = load i16, ptr %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - ret float %tmp1 -} - -define double @load9(ptr nocapture readonly %a) nounwind { -; CHECK-LABEL: load9: -; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20] -; CHECK-NEXT: fcvt d0, [[HREG]] -; CHECK-NEXT: ret - - %arrayidx = getelementptr inbounds i16, ptr %a, i64 -10 - %tmp = load i16, ptr %arrayidx, align 2 - %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) - ret double %conv -} - -define void @store0(ptr nocapture %a, float %val) nounwind { -; CHECK-LABEL: store0: -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0] -; CHECK-NEXT: ret - - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val) - store i16 %tmp, ptr %a, align 2 - ret void -} - -define 
void @store1(ptr nocapture %a, double %val) nounwind { -; CHECK-LABEL: store1: -; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0] -; CHECK-NEXT: ret - - %conv = fptrunc double %val to float - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv) - store i16 %tmp, ptr %a, align 2 - ret void -} - -define void @store2(ptr nocapture %a, i32 %i, float %val) nounwind { -; CHECK-LABEL: store2: -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0, w1, sxtw #1] -; CHECK-NEXT: ret - - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val) - %idxprom = sext i32 %i to i64 - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %idxprom - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store3(ptr nocapture %a, i32 %i, double %val) nounwind { -; CHECK-LABEL: store3: -; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0, w1, sxtw #1] -; CHECK-NEXT: ret - - %conv = fptrunc double %val to float - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv) - %idxprom = sext i32 %i to i64 - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %idxprom - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store4(ptr nocapture %a, i64 %i, float %val) nounwind { -; CHECK-LABEL: store4: -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val) - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %i - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store5(ptr nocapture %a, i64 %i, double %val) nounwind { -; CHECK-LABEL: store5: -; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - - %conv = fptrunc double %val to float - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv) - %arrayidx = getelementptr inbounds i16, ptr %a, i64 %i - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store6(ptr 
nocapture %a, float %val) nounwind { -; CHECK-LABEL: store6: -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0, #20] -; CHECK-NEXT: ret - - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val) - %arrayidx = getelementptr inbounds i16, ptr %a, i64 10 - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store7(ptr nocapture %a, double %val) nounwind { -; CHECK-LABEL: store7: -; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: str h0, [x0, #20] -; CHECK-NEXT: ret - - %conv = fptrunc double %val to float - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv) - %arrayidx = getelementptr inbounds i16, ptr %a, i64 10 - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store8(ptr nocapture %a, float %val) nounwind { -; CHECK-LABEL: store8: -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: stur h0, [x0, #-20] -; CHECK-NEXT: ret - - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val) - %arrayidx = getelementptr inbounds i16, ptr %a, i64 -10 - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -define void @store9(ptr nocapture %a, double %val) nounwind { -; CHECK-LABEL: store9: -; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: stur h0, [x0, #-20] -; CHECK-NEXT: ret - - %conv = fptrunc double %val to float - %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv) - %arrayidx = getelementptr inbounds i16, ptr %a, i64 -10 - store i16 %tmp, ptr %arrayidx, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone -declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone -declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone -declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll deleted file mode 100644 index 42451f9b0f9d2..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll +++ /dev/null @@ -1,121 +0,0 @@ -; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefixes=GFX6 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s -; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=CYPRESS %s -; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefixes=CAYMAN %s - -declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone - -define amdgpu_kernel void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { -; GFX6-LABEL: test_convert_fp16_to_fp32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s10, s6 -; GFX6-NEXT: s_mov_b32 s11, s7 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s8, s2 -; GFX6-NEXT: s_mov_b32 s9, s3 -; GFX6-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; GFX6-NEXT: s_mov_b32 s4, s0 -; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; GFX6-NEXT: s_endpgm -; -; GFX8-LABEL: test_convert_fp16_to_fp32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8-NEXT: s_mov_b32 s7, 0xf000 -; GFX8-NEXT: s_mov_b32 s6, -1 -; GFX8-NEXT: s_mov_b32 s10, s6 -; GFX8-NEXT: s_mov_b32 s11, s7 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s8, s2 -; GFX8-NEXT: s_mov_b32 s9, s3 -; GFX8-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; GFX8-NEXT: s_mov_b32 s4, s0 -; GFX8-NEXT: s_mov_b32 s5, s1 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: 
v_cvt_f32_f16_e32 v0, v0 -; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; GFX8-NEXT: s_endpgm -; -; GFX11-TRUE16-LABEL: test_convert_fp16_to_fp32: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1 -; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 -; GFX11-TRUE16-NEXT: s_mov_b32 s10, s6 -; GFX11-TRUE16-NEXT: s_mov_b32 s11, s7 -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s8, s2 -; GFX11-TRUE16-NEXT: s_mov_b32 s9, s3 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, s0 -; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l -; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 -; GFX11-TRUE16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: test_convert_fp16_to_fp32: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 -; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 -; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6 -; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2 -; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 -; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 -; GFX11-FAKE16-NEXT: s_endpgm -; -; CYPRESS-LABEL: test_convert_fp16_to_fp32: -; CYPRESS: ; %bb.0: -; CYPRESS-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] -; CYPRESS-NEXT: TEX 0 @6 -; CYPRESS-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] -; CYPRESS-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 -; CYPRESS-NEXT: CF_END -; CYPRESS-NEXT: PAD -; CYPRESS-NEXT: Fetch clause starting at 6: -; CYPRESS-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 -; CYPRESS-NEXT: ALU clause starting at 
8: -; CYPRESS-NEXT: MOV * T0.X, KC0[2].Z, -; CYPRESS-NEXT: ALU clause starting at 9: -; CYPRESS-NEXT: FLT16_TO_FLT32 T0.X, T0.X, -; CYPRESS-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, -; CYPRESS-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; -; CAYMAN-LABEL: test_convert_fp16_to_fp32: -; CAYMAN: ; %bb.0: -; CAYMAN-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] -; CAYMAN-NEXT: TEX 0 @6 -; CAYMAN-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] -; CAYMAN-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X -; CAYMAN-NEXT: CF_END -; CAYMAN-NEXT: PAD -; CAYMAN-NEXT: Fetch clause starting at 6: -; CAYMAN-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 -; CAYMAN-NEXT: ALU clause starting at 8: -; CAYMAN-NEXT: MOV * T0.X, KC0[2].Z, -; CAYMAN-NEXT: ALU clause starting at 9: -; CAYMAN-NEXT: FLT16_TO_FLT32 * T0.X, T0.X, -; CAYMAN-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, -; CAYMAN-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %val = load i16, ptr addrspace(1) %in, align 2 - %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone - store float %cvt, ptr addrspace(1) %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll deleted file mode 100644 index 5849f0c2fca70..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll +++ /dev/null @@ -1,92 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefixes=GFX6 %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s - - -declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone - -define amdgpu_kernel void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias 
%out, ptr addrspace(1) noalias %in) nounwind { -; GFX6-LABEL: test_convert_fp16_to_fp64: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s10, s6 -; GFX6-NEXT: s_mov_b32 s11, s7 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s8, s2 -; GFX6-NEXT: s_mov_b32 s9, s3 -; GFX6-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; GFX6-NEXT: s_mov_b32 s4, s0 -; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 -; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX6-NEXT: s_endpgm -; -; GFX8-LABEL: test_convert_fp16_to_fp64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8-NEXT: s_mov_b32 s7, 0xf000 -; GFX8-NEXT: s_mov_b32 s6, -1 -; GFX8-NEXT: s_mov_b32 s10, s6 -; GFX8-NEXT: s_mov_b32 s11, s7 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s8, s2 -; GFX8-NEXT: s_mov_b32 s9, s3 -; GFX8-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; GFX8-NEXT: s_mov_b32 s4, s0 -; GFX8-NEXT: s_mov_b32 s5, s1 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX8-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 -; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX8-NEXT: s_endpgm -; -; GFX11-TRUE16-LABEL: test_convert_fp16_to_fp64: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1 -; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 -; GFX11-TRUE16-NEXT: s_mov_b32 s10, s6 -; GFX11-TRUE16-NEXT: s_mov_b32 s11, s7 -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s8, s2 -; GFX11-TRUE16-NEXT: s_mov_b32 s9, s3 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, s0 -; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l -; 
GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 -; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0 -; GFX11-TRUE16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: test_convert_fp16_to_fp64: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 -; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 -; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6 -; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2 -; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 -; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 -; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0 -; GFX11-FAKE16-NEXT: s_endpgm - %val = load i16, ptr addrspace(1) %in, align 2 - %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone - store double %cvt, ptr addrspace(1) %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/AVR/half.ll b/llvm/test/CodeGen/AVR/half.ll index c922293e417bd..f126e9ad3a797 100644 --- a/llvm/test/CodeGen/AVR/half.ll +++ b/llvm/test/CodeGen/AVR/half.ll @@ -28,97 +28,6 @@ define half @return(ptr %p) nounwind { ret half %r } -define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; CHECK-LABEL: loadd: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov r30, r24 -; CHECK-NEXT: mov r31, r25 -; CHECK-NEXT: ldd r24, Z+2 -; CHECK-NEXT: ldd r25, Z+3 -; CHECK-NEXT: rcall __extendhfsf2 -; CHECK-NEXT: rcall __extendsfdf2 -; CHECK-NEXT: ret -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) - 
ret double %1 -} - -define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; CHECK-LABEL: loadf: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov r30, r24 -; CHECK-NEXT: mov r31, r25 -; CHECK-NEXT: ldd r24, Z+2 -; CHECK-NEXT: ldd r25, Z+3 -; CHECK-NEXT: rcall __extendhfsf2 -; CHECK-NEXT: ret -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 -} - -define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { -; CHECK-LABEL: stored: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: push r16 -; CHECK-NEXT: push r17 -; CHECK-NEXT: mov r30, r22 -; CHECK-NEXT: mov r31, r23 -; CHECK-NEXT: mov r22, r20 -; CHECK-NEXT: mov r23, r21 -; CHECK-NEXT: mov r20, r18 -; CHECK-NEXT: mov r21, r19 -; CHECK-NEXT: mov r18, r16 -; CHECK-NEXT: mov r19, r17 -; CHECK-NEXT: mov r16, r24 -; CHECK-NEXT: mov r17, r25 -; CHECK-NEXT: mov r24, r30 -; CHECK-NEXT: mov r25, r31 -; CHECK-NEXT: rcall __truncdfhf2 -; CHECK-NEXT: mov r30, r16 -; CHECK-NEXT: mov r31, r17 -; CHECK-NEXT: std Z+1, r25 -; CHECK-NEXT: st Z, r24 -; CHECK-NEXT: pop r17 -; CHECK-NEXT: pop r16 -; CHECK-NEXT: ret -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) - store i16 %0, ptr %a, align 2 - ret void -} - -define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { -; CHECK-LABEL: storef: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: push r16 -; CHECK-NEXT: push r17 -; CHECK-NEXT: mov r18, r22 -; CHECK-NEXT: mov r19, r23 -; CHECK-NEXT: mov r16, r24 -; CHECK-NEXT: mov r17, r25 -; CHECK-NEXT: mov r22, r20 -; CHECK-NEXT: mov r23, r21 -; CHECK-NEXT: mov r24, r18 -; CHECK-NEXT: mov r25, r19 -; CHECK-NEXT: rcall __truncsfhf2 -; CHECK-NEXT: mov r30, r16 -; CHECK-NEXT: mov r31, r17 -; CHECK-NEXT: std Z+1, r25 -; CHECK-NEXT: st Z, r24 -; CHECK-NEXT: pop r17 -; CHECK-NEXT: pop r16 -; CHECK-NEXT: ret -entry: - %0 = tail 
call i16 @llvm.convert.to.fp16.f32(float %b) - store i16 %0, ptr %a, align 2 - ret void -} - define void @test_load_store(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: test_load_store: ; CHECK: ; %bb.0: @@ -496,11 +405,11 @@ define half @test_select_cc(half) nounwind { ; CHECK-NEXT: mov r21, r17 ; CHECK-NEXT: rcall __nesf2 ; CHECK-NEXT: cpi r24, 0 -; CHECK-NEXT: breq .LBB25_2 +; CHECK-NEXT: breq .LBB21_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: ldi r16, 0 ; CHECK-NEXT: ldi r17, 60 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: .LBB21_2: ; CHECK-NEXT: mov r24, r16 ; CHECK-NEXT: mov r25, r17 ; CHECK-NEXT: pop r17 diff --git a/llvm/test/CodeGen/Hexagon/fp16.ll b/llvm/test/CodeGen/Hexagon/fp16.ll index 40211f2a1a656..15dc437dfbb0c 100644 --- a/llvm/test/CodeGen/Hexagon/fp16.ll +++ b/llvm/test/CodeGen/Hexagon/fp16.ll @@ -15,41 +15,45 @@ ;CHECK-LABEL: @test1 ;CHECK: jump __extendhfsf2 ;CHECK: r0 = memuh -define dso_local float @test1(ptr nocapture readonly %a) local_unnamed_addr #0 { +define dso_local float @test1(ptr readonly captures(none) %a) local_unnamed_addr #0 { entry: %0 = load i16, ptr %a, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + ret float %2 } ;CHECK-LABEL: @test2 ;CHECK: call __extendhfsf2 ;CHECK: r0 = memuh ;CHECK: convert_sf2d -define dso_local double @test2(ptr nocapture readonly %a) local_unnamed_addr #0 { +define dso_local double @test2(ptr readonly captures(none) %a) local_unnamed_addr #0 { entry: %0 = load i16, ptr %a, align 2 - %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) - ret double %1 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to double + ret double %2 } ;CHECK-LABEL: @test3 ;CHECK: call __truncsfhf2 ;CHECK: memh{{.*}}= r0 -define dso_local void @test3(float %src, ptr nocapture %dst) local_unnamed_addr #0 { +define dso_local void @test3(float %src, ptr captures(none) %dst) local_unnamed_addr #0 { entry: - %0 = tail call i16 
@llvm.convert.to.fp16.f32(float %src) - store i16 %0, ptr %dst, align 2 + %0 = fptrunc float %src to half + %1 = bitcast half %0 to i16 + store i16 %1, ptr %dst, align 2 ret void } ;CHECK-LABEL: @test4 ;CHECK: call __truncdfhf2 ;CHECK: memh{{.*}}= r0 -define dso_local void @test4(double %src, ptr nocapture %dst) local_unnamed_addr #0 { +define dso_local void @test4(double %src, ptr captures(none) %dst) local_unnamed_addr #0 { entry: - %0 = tail call i16 @llvm.convert.to.fp16.f64(double %src) - store i16 %0, ptr %dst, align 2 + %0 = fptrunc double %src to half + %1 = bitcast half %0 to i16 + store i16 %1, ptr %dst, align 2 ret void } @@ -57,20 +61,16 @@ entry: ;CHECK: call __extendhfsf2 ;CHECK: call __extendhfsf2 ;CHECK: sfadd -define dso_local float @test5(ptr nocapture readonly %a, ptr nocapture readonly %b) local_unnamed_addr #0 { +define dso_local float @test5(ptr readonly captures(none) %a, ptr readonly captures(none) %b) local_unnamed_addr #0 { entry: %0 = load i16, ptr %a, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - %2 = load i16, ptr %b, align 2 - %3 = tail call float @llvm.convert.from.fp16.f32(i16 %2) - %add = fadd float %1, %3 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr %b, align 2 + %4 = bitcast i16 %3 to half + %5 = fpext half %4 to float + %add = fadd float %2, %5 ret float %add } -declare float @llvm.convert.from.fp16.f32(i16) #1 -declare double @llvm.convert.from.fp16.f64(i16) #1 -declare i16 @llvm.convert.to.fp16.f32(float) #1 -declare i16 @llvm.convert.to.fp16.f64(double) #1 - attributes #0 = { nounwind readonly } -attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll index 84d7c9688d239..089b5bfab8fd9 100644 --- a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll +++ b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -571,27 +571,18 @@ define void @fadd() { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr 
@g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %2 = load i16, ptr @g, align 2 - %3 = call float @llvm.convert.from.fp16.f32(i16 %2) - %add = fadd float %1, %3 - - - %4 = call i16 @llvm.convert.to.fp16.f32(float %add) - - store i16 %4, ptr @g, align 2 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr @g, align 2 + %4 = bitcast i16 %3 to half + %5 = fpext half %4 to float + %add = fadd float %2, %5 + %6 = fptrunc float %add to half + %7 = bitcast half %6 to i16 + store i16 %7, ptr @g, align 2 ret void } -; Function Attrs: nounwind readnone -declare float @llvm.convert.from.fp16.f32(i16) - -; Function Attrs: nounwind readnone -declare i16 @llvm.convert.to.fp16.f32(float) - -; Function Attrs: nounwind define void @fsub() { ; MIPS32-LABEL: fsub: ; MIPS32: # %bb.0: # %entry @@ -651,18 +642,15 @@ define void @fsub() { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %2 = load i16, ptr @g, align 2 - %3 = call float @llvm.convert.from.fp16.f32(i16 %2) - %sub = fsub float %1, %3 - - - %4 = call i16 @llvm.convert.to.fp16.f32(float %sub) - - - store i16 %4, ptr @g, align 2 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr @g, align 2 + %4 = bitcast i16 %3 to half + %5 = fpext half %4 to float + %sub = fsub float %2, %5 + %6 = fptrunc float %sub to half + %7 = bitcast half %6 to i16 + store i16 %7, ptr @g, align 2 ret void } @@ -725,19 +713,15 @@ define void @fmult() { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %2 = load i16, ptr @g, align 2 - %3 = call float @llvm.convert.from.fp16.f32(i16 %2) - %mul = fmul float %1, %3 - - - %4 = call i16 @llvm.convert.to.fp16.f32(float %mul) - - - store i16 %4, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr @g, align 2 + %4 = bitcast i16 %3 
to half + %5 = fpext half %4 to float + %mul = fmul float %2, %5 + %6 = fptrunc float %mul to half + %7 = bitcast half %6 to i16 + store i16 %7, ptr @g, align 2 ret void } @@ -799,19 +783,16 @@ define void @fdiv() { ; MIPS64-N64-NEXT: jr $ra ; MIPS64-N64-NEXT: sh $2, 0($1) entry: - %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %2 = load i16, ptr @g, align 2 - %3 = call float @llvm.convert.from.fp16.f32(i16 %2) - %div = fdiv float %1, %3 - - - %4 = call i16 @llvm.convert.to.fp16.f32(float %div) - - store i16 %4, ptr @g, align 2 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr @g, align 2 + %4 = bitcast i16 %3 to half + %5 = fpext half %4 to float + %div = fdiv float %2, %5 + %6 = fptrunc float %div to half + %7 = bitcast half %6 to i16 + store i16 %7, ptr @g, align 2 ret void } @@ -913,19 +894,15 @@ define void @frem() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %2 = load i16, ptr @g, align 2 - %3 = call float @llvm.convert.from.fp16.f32(i16 %2) - %rem = frem float %1, %3 - - - %4 = call i16 @llvm.convert.to.fp16.f32(float %rem) - - - store i16 %4, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr @g, align 2 + %4 = bitcast i16 %3 to half + %5 = fpext half %4 to float + %rem = frem float %2, %5 + %6 = fptrunc float %rem to half + %7 = bitcast half %6 to i16 + store i16 %7, ptr @g, align 2 ret void } @@ -1044,16 +1021,14 @@ define void @fcmp() { ; MIPSR6-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - %2 = load i16, ptr @g, align 2 - %3 = call float @llvm.convert.from.fp16.f32(i16 %2) - %fcmp = fcmp oeq float %1, %3 - - - %4 = zext i1 %fcmp to i16 - store i16 %4, ptr @i1, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %3 = load i16, ptr @g, align 2 + %4 = 
bitcast i16 %3 to half + %5 = fpext half %4 to float + %fcmp = fcmp oeq float %2, %5 + %6 = zext i1 %fcmp to i16 + store i16 %6, ptr @i1, align 2 ret void } @@ -1118,16 +1093,12 @@ define void @fpowi() { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %powi = call float @llvm.powi.f32.i32(float %1, i32 2) - - - %2 = call i16 @llvm.convert.to.fp16.f32(float %powi) - - - store i16 %2, ptr @g, align 2 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %powi = call float @llvm.powi.f32.i32(float %2, i32 2) + %3 = fptrunc float %powi to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } @@ -1229,16 +1200,12 @@ define void @fpowi_var(i32 %var) { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %powi = call float @llvm.powi.f32.i32(float %1, i32 %var) - - - %2 = call i16 @llvm.convert.to.fp16.f32(float %powi) - - - store i16 %2, ptr @g, align 2 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %powi = call float @llvm.powi.f32.i32(float %2, i32 %var) + %3 = fptrunc float %powi to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } @@ -1342,19 +1309,16 @@ define void @fpow(float %var) { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %powi = call float @llvm.pow.f32(float %1, float %var) - - - %2 = call i16 @llvm.convert.to.fp16.f32(float %powi) - - - store i16 %2, ptr @g, align 2 + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %powi = call float @llvm.pow.f32(float %2, float %var) + %3 = fptrunc float %powi to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } + declare float @llvm.log2.f32(float %Val) define void @flog2() { @@ -1452,15 +1416,12 @@ define void @flog2() { ; MIPS64-N64-NEXT: daddiu $sp, 
$sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %log2 = call float @llvm.log2.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %log2) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %log2 = call float @llvm.log2.f32(float %2) + %3 = fptrunc float %log2 to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } @@ -1561,15 +1522,12 @@ define void @flog10() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %log10 = call float @llvm.log10.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %log10) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %log10 = call float @llvm.log10.f32(float %2) + %3 = fptrunc float %log10 to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } @@ -1634,15 +1592,12 @@ define void @fsqrt() { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %sqrt = call float @llvm.sqrt.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %sqrt) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %sqrt = call float @llvm.sqrt.f32(float %2) + %3 = fptrunc float %sqrt to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } @@ -1743,19 +1698,16 @@ define void @fsin() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %sin = call float @llvm.sin.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %sin) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %sin = call float @llvm.sin.f32(float %2) + %3 = fptrunc float %sin to half + %4 = bitcast 
half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.cos.f32(float %Val) +declare float @llvm.cos.f32(float) #0 define void @fcos() { ; MIPS32-LABEL: fcos: @@ -1852,19 +1804,16 @@ define void @fcos() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %cos = call float @llvm.cos.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %cos) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %cos = call float @llvm.cos.f32(float %2) + %3 = fptrunc float %cos to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.exp.f32(float %Val) +declare float @llvm.exp.f32(float) #0 define void @fexp() { ; MIPS32-LABEL: fexp: @@ -1961,18 +1910,16 @@ define void @fexp() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - %exp = call float @llvm.exp.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %exp) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %exp = call float @llvm.exp.f32(float %2) + %3 = fptrunc float %exp to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.exp2.f32(float %Val) +declare float @llvm.exp2.f32(float) #0 define void @fexp2() { ; MIPS32-LABEL: fexp2: @@ -2069,19 +2016,16 @@ define void @fexp2() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %exp2 = call float @llvm.exp2.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %exp2) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %exp2 = call float @llvm.exp2.f32(float %2) + %3 = fptrunc float %exp2 to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr 
@g, align 2 ret void } -declare float @llvm.fma.f32(float, float, float) +declare float @llvm.fma.f32(float, float, float) #0 define void @ffma(float %b, float %c) { ; MIPS32-LABEL: ffma: @@ -2185,22 +2129,16 @@ define void @ffma(float %b, float %c) { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %fma = call float @llvm.fma.f32(float %1, float %b, float %c) - %2 = call i16 @llvm.convert.to.fp16.f32(float %fma) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %fma = call float @llvm.fma.f32(float %2, float %b, float %c) + %3 = fptrunc float %fma to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -; FIXME: For MIPSR6, this should produced the maddf.s instruction. MIPSR5 cannot -; fuse the operation such that the intermediate result is not rounded. - -declare float @llvm.fmuladd.f32(float, float, float) +declare float @llvm.fmuladd.f32(float, float, float) #0 define void @ffmuladd(float %b, float %c) { ; MIPS32-O32-LABEL: ffmuladd: @@ -2321,21 +2259,16 @@ define void @ffmuladd(float %b, float %c) { ; MIPSR6-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - -; MIPS32-N32: madd.s $f[[F1:[0-9]]], $f13, $f[[F0]], $f12 -; MIPS32-N64: madd.s $f[[F1:[0-9]]], $f13, $f[[F0]], $f12 - - %fmuladd = call float @llvm.fmuladd.f32(float %1, float %b, float %c) - %2 = call i16 @llvm.convert.to.fp16.f32(float %fmuladd) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %fmuladd = call float @llvm.fmuladd.f32(float %2, float %b, float %c) + %3 = fptrunc float %fmuladd to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.fabs.f32(float %Val) +declare float @llvm.fabs.f32(float) #0 define void @ffabs() { ; MIPS32-LABEL: ffabs: @@ -2396,19 +2329,16 @@ define 
void @ffabs() { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %fabs = call float @llvm.fabs.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %fabs) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %fabs = call float @llvm.fabs.f32(float %2) + %3 = fptrunc float %fabs to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.minnum.f32(float %Val, float %b) +declare float @llvm.minnum.f32(float, float) #0 define void @fminnum(float %b) { ; MIPS32-O32-LABEL: fminnum: @@ -2565,19 +2495,16 @@ define void @fminnum(float %b) { ; MIPSR6-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %minnum = call float @llvm.minnum.f32(float %1, float %b) - %2 = call i16 @llvm.convert.to.fp16.f32(float %minnum) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %minnum = call float @llvm.minnum.f32(float %2, float %b) + %3 = fptrunc float %minnum to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.maxnum.f32(float %Val, float %b) +declare float @llvm.maxnum.f32(float, float) #0 define void @fmaxnum(float %b) { ; MIPS32-O32-LABEL: fmaxnum: @@ -2734,21 +2661,16 @@ define void @fmaxnum(float %b) { ; MIPSR6-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %maxnum = call float @llvm.maxnum.f32(float %1, float %b) - %2 = call i16 @llvm.convert.to.fp16.f32(float %maxnum) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %maxnum = call float @llvm.maxnum.f32(float %2, float %b) + %3 = fptrunc float %maxnum to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -; This expansion of fcopysign 
could be done without converting f16 to float. - -declare float @llvm.copysign.f32(float %Val, float %b) +declare float @llvm.copysign.f32(float, float) #0 define void @fcopysign(float %b) { ; MIPS32-LABEL: fcopysign: @@ -2809,19 +2731,16 @@ define void @fcopysign(float %b) { ; MIPS64-N64-NEXT: sh $2, 0($1) entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %copysign = call float @llvm.copysign.f32(float %1, float %b) - %2 = call i16 @llvm.convert.to.fp16.f32(float %copysign) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %copysign = call float @llvm.copysign.f32(float %2, float %b) + %3 = fptrunc float %copysign to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.floor.f32(float %Val) +declare float @llvm.floor.f32(float) #0 define void @ffloor() { ; MIPS32-LABEL: ffloor: @@ -2918,19 +2837,16 @@ define void @ffloor() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %floor = call float @llvm.floor.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %floor) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %floor = call float @llvm.floor.f32(float %2) + %3 = fptrunc float %floor to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.ceil.f32(float %Val) +declare float @llvm.ceil.f32(float) #0 define void @fceil() { ; MIPS32-LABEL: fceil: @@ -3027,19 +2943,16 @@ define void @fceil() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %ceil = call float @llvm.ceil.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %ceil) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %ceil = call 
float @llvm.ceil.f32(float %2) + %3 = fptrunc float %ceil to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.trunc.f32(float %Val) +declare float @llvm.trunc.f32(float) #0 define void @ftrunc() { ; MIPS32-LABEL: ftrunc: @@ -3136,19 +3049,16 @@ define void @ftrunc() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %trunc = call float @llvm.trunc.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %trunc) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %trunc = call float @llvm.trunc.f32(float %2) + %3 = fptrunc float %trunc to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.rint.f32(float %Val) +declare float @llvm.rint.f32(float) #0 define void @frint() { ; MIPS32-LABEL: frint: @@ -3245,17 +3155,16 @@ define void @frint() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - %rint = call float @llvm.rint.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %rint) - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %rint = call float @llvm.rint.f32(float %2) + %3 = fptrunc float %rint to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.nearbyint.f32(float %Val) +declare float @llvm.nearbyint.f32(float) #0 define void @fnearbyint() { ; MIPS32-LABEL: fnearbyint: @@ -3352,19 +3261,16 @@ define void @fnearbyint() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %nearbyint = call float @llvm.nearbyint.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %nearbyint) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = 
fpext half %1 to float + %nearbyint = call float @llvm.nearbyint.f32(float %2) + %3 = fptrunc float %nearbyint to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } -declare float @llvm.round.f32(float %Val) +declare float @llvm.round.f32(float) #0 define void @fround() { ; MIPS32-LABEL: fround: @@ -3461,14 +3367,13 @@ define void @fround() { ; MIPS64-N64-NEXT: daddiu $sp, $sp, 32 entry: %0 = load i16, ptr @g, align 2 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - - - %round = call float @llvm.round.f32(float %1) - %2 = call i16 @llvm.convert.to.fp16.f32(float %round) - - - store i16 %2, ptr @g, align 2 - + %1 = bitcast i16 %0 to half + %2 = fpext half %1 to float + %round = call float @llvm.round.f32(float %2) + %3 = fptrunc float %round to half + %4 = bitcast half %3 to i16 + store i16 %4, ptr @g, align 2 ret void } + +attributes #0 = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/CodeGen/NVPTX/fp16.ll b/llvm/test/CodeGen/NVPTX/fp16.ll deleted file mode 100644 index b3f072afbd8c0..0000000000000 --- a/llvm/test/CodeGen/NVPTX/fp16.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc -mtriple=nvptx64 -verify-machineinstrs < %s | FileCheck %s -; RUN: %if ptxas %{ llc -mtriple=nvptx64 -verify-machineinstrs < %s | %ptxas-verify %} - -declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone -declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone -declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone -declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone - -; CHECK-LABEL: @test_convert_fp16_to_fp32 -; CHECK: cvt.f32.f16 -define void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { - %val = load i16, ptr addrspace(1) %in, align 2 - %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone - store float %cvt, ptr addrspace(1) %out, align 4 - ret void -} - - -; 
CHECK-LABEL: @test_convert_fp16_to_fp64 -; CHECK: cvt.f64.f16 -define void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { - %val = load i16, ptr addrspace(1) %in, align 2 - %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone - store double %cvt, ptr addrspace(1) %out, align 4 - ret void -} - - -; CHECK-LABEL: @test_convert_fp32_to_fp16 -; CHECK: cvt.rn.f16.f32 -define void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { - %val = load float, ptr addrspace(1) %in, align 2 - %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone - store i16 %cvt, ptr addrspace(1) %out, align 4 - ret void -} - - -; CHECK-LABEL: @test_convert_fp64_to_fp16 -; CHECK: cvt.rn.f16.f64 -define void @test_convert_fp64_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { - %val = load double, ptr addrspace(1) %in, align 2 - %cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone - store i16 %cvt, ptr addrspace(1) %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll index 903ea691ae6ba..0956189ac3d6d 100644 --- a/llvm/test/CodeGen/PowerPC/half.ll +++ b/llvm/test/CodeGen/PowerPC/half.ll @@ -133,307 +133,6 @@ define half @return(ptr %p) nounwind { ret half %r } -define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; PPC32-LABEL: loadd: -; PPC32: # %bb.0: # %entry -; PPC32-NEXT: mflr r0 -; PPC32-NEXT: stwu r1, -16(r1) -; PPC32-NEXT: stw r0, 20(r1) -; PPC32-NEXT: lhz r3, 2(r3) -; PPC32-NEXT: bl __extendhfsf2 -; PPC32-NEXT: lwz r0, 20(r1) -; PPC32-NEXT: addi r1, r1, 16 -; PPC32-NEXT: mtlr r0 -; PPC32-NEXT: blr -; -; P8-LABEL: loadd: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 2(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, 
r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; P9-LABEL: loadd: -; P9: # %bb.0: # %entry -; P9-NEXT: addi r3, r3, 2 -; P9-NEXT: lxsihzx f0, 0, r3 -; P9-NEXT: xscvhpdp f1, f0 -; P9-NEXT: blr -; -; SOFT-LABEL: loadd: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 2(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -; -; BE-LABEL: loadd: -; BE: # %bb.0: # %entry -; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -112(r1) -; BE-NEXT: std r0, 128(r1) -; BE-NEXT: lhz r3, 2(r3) -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: addi r1, r1, 112 -; BE-NEXT: ld r0, 16(r1) -; BE-NEXT: mtlr r0 -; BE-NEXT: blr -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) - ret double %1 -} - -declare double @llvm.convert.from.fp16.f64(i16) - -define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; PPC32-LABEL: loadf: -; PPC32: # %bb.0: # %entry -; PPC32-NEXT: mflr r0 -; PPC32-NEXT: stwu r1, -16(r1) -; PPC32-NEXT: stw r0, 20(r1) -; PPC32-NEXT: lhz r3, 2(r3) -; PPC32-NEXT: bl __extendhfsf2 -; PPC32-NEXT: lwz r0, 20(r1) -; PPC32-NEXT: addi r1, r1, 16 -; PPC32-NEXT: mtlr r0 -; PPC32-NEXT: blr -; -; P8-LABEL: loadf: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 2(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; P9-LABEL: loadf: -; P9: # %bb.0: # %entry -; P9-NEXT: addi r3, r3, 2 -; P9-NEXT: lxsihzx f0, 0, r3 -; P9-NEXT: xscvhpdp f1, f0 -; P9-NEXT: blr -; -; SOFT-LABEL: loadf: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; 
SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 2(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -; -; BE-LABEL: loadf: -; BE: # %bb.0: # %entry -; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -112(r1) -; BE-NEXT: std r0, 128(r1) -; BE-NEXT: lhz r3, 2(r3) -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: addi r1, r1, 112 -; BE-NEXT: ld r0, 16(r1) -; BE-NEXT: mtlr r0 -; BE-NEXT: blr -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 -} - -declare float @llvm.convert.from.fp16.f32(i16) - -define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { -; PPC32-LABEL: stored: -; PPC32: # %bb.0: # %entry -; PPC32-NEXT: mflr r0 -; PPC32-NEXT: stwu r1, -16(r1) -; PPC32-NEXT: stw r0, 20(r1) -; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill -; PPC32-NEXT: mr r30, r3 -; PPC32-NEXT: bl __truncdfhf2 -; PPC32-NEXT: sth r3, 0(r30) -; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload -; PPC32-NEXT: lwz r0, 20(r1) -; PPC32-NEXT: addi r1, r1, 16 -; PPC32-NEXT: mtlr r0 -; PPC32-NEXT: blr -; -; P8-LABEL: stored: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; P9-LABEL: stored: -; P9: # %bb.0: # %entry -; P9-NEXT: xscvdphp f0, f1 -; P9-NEXT: stxsihx f0, 0, r3 -; P9-NEXT: blr -; -; SOFT-LABEL: stored: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: mr r30, r3 -; 
SOFT-NEXT: mr r3, r4 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -; -; BE-LABEL: stored: -; BE: # %bb.0: # %entry -; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -128(r1) -; BE-NEXT: std r0, 144(r1) -; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncdfhf2 -; BE-NEXT: nop -; BE-NEXT: sth r3, 0(r30) -; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; BE-NEXT: addi r1, r1, 128 -; BE-NEXT: ld r0, 16(r1) -; BE-NEXT: mtlr r0 -; BE-NEXT: blr -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) - store i16 %0, ptr %a, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f64(double) - -define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { -; PPC32-LABEL: storef: -; PPC32: # %bb.0: # %entry -; PPC32-NEXT: mflr r0 -; PPC32-NEXT: stwu r1, -16(r1) -; PPC32-NEXT: stw r0, 20(r1) -; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill -; PPC32-NEXT: mr r30, r3 -; PPC32-NEXT: bl __truncsfhf2 -; PPC32-NEXT: sth r3, 0(r30) -; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload -; PPC32-NEXT: lwz r0, 20(r1) -; PPC32-NEXT: addi r1, r1, 16 -; PPC32-NEXT: mtlr r0 -; PPC32-NEXT: blr -; -; P8-LABEL: storef: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; P9-LABEL: storef: -; P9: # %bb.0: # %entry -; P9-NEXT: xscvdphp f0, f1 -; 
P9-NEXT: stxsihx f0, 0, r3 -; P9-NEXT: blr -; -; SOFT-LABEL: storef: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: clrldi r3, r4, 32 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -; -; BE-LABEL: storef: -; BE: # %bb.0: # %entry -; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -128(r1) -; BE-NEXT: std r0, 144(r1) -; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: sth r3, 0(r30) -; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; BE-NEXT: addi r1, r1, 128 -; BE-NEXT: ld r0, 16(r1) -; BE-NEXT: mtlr r0 -; BE-NEXT: blr -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) - store i16 %0, ptr %a, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f32(float) define void @test_load_store(ptr %in, ptr %out) nounwind { ; PPC32-LABEL: test_load_store: ; PPC32: # %bb.0: @@ -1063,13 +762,13 @@ define void @test_sitofp_i64(i64 %a, ptr %p) nounwind { ; BE-NEXT: cmpldi r5, 1 ; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r4 -; BE-NEXT: ble cr0, .LBB16_2 +; BE-NEXT: ble cr0, .LBB12_2 ; BE-NEXT: # %bb.1: ; BE-NEXT: clrldi r4, r3, 53 ; BE-NEXT: addi r4, r4, 2047 ; BE-NEXT: or r3, r4, r3 ; BE-NEXT: rldicr r3, r3, 0, 52 -; BE-NEXT: .LBB16_2: +; BE-NEXT: .LBB12_2: ; BE-NEXT: std r3, 120(r1) ; BE-NEXT: lfd f0, 120(r1) ; BE-NEXT: fcfid f0, f0 @@ -1147,24 +846,24 @@ define i64 @test_fptoui_i64(ptr %p) nounwind { ; BE-NEXT: lhz r3, 0(r3) ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha 
-; BE-NEXT: lfs f0, .LCPI17_0@toc@l(r3) +; BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; BE-NEXT: lfs f0, .LCPI13_0@toc@l(r3) ; BE-NEXT: fsubs f2, f1, f0 ; BE-NEXT: fcmpu cr0, f1, f0 ; BE-NEXT: fctidz f2, f2 ; BE-NEXT: stfd f2, 120(r1) ; BE-NEXT: fctidz f2, f1 ; BE-NEXT: stfd f2, 112(r1) -; BE-NEXT: blt cr0, .LBB17_2 +; BE-NEXT: blt cr0, .LBB13_2 ; BE-NEXT: # %bb.1: ; BE-NEXT: ld r3, 120(r1) ; BE-NEXT: li r4, 1 ; BE-NEXT: rldic r4, r4, 63, 0 ; BE-NEXT: xor r3, r3, r4 -; BE-NEXT: b .LBB17_3 -; BE-NEXT: .LBB17_2: +; BE-NEXT: b .LBB13_3 +; BE-NEXT: .LBB13_2: ; BE-NEXT: ld r3, 112(r1) -; BE-NEXT: .LBB17_3: +; BE-NEXT: .LBB13_3: ; BE-NEXT: addi r1, r1, 128 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 @@ -1250,16 +949,16 @@ define void @test_uitofp_i64(i64 %a, ptr %p) nounwind { ; BE-NEXT: cmpldi r5, 1 ; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r4 -; BE-NEXT: bgt cr0, .LBB18_2 +; BE-NEXT: bgt cr0, .LBB14_2 ; BE-NEXT: # %bb.1: ; BE-NEXT: mr r4, r3 -; BE-NEXT: b .LBB18_3 -; BE-NEXT: .LBB18_2: +; BE-NEXT: b .LBB14_3 +; BE-NEXT: .LBB14_2: ; BE-NEXT: clrldi r4, r3, 53 ; BE-NEXT: addi r4, r4, 2047 ; BE-NEXT: or r4, r4, r3 ; BE-NEXT: rldicr r4, r4, 0, 52 -; BE-NEXT: .LBB18_3: +; BE-NEXT: .LBB14_3: ; BE-NEXT: rldicl r5, r3, 10, 54 ; BE-NEXT: clrldi r6, r3, 63 ; BE-NEXT: std r4, 112(r1) @@ -1267,28 +966,28 @@ define void @test_uitofp_i64(i64 %a, ptr %p) nounwind { ; BE-NEXT: cmpldi r5, 1 ; BE-NEXT: rldicl r5, r3, 63, 1 ; BE-NEXT: or r4, r6, r5 -; BE-NEXT: ble cr0, .LBB18_5 +; BE-NEXT: ble cr0, .LBB14_5 ; BE-NEXT: # %bb.4: ; BE-NEXT: clrldi r4, r4, 53 ; BE-NEXT: addi r4, r4, 2047 ; BE-NEXT: or r4, r4, r5 ; BE-NEXT: rldicl r4, r4, 53, 11 ; BE-NEXT: rldicl r4, r4, 11, 1 -; BE-NEXT: .LBB18_5: +; BE-NEXT: .LBB14_5: ; BE-NEXT: cmpdi r3, 0 ; BE-NEXT: std r4, 120(r1) -; BE-NEXT: bc 12, lt, .LBB18_7 +; BE-NEXT: bc 12, lt, .LBB14_7 ; BE-NEXT: # %bb.6: ; BE-NEXT: lfd f0, 112(r1) ; BE-NEXT: fcfid f0, f0 ; BE-NEXT: frsp f1, f0 -; BE-NEXT: b .LBB18_8 -; BE-NEXT: .LBB18_7: 
+; BE-NEXT: b .LBB14_8 +; BE-NEXT: .LBB14_7: ; BE-NEXT: lfd f0, 120(r1) ; BE-NEXT: fcfid f0, f0 ; BE-NEXT: frsp f0, f0 ; BE-NEXT: fadds f1, f0, f0 -; BE-NEXT: .LBB18_8: +; BE-NEXT: .LBB14_8: ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: sth r3, 0(r30) @@ -2130,10 +1829,10 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind { ; PPC32-NEXT: stw r3, 8(r1) ; PPC32-NEXT: xoris r3, r30, 32768 ; PPC32-NEXT: stw r3, 12(r1) -; PPC32-NEXT: lis r3, .LCPI23_0@ha +; PPC32-NEXT: lis r3, .LCPI19_0@ha ; PPC32-NEXT: fmr f31, f1 ; PPC32-NEXT: lfd f0, 8(r1) -; PPC32-NEXT: lfs f1, .LCPI23_0@l(r3) +; PPC32-NEXT: lfs f1, .LCPI19_0@l(r3) ; PPC32-NEXT: fsub f0, f0, f1 ; PPC32-NEXT: frsp f1, f0 ; PPC32-NEXT: bl __truncsfhf2 @@ -2265,16 +1964,16 @@ define half @PR40273(half) nounwind { ; PPC32-NEXT: bl __truncsfhf2 ; PPC32-NEXT: clrlwi r3, r3, 16 ; PPC32-NEXT: bl __extendhfsf2 -; PPC32-NEXT: lis r3, .LCPI24_0@ha -; PPC32-NEXT: lfs f0, .LCPI24_0@l(r3) +; PPC32-NEXT: lis r3, .LCPI20_0@ha +; PPC32-NEXT: lfs f0, .LCPI20_0@l(r3) ; PPC32-NEXT: li r3, 0 ; PPC32-NEXT: fcmpu cr0, f1, f0 -; PPC32-NEXT: bc 12, eq, .LBB24_2 +; PPC32-NEXT: bc 12, eq, .LBB20_2 ; PPC32-NEXT: # %bb.1: ; PPC32-NEXT: li r3, 4 -; PPC32-NEXT: .LBB24_2: -; PPC32-NEXT: li r4, .LCPI24_1@l -; PPC32-NEXT: addis r4, r4, .LCPI24_1@ha +; PPC32-NEXT: .LBB20_2: +; PPC32-NEXT: li r4, .LCPI20_1@l +; PPC32-NEXT: addis r4, r4, .LCPI20_1@ha ; PPC32-NEXT: lfsx f1, r4, r3 ; PPC32-NEXT: lwz r0, 20(r1) ; PPC32-NEXT: addi r1, r1, 16 @@ -2294,11 +1993,11 @@ define half @PR40273(half) nounwind { ; P8-NEXT: fmr f0, f1 ; P8-NEXT: xxlxor f1, f1, f1 ; P8-NEXT: fcmpu cr0, f0, f1 -; P8-NEXT: beq cr0, .LBB24_2 +; P8-NEXT: beq cr0, .LBB20_2 ; P8-NEXT: # %bb.1: ; P8-NEXT: vspltisw v2, 1 ; P8-NEXT: xvcvsxwdp vs1, vs34 -; P8-NEXT: .LBB24_2: +; P8-NEXT: .LBB20_2: ; P8-NEXT: addi r1, r1, 32 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 @@ -2350,16 +2049,16 @@ define half @PR40273(half) nounwind { ; BE-NEXT: clrldi r3, r3, 48 ; BE-NEXT: bl 
__extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha -; BE-NEXT: lfs f0, .LCPI24_0@toc@l(r3) +; BE-NEXT: addis r3, r2, .LCPI20_0@toc@ha +; BE-NEXT: lfs f0, .LCPI20_0@toc@l(r3) ; BE-NEXT: li r3, 0 ; BE-NEXT: fcmpu cr0, f1, f0 -; BE-NEXT: bc 12, eq, .LBB24_2 +; BE-NEXT: bc 12, eq, .LBB20_2 ; BE-NEXT: # %bb.1: ; BE-NEXT: li r3, 4 -; BE-NEXT: .LBB24_2: -; BE-NEXT: addis r4, r2, .LCPI24_1@toc@ha -; BE-NEXT: addi r4, r4, .LCPI24_1@toc@l +; BE-NEXT: .LBB20_2: +; BE-NEXT: addis r4, r2, .LCPI20_1@toc@ha +; BE-NEXT: addi r4, r4, .LCPI20_1@toc@l ; BE-NEXT: lfsx f1, r4, r3 ; BE-NEXT: addi r1, r1, 112 ; BE-NEXT: ld r0, 16(r1) @@ -2452,13 +2151,13 @@ define half @fcopysign(half %x, half %y) nounwind { ; PPC32-NEXT: lwz r3, 20(r1) ; PPC32-NEXT: srwi r3, r3, 31 ; PPC32-NEXT: andi. r3, r3, 1 -; PPC32-NEXT: bc 12, gt, .LBB26_2 +; PPC32-NEXT: bc 12, gt, .LBB22_2 ; PPC32-NEXT: # %bb.1: ; PPC32-NEXT: fabs f1, f1 -; PPC32-NEXT: b .LBB26_3 -; PPC32-NEXT: .LBB26_2: +; PPC32-NEXT: b .LBB22_3 +; PPC32-NEXT: .LBB22_2: ; PPC32-NEXT: fnabs f1, f1 -; PPC32-NEXT: .LBB26_3: +; PPC32-NEXT: .LBB22_3: ; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload ; PPC32-NEXT: lwz r0, 36(r1) ; PPC32-NEXT: addi r1, r1, 32 @@ -2530,13 +2229,13 @@ define half @fcopysign(half %x, half %y) nounwind { ; BE-NEXT: lwz r3, 116(r1) ; BE-NEXT: srwi r3, r3, 31 ; BE-NEXT: andi. 
r3, r3, 1 -; BE-NEXT: bc 12, gt, .LBB26_2 +; BE-NEXT: bc 12, gt, .LBB22_2 ; BE-NEXT: # %bb.1: ; BE-NEXT: fabs f1, f1 -; BE-NEXT: b .LBB26_3 -; BE-NEXT: .LBB26_2: +; BE-NEXT: b .LBB22_3 +; BE-NEXT: .LBB22_2: ; BE-NEXT: fnabs f1, f1 -; BE-NEXT: .LBB26_3: +; BE-NEXT: .LBB22_3: ; BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload ; BE-NEXT: addi r1, r1, 128 ; BE-NEXT: ld r0, 16(r1) diff --git a/llvm/test/CodeGen/SPARC/half.ll b/llvm/test/CodeGen/SPARC/half.ll index 565160149e715..3721c7456d02b 100644 --- a/llvm/test/CodeGen/SPARC/half.ll +++ b/llvm/test/CodeGen/SPARC/half.ll @@ -26,109 +26,6 @@ define half @return(ptr %p) nounwind { ret half %r } -define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; SPARC32-LABEL: loadd: -; SPARC32: ! %bb.0: ! %entry -; SPARC32-NEXT: save %sp, -96, %sp -; SPARC32-NEXT: call __extendhfsf2 -; SPARC32-NEXT: lduh [%i0+2], %o0 -; SPARC32-NEXT: fstod %f0, %f0 -; SPARC32-NEXT: ret -; SPARC32-NEXT: restore -; -; SPARC64-LABEL: loadd: -; SPARC64: ! %bb.0: ! %entry -; SPARC64-NEXT: save %sp, -176, %sp -; SPARC64-NEXT: call __extendhfsf2 -; SPARC64-NEXT: lduh [%i0+2], %o0 -; SPARC64-NEXT: fstod %f0, %f0 -; SPARC64-NEXT: ret -; SPARC64-NEXT: restore -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) - ret double %1 -} - -define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; SPARC32-LABEL: loadf: -; SPARC32: ! %bb.0: ! %entry -; SPARC32-NEXT: save %sp, -96, %sp -; SPARC32-NEXT: call __extendhfsf2 -; SPARC32-NEXT: lduh [%i0+2], %o0 -; SPARC32-NEXT: ret -; SPARC32-NEXT: restore -; -; SPARC64-LABEL: loadf: -; SPARC64: ! %bb.0: ! 
%entry -; SPARC64-NEXT: save %sp, -176, %sp -; SPARC64-NEXT: call __extendhfsf2 -; SPARC64-NEXT: lduh [%i0+2], %o0 -; SPARC64-NEXT: ret -; SPARC64-NEXT: restore -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 -} - -define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { -; SPARC32-LABEL: stored: -; SPARC32: ! %bb.0: ! %entry -; SPARC32-NEXT: save %sp, -112, %sp -; SPARC32-NEXT: mov %i2, %i3 -; SPARC32-NEXT: mov %i1, %i2 -; SPARC32-NEXT: std %i2, [%fp+-8] -; SPARC32-NEXT: ldd [%fp+-8], %f0 -; SPARC32-NEXT: std %f0, [%fp+-16] -; SPARC32-NEXT: call __truncdfhf2 -; SPARC32-NEXT: ldd [%fp+-16], %o0 -; SPARC32-NEXT: sth %o0, [%i0] -; SPARC32-NEXT: ret -; SPARC32-NEXT: restore -; -; SPARC64-LABEL: stored: -; SPARC64: ! %bb.0: ! %entry -; SPARC64-NEXT: save %sp, -176, %sp -; SPARC64-NEXT: fmovd %f2, %f0 -; SPARC64-NEXT: call __truncdfhf2 -; SPARC64-NEXT: nop -; SPARC64-NEXT: sth %o0, [%i0] -; SPARC64-NEXT: ret -; SPARC64-NEXT: restore -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) - store i16 %0, ptr %a, align 2 - ret void -} - -define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { -; SPARC32-LABEL: storef: -; SPARC32: ! %bb.0: ! %entry -; SPARC32-NEXT: save %sp, -96, %sp -; SPARC32-NEXT: call __truncsfhf2 -; SPARC32-NEXT: mov %i1, %o0 -; SPARC32-NEXT: sth %o0, [%i0] -; SPARC32-NEXT: ret -; SPARC32-NEXT: restore -; -; SPARC64-LABEL: storef: -; SPARC64: ! %bb.0: ! 
%entry -; SPARC64-NEXT: save %sp, -176, %sp -; SPARC64-NEXT: fmovs %f3, %f1 -; SPARC64-NEXT: call __truncsfhf2 -; SPARC64-NEXT: nop -; SPARC64-NEXT: sth %o0, [%i0] -; SPARC64-NEXT: ret -; SPARC64-NEXT: restore -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) - store i16 %0, ptr %a, align 2 - ret void -} - define void @test_load_store(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: test_load_store: ; CHECK: ! %bb.0: @@ -348,10 +245,10 @@ define i64 @test_fptoui_i64(ptr %p) nounwind { ; SPARC64-NEXT: save %sp, -192, %sp ; SPARC64-NEXT: call __extendhfsf2 ; SPARC64-NEXT: lduh [%i0], %o0 -; SPARC64-NEXT: sethi %h44(.LCPI17_0), %i0 -; SPARC64-NEXT: add %i0, %m44(.LCPI17_0), %i0 +; SPARC64-NEXT: sethi %h44(.LCPI13_0), %i0 +; SPARC64-NEXT: add %i0, %m44(.LCPI13_0), %i0 ; SPARC64-NEXT: sllx %i0, 12, %i0 -; SPARC64-NEXT: ld [%i0+%l44(.LCPI17_0)], %f1 +; SPARC64-NEXT: ld [%i0+%l44(.LCPI13_0)], %f1 ; SPARC64-NEXT: fsubs %f0, %f1, %f2 ; SPARC64-NEXT: fstox %f2, %f2 ; SPARC64-NEXT: std %f2, [%fp+2031] @@ -699,16 +596,16 @@ define half @PR40273(half) nounwind { ; V8-NEXT: save %sp, -96, %sp ; V8-NEXT: call __extendhfsf2 ; V8-NEXT: mov %i0, %o0 -; V8-NEXT: sethi %hi(.LCPI24_0), %i0 -; V8-NEXT: ld [%i0+%lo(.LCPI24_0)], %f1 +; V8-NEXT: sethi %hi(.LCPI20_0), %i0 +; V8-NEXT: ld [%i0+%lo(.LCPI20_0)], %f1 ; V8-NEXT: fcmps %f0, %f1 ; V8-NEXT: nop -; V8-NEXT: fbne .LBB24_2 +; V8-NEXT: fbne .LBB20_2 ; V8-NEXT: nop ; V8-NEXT: ! 
%bb.1: ; V8-NEXT: ret ; V8-NEXT: restore %g0, %g0, %o0 -; V8-NEXT: .LBB24_2: +; V8-NEXT: .LBB20_2: ; V8-NEXT: sethi 15, %i0 ; V8-NEXT: ret ; V8-NEXT: restore @@ -718,8 +615,8 @@ define half @PR40273(half) nounwind { ; V9-NEXT: save %sp, -96, %sp ; V9-NEXT: call __extendhfsf2 ; V9-NEXT: mov %i0, %o0 -; V9-NEXT: sethi %hi(.LCPI24_0), %i0 -; V9-NEXT: ld [%i0+%lo(.LCPI24_0)], %f1 +; V9-NEXT: sethi %hi(.LCPI20_0), %i0 +; V9-NEXT: ld [%i0+%lo(.LCPI20_0)], %f1 ; V9-NEXT: mov %g0, %i0 ; V9-NEXT: sethi 15, %i1 ; V9-NEXT: fcmps %fcc0, %f0, %f1 @@ -732,10 +629,10 @@ define half @PR40273(half) nounwind { ; SPARC64-NEXT: save %sp, -176, %sp ; SPARC64-NEXT: call __extendhfsf2 ; SPARC64-NEXT: srl %i0, 0, %o0 -; SPARC64-NEXT: sethi %h44(.LCPI24_0), %i0 -; SPARC64-NEXT: add %i0, %m44(.LCPI24_0), %i0 +; SPARC64-NEXT: sethi %h44(.LCPI20_0), %i0 +; SPARC64-NEXT: add %i0, %m44(.LCPI20_0), %i0 ; SPARC64-NEXT: sllx %i0, 12, %i0 -; SPARC64-NEXT: ld [%i0+%l44(.LCPI24_0)], %f1 +; SPARC64-NEXT: ld [%i0+%l44(.LCPI20_0)], %f1 ; SPARC64-NEXT: mov %g0, %i0 ; SPARC64-NEXT: sethi 15, %i1 ; SPARC64-NEXT: fcmps %fcc0, %f0, %f1 diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll index 1d954324f1c48..19d9253c69348 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -225,27 +225,3 @@ define double @fmuladd_d(double %a, double %b, double %c) { %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) ret double %1 } - -declare i16 @llvm.convert.to.fp16.f64(double %a) -define i16 @d_to_h(double %a) { -; CHECK-LABEL: d_to_h: -; SOFT: bl __aeabi_d2h -; VFP4: bl __aeabi_d2h -; FP-ARMv8: vcvt{{[bt]}}.f16.f64 - %1 = call i16 @llvm.convert.to.fp16.f64(double %a) - ret i16 %1 -} - -declare double @llvm.convert.from.fp16.f64(i16 %a) -define double @h_to_d(i16 %a) { -; CHECK-LABEL: h_to_d: -; NONE: bl __aeabi_h2f -; NONE: bl __aeabi_f2d -; SP: vcvt{{[bt]}}.f32.f16 
-; SP: bl __aeabi_f2d -; VFPv4: vcvt{{[bt]}}.f32.f16 -; VFPv4: vcvt.f64.f32 -; FP-ARMv8: vcvt{{[bt]}}.f64.f16 - %1 = call double @llvm.convert.from.fp16.f64(i16 %a) - ret double %1 -} diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll index 864983c4e7701..dd2f9d86deec2 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -231,21 +231,3 @@ define float @fmuladd_f(float %a, float %b, float %c) { %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ret float %1 } - -declare i16 @llvm.convert.to.fp16.f32(float %a) -define i16 @f_to_h(float %a) { -; CHECK-LABEL: f_to_h: -; SOFT: bl __aeabi_f2h -; HARD: vcvt{{[bt]}}.f16.f32 - %1 = call i16 @llvm.convert.to.fp16.f32(float %a) - ret i16 %1 -} - -declare float @llvm.convert.from.fp16.f32(i16 %a) -define float @h_to_f(i16 %a) { -; CHECK-LABEL: h_to_f: -; SOFT: bl __aeabi_h2f -; HARD: vcvt{{[bt]}}.f32.f16 - %1 = call float @llvm.convert.from.fp16.f32(i16 %a) - ret float %1 -} diff --git a/llvm/test/CodeGen/VE/Scalar/fp_extload_truncstore.ll b/llvm/test/CodeGen/VE/Scalar/fp_extload_truncstore.ll index f105966bc4d08..ad7f23f0689bc 100644 --- a/llvm/test/CodeGen/VE/Scalar/fp_extload_truncstore.ll +++ b/llvm/test/CodeGen/VE/Scalar/fp_extload_truncstore.ll @@ -1,77 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s -declare i16 @llvm.convert.to.fp16.f32(float %a) -declare i16 @llvm.convert.to.fp16.f64(double %a) - -declare float @llvm.convert.from.fp16.f32(i16 %a) -declare double @llvm.convert.from.fp16.f64(i16 %a) - -define float @func_i16fp32(ptr %a) { -; CHECK-LABEL: func_i16fp32: -; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, 
.LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: ld2b.zx %s0, (, %s0) -; CHECK-NEXT: lea %s1, __extendhfsf2@lo -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: lea.sl %s12, __extendhfsf2@hi(, %s1) -; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l.t (, %s10) - %a.val = load i16, ptr %a, align 4 - %a.asd = call float @llvm.convert.from.fp16.f32(i16 %a.val) - ret float %a.asd -} - -define double @func_i16fp64(ptr %a) { -; CHECK-LABEL: func_i16fp64: -; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ld2b.zx %s0, (, %s0) -; CHECK-NEXT: lea %s1, __extendhfsf2@lo -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: lea.sl %s12, __extendhfsf2@hi(, %s1) -; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: cvt.d.s %s0, %s0 -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l.t (, %s10) - %a.val = load i16, ptr %a, align 4 - %a.asd = call double @llvm.convert.from.fp16.f64(i16 %a.val) - ret double %a.asd -} - define float @func_fp16fp32(ptr %a) { ; CHECK-LABEL: func_fp16fp32: ; CHECK: # %bb.0: @@ -79,7 +8,7 @@ define float @func_fp16fp32(ptr %a) { ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: or %s9, 
0, %s11 ; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -89,7 +18,7 @@ define float @func_fp16fp32(ptr %a) { ; CHECK-NEXT: shm.l %s11, 16(%s61) ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: ld2b.zx %s0, (, %s0) ; CHECK-NEXT: lea %s1, __extendhfsf2@lo ; CHECK-NEXT: and %s1, %s1, (32)0 @@ -111,7 +40,7 @@ define double @func_fp16fp64(ptr %a) { ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 ; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -121,7 +50,7 @@ define double @func_fp16fp64(ptr %a) { ; CHECK-NEXT: shm.l %s11, 16(%s61) ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: ld2b.zx %s0, (, %s0) ; CHECK-NEXT: lea %s1, __extendhfsf2@lo ; CHECK-NEXT: and %s1, %s1, (32)0 @@ -137,42 +66,6 @@ define double @func_fp16fp64(ptr %a) { ret double %a.asd } -define void @func_fp32i16(ptr %fl.ptr, float %val) { -; CHECK-LABEL: func_fp32i16: -; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: or %s18, 0, %s0 -; CHECK-NEXT: lea %s0, __truncsfhf2@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s12, 
__truncsfhf2@hi(, %s0) -; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: st2b %s0, (, %s18) -; CHECK-NEXT: ld %s18, 288(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l.t (, %s10) - %val.asf = call i16 @llvm.convert.to.fp16.f32(float %val) - store i16 %val.asf, ptr %fl.ptr - ret void -} - define half @func_fp32fp16(ptr %fl.ptr, float %a) { ; CHECK-LABEL: func_fp32fp16: ; CHECK: # %bb.0: @@ -180,7 +73,7 @@ define half @func_fp32fp16(ptr %fl.ptr, float %a) { ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 ; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -190,7 +83,7 @@ define half @func_fp32fp16(ptr %fl.ptr, float %a) { ; CHECK-NEXT: shm.l %s11, 16(%s61) ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill ; CHECK-NEXT: st %s19, 296(, %s11) # 8-byte Folded Spill ; CHECK-NEXT: or %s18, 0, %s0 @@ -228,42 +121,6 @@ define double @func_fp32fp64(ptr %a) { ret double %a.asd } -define void @func_fp64i16(ptr %fl.ptr, double %val) { -; CHECK-LABEL: func_fp64i16: -; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: or %s18, 0, %s0 -; CHECK-NEXT: lea %s0, __truncdfhf2@lo 
-; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(, %s0) -; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: st2b %s0, (, %s18) -; CHECK-NEXT: ld %s18, 288(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l.t (, %s10) - %val.asf = call i16 @llvm.convert.to.fp16.f64(double %val) - store i16 %val.asf, ptr %fl.ptr - ret void -} - define void @func_fp64fp16(ptr %fl.ptr, double %val) { ; CHECK-LABEL: func_fp64fp16: ; CHECK: # %bb.0: @@ -271,7 +128,7 @@ define void @func_fp64fp16(ptr %fl.ptr, double %val) { ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 ; CHECK-NEXT: lea %s11, -240(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -281,7 +138,7 @@ define void @func_fp64fp16(ptr %fl.ptr, double %val) { ; CHECK-NEXT: shm.l %s11, 16(%s61) ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill ; CHECK-NEXT: or %s18, 0, %s0 ; CHECK-NEXT: lea %s0, __truncdfhf2@lo diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll index 0486975f6cba7..100f6c6b51de0 100644 --- a/llvm/test/CodeGen/WebAssembly/f16.ll +++ b/llvm/test/CodeGen/WebAssembly/f16.ll @@ -33,102 +33,6 @@ define half @return(ptr %p) nounwind { ret half %r } -define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; DEFISEL-LABEL: loadd: -; DEFISEL: .functype loadd (i32) -> (f64) -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push3=, 0 -; DEFISEL-NEXT: i32.load16_u $push0=, 2($pop3) -; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 -; DEFISEL-NEXT: f64.promote_f32 $push2=, $pop1 -; DEFISEL-NEXT: return $pop2 -; -; FASTISEL-LABEL: loadd: -; FASTISEL: 
.functype loadd (i32) -> (f64) -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push3=, 0 -; FASTISEL-NEXT: i32.load16_u $push2=, 2($pop3) -; FASTISEL-NEXT: call $push1=, __extendhfsf2, $pop2 -; FASTISEL-NEXT: f64.promote_f32 $push0=, $pop1 -; FASTISEL-NEXT: return $pop0 - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %x = load i16, ptr %arrayidx, align 2 - %ret = tail call double @llvm.convert.from.fp16.f64(i16 %x) - ret double %ret -} - -define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { -; DEFISEL-LABEL: loadf: -; DEFISEL: .functype loadf (i32) -> (f32) -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push2=, 0 -; DEFISEL-NEXT: i32.load16_u $push0=, 2($pop2) -; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 -; DEFISEL-NEXT: return $pop1 -; -; FASTISEL-LABEL: loadf: -; FASTISEL: .functype loadf (i32) -> (f32) -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push2=, 0 -; FASTISEL-NEXT: i32.load16_u $push1=, 2($pop2) -; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop1 -; FASTISEL-NEXT: return $pop0 - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %x = load i16, ptr %arrayidx, align 2 - %ret = tail call float @llvm.convert.from.fp16.f32(i16 %x) - ret float %ret -} - -define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { -; DEFISEL-LABEL: stored: -; DEFISEL: .functype stored (i32, f64) -> () -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push2=, 0 -; DEFISEL-NEXT: local.get $push1=, 1 -; DEFISEL-NEXT: call $push0=, __truncdfhf2, $pop1 -; DEFISEL-NEXT: i32.store16 0($pop2), $pop0 -; DEFISEL-NEXT: return -; -; FASTISEL-LABEL: stored: -; FASTISEL: .functype stored (i32, f64) -> () -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push4=, 0 -; FASTISEL-NEXT: local.get $push3=, 1 -; FASTISEL-NEXT: call $push2=, __truncdfhf2, $pop3 -; FASTISEL-NEXT: i32.const $push1=, 65535 -; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1 -; FASTISEL-NEXT: i32.store16 
0($pop4), $pop0 -; FASTISEL-NEXT: return - %x = tail call i16 @llvm.convert.to.fp16.f64(double %b) - store i16 %x, ptr %a, align 2 - ret void -} - -define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { -; DEFISEL-LABEL: storef: -; DEFISEL: .functype storef (i32, f32) -> () -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push2=, 0 -; DEFISEL-NEXT: local.get $push1=, 1 -; DEFISEL-NEXT: call $push0=, __truncsfhf2, $pop1 -; DEFISEL-NEXT: i32.store16 0($pop2), $pop0 -; DEFISEL-NEXT: return -; -; FASTISEL-LABEL: storef: -; FASTISEL: .functype storef (i32, f32) -> () -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push4=, 0 -; FASTISEL-NEXT: local.get $push3=, 1 -; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop3 -; FASTISEL-NEXT: i32.const $push1=, 65535 -; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1 -; FASTISEL-NEXT: i32.store16 0($pop4), $pop0 -; FASTISEL-NEXT: return - %x = tail call i16 @llvm.convert.to.fp16.f32(float %b) - store i16 %x, ptr %a, align 2 - ret void -} - define void @test_load_store(ptr %in, ptr %out) nounwind { ; ALL-LABEL: test_load_store: ; ALL: .functype test_load_store (i32, i32) -> () diff --git a/llvm/test/CodeGen/X86/cvt16-2.ll b/llvm/test/CodeGen/X86/cvt16-2.ll deleted file mode 100644 index 8dbbc57f10564..0000000000000 --- a/llvm/test/CodeGen/X86/cvt16-2.ll +++ /dev/null @@ -1,171 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-avx512fp16 | FileCheck %s -check-prefix=LIBCALL -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512fp16 | FileCheck %s -check-prefix=FP16 - -define void @test1(float %src, ptr %dest) { -; LIBCALL-LABEL: test1: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rbx -; LIBCALL-NEXT: .cfi_def_cfa_offset 16 -; LIBCALL-NEXT: .cfi_offset %rbx, -16 -; LIBCALL-NEXT: movq %rdi, %rbx -; LIBCALL-NEXT: callq __truncsfhf2@PLT -; LIBCALL-NEXT: pextrw $0, %xmm0, %eax -; 
LIBCALL-NEXT: movw %ax, (%rbx) -; LIBCALL-NEXT: popq %rbx -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: retq -; -; FP16-LABEL: test1: -; FP16: # %bb.0: -; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 -; FP16-NEXT: vmovsh %xmm0, (%rdi) -; FP16-NEXT: retq - %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src) - store i16 %1, ptr %dest, align 2 - ret void -} - -define float @test2(ptr nocapture %src) { -; LIBCALL-LABEL: test2: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 -; LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL -; -; FP16-LABEL: test2: -; FP16: # %bb.0: -; FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero -; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 -; FP16-NEXT: retq - %1 = load i16, ptr %src, align 2 - %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1) - ret float %2 -} - -define float @test3(float %src) nounwind uwtable readnone { -; LIBCALL-LABEL: test3: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 16 -; LIBCALL-NEXT: callq __truncsfhf2@PLT -; LIBCALL-NEXT: popq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL -; -; FP16-LABEL: test3: -; FP16: # %bb.0: -; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 -; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 -; FP16-NEXT: retq - %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src) - %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1) - ret float %2 -} - -; FIXME: Should it be __extendhfdf2? 
-define double @test4(ptr nocapture %src) { -; LIBCALL-LABEL: test4: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 16 -; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 -; LIBCALL-NEXT: callq __extendhfsf2@PLT -; LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 -; LIBCALL-NEXT: popq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: retq -; -; FP16-LABEL: test4: -; FP16: # %bb.0: -; FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero -; FP16-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 -; FP16-NEXT: retq - %1 = load i16, ptr %src, align 2 - %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1) - ret double %2 -} - -define i16 @test5(double %src) { -; LIBCALL-LABEL: test5: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 16 -; LIBCALL-NEXT: callq __truncdfhf2@PLT -; LIBCALL-NEXT: pextrw $0, %xmm0, %eax -; LIBCALL-NEXT: # kill: def $ax killed $ax killed $eax -; LIBCALL-NEXT: popq %rcx -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: retq -; -; FP16-LABEL: test5: -; FP16: # %bb.0: -; FP16-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0 -; FP16-NEXT: vmovw %xmm0, %eax -; FP16-NEXT: # kill: def $ax killed $ax killed $eax -; FP16-NEXT: retq - %val = tail call i16 @llvm.convert.to.fp16.f64(double %src) - ret i16 %val -} - -; FIXME: Should it be __extendhfxf2? 
-define x86_fp80 @test6(ptr nocapture %src) { -; LIBCALL-LABEL: test6: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 16 -; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 -; LIBCALL-NEXT: callq __extendhfxf2@PLT -; LIBCALL-NEXT: popq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: retq -; -; FP16-LABEL: test6: -; FP16: # %bb.0: -; FP16-NEXT: pushq %rax -; FP16-NEXT: .cfi_def_cfa_offset 16 -; FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero -; FP16-NEXT: callq __extendhfxf2@PLT -; FP16-NEXT: popq %rax -; FP16-NEXT: .cfi_def_cfa_offset 8 -; FP16-NEXT: retq - %1 = load i16, ptr %src, align 2 - %2 = tail call x86_fp80 @llvm.convert.from.fp16.f80(i16 %1) - ret x86_fp80 %2 -} - -define i16 @test7(x86_fp80 %src) { -; LIBCALL-LABEL: test7: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: subq $24, %rsp -; LIBCALL-NEXT: .cfi_def_cfa_offset 32 -; LIBCALL-NEXT: fldt {{[0-9]+}}(%rsp) -; LIBCALL-NEXT: fstpt (%rsp) -; LIBCALL-NEXT: callq __truncxfhf2@PLT -; LIBCALL-NEXT: pextrw $0, %xmm0, %eax -; LIBCALL-NEXT: # kill: def $ax killed $ax killed $eax -; LIBCALL-NEXT: addq $24, %rsp -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: retq -; -; FP16-LABEL: test7: -; FP16: # %bb.0: -; FP16-NEXT: subq $24, %rsp -; FP16-NEXT: .cfi_def_cfa_offset 32 -; FP16-NEXT: fldt {{[0-9]+}}(%rsp) -; FP16-NEXT: fstpt (%rsp) -; FP16-NEXT: callq __truncxfhf2@PLT -; FP16-NEXT: vmovw %xmm0, %eax -; FP16-NEXT: # kill: def $ax killed $ax killed $eax -; FP16-NEXT: addq $24, %rsp -; FP16-NEXT: .cfi_def_cfa_offset 8 -; FP16-NEXT: retq - %val = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %src) - ret i16 %val -} - -declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone -declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone -declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone -declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone -declare x86_fp80 @llvm.convert.from.fp16.f80(i16) nounwind readnone 
-declare i16 @llvm.convert.to.fp16.f80(x86_fp80) nounwind readnone diff --git a/llvm/test/CodeGen/X86/cvt16.ll b/llvm/test/CodeGen/X86/cvt16.ll deleted file mode 100644 index db615c8065d03..0000000000000 --- a/llvm/test/CodeGen/X86/cvt16.ll +++ /dev/null @@ -1,171 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=LIBCALL -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=F16C -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c,+soft-float | FileCheck %s -check-prefix=SOFTFLOAT -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c,+soft-float | FileCheck %s -check-prefix=SOFTFLOAT - -; This is a test for float to half float conversions on x86-64. -; -; If flag -soft-float is set, or if there is no F16C support, then: -; 1) half float to float conversions are -; translated into calls to __gnu_h2f_ieee defined -; by the compiler runtime library; -; 2) float to half float conversions are translated into calls -; to __gnu_f2h_ieee which expected to be defined by the -; compiler runtime library. 
-; -; Otherwise (we have F16C support): -; 1) half float to float conversion are translated using -; vcvtph2ps instructions; -; 2) float to half float conversions are translated using -; vcvtps2ph instructions - - -define void @test1(float %src, ptr %dest) nounwind { -; LIBCALL-LABEL: test1: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rbx -; LIBCALL-NEXT: movq %rdi, %rbx -; LIBCALL-NEXT: callq __truncsfhf2@PLT -; LIBCALL-NEXT: pextrw $0, %xmm0, (%rbx) -; LIBCALL-NEXT: popq %rbx -; LIBCALL-NEXT: retq -; -; F16C-LABEL: test1: -; F16C: # %bb.0: -; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) -; F16C-NEXT: retq -; -; SOFTFLOAT-LABEL: test1: -; SOFTFLOAT: # %bb.0: -; SOFTFLOAT-NEXT: pushq %rbx -; SOFTFLOAT-NEXT: movq %rsi, %rbx -; SOFTFLOAT-NEXT: callq __truncsfhf2@PLT -; SOFTFLOAT-NEXT: movw %ax, (%rbx) -; SOFTFLOAT-NEXT: popq %rbx -; SOFTFLOAT-NEXT: retq - %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src) - store i16 %1, ptr %dest, align 2 - ret void -} - -define float @test2(ptr nocapture %src) nounwind { -; LIBCALL-LABEL: test2: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 -; LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL -; -; F16C-LABEL: test2: -; F16C: # %bb.0: -; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 -; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: retq -; -; SOFTFLOAT-LABEL: test2: -; SOFTFLOAT: # %bb.0: -; SOFTFLOAT-NEXT: pushq %rax -; SOFTFLOAT-NEXT: movzwl (%rdi), %edi -; SOFTFLOAT-NEXT: callq __extendhfsf2@PLT -; SOFTFLOAT-NEXT: popq %rcx -; SOFTFLOAT-NEXT: retq - %1 = load i16, ptr %src, align 2 - %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1) - ret float %2 -} - -define float @test3(float %src) nounwind uwtable readnone { -; LIBCALL-LABEL: test3: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 16 -; LIBCALL-NEXT: callq __truncsfhf2@PLT -; LIBCALL-NEXT: popq %rax -; LIBCALL-NEXT: .cfi_def_cfa_offset 8 -; LIBCALL-NEXT: jmp __extendhfsf2@PLT # 
TAILCALL -; -; F16C-LABEL: test3: -; F16C: # %bb.0: -; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: retq -; -; SOFTFLOAT-LABEL: test3: -; SOFTFLOAT: # %bb.0: -; SOFTFLOAT-NEXT: pushq %rax -; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 16 -; SOFTFLOAT-NEXT: callq __truncsfhf2@PLT -; SOFTFLOAT-NEXT: movzwl %ax, %edi -; SOFTFLOAT-NEXT: callq __extendhfsf2@PLT -; SOFTFLOAT-NEXT: popq %rcx -; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 8 -; SOFTFLOAT-NEXT: retq - %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src) - %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1) - ret float %2 -} - -define double @test4(ptr nocapture %src) nounwind { -; LIBCALL-LABEL: test4: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 -; LIBCALL-NEXT: callq __extendhfsf2@PLT -; LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 -; LIBCALL-NEXT: popq %rax -; LIBCALL-NEXT: retq -; -; F16C-LABEL: test4: -; F16C: # %bb.0: -; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 -; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; F16C-NEXT: retq -; -; SOFTFLOAT-LABEL: test4: -; SOFTFLOAT: # %bb.0: -; SOFTFLOAT-NEXT: pushq %rax -; SOFTFLOAT-NEXT: movzwl (%rdi), %edi -; SOFTFLOAT-NEXT: callq __extendhfsf2@PLT -; SOFTFLOAT-NEXT: movl %eax, %edi -; SOFTFLOAT-NEXT: callq __extendsfdf2@PLT -; SOFTFLOAT-NEXT: popq %rcx -; SOFTFLOAT-NEXT: retq - %1 = load i16, ptr %src, align 2 - %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1) - ret double %2 -} - -define i16 @test5(double %src) nounwind { -; LIBCALL-LABEL: test5: -; LIBCALL: # %bb.0: -; LIBCALL-NEXT: pushq %rax -; LIBCALL-NEXT: callq __truncdfhf2@PLT -; LIBCALL-NEXT: pextrw $0, %xmm0, %eax -; LIBCALL-NEXT: # kill: def $ax killed $ax killed $eax -; LIBCALL-NEXT: popq %rcx -; LIBCALL-NEXT: retq -; -; F16C-LABEL: test5: -; F16C: # %bb.0: -; F16C-NEXT: pushq %rax -; F16C-NEXT: callq __truncdfhf2@PLT -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: # kill: def 
$ax killed $ax killed $eax -; F16C-NEXT: popq %rcx -; F16C-NEXT: retq -; -; SOFTFLOAT-LABEL: test5: -; SOFTFLOAT: # %bb.0: -; SOFTFLOAT-NEXT: pushq %rax -; SOFTFLOAT-NEXT: callq __truncdfhf2@PLT -; SOFTFLOAT-NEXT: popq %rcx -; SOFTFLOAT-NEXT: retq - %val = tail call i16 @llvm.convert.to.fp16.f64(double %src) - ret i16 %val -} - -declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone -declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone -declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone -declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/X86/fast-isel-double-half-convertion.ll b/llvm/test/CodeGen/X86/fast-isel-double-half-convertion.ll deleted file mode 100644 index d17ce101d0906..0000000000000 --- a/llvm/test/CodeGen/X86/fast-isel-double-half-convertion.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s - -; XFAIL: * - -; In the future, we might want to teach fast-isel how to expand a double-to-half -; conversion into a double-to-float conversion immediately followed by a -; float-to-half conversion. For now, fast-isel is expected to fail. 
- -define double @test_fp16_to_fp64(i32 %a) { -entry: - %0 = trunc i32 %a to i16 - %1 = call double @llvm.convert.from.fp16.f64(i16 %0) - ret float %0 -} - -define i16 @test_fp64_to_fp16(double %a) { -entry: - %0 = call i16 @llvm.convert.to.fp16.f64(double %a) - ret i16 %0 -} - -declare i16 @llvm.convert.to.fp16.f64(double) -declare double @llvm.convert.from.fp16.f64(i16) diff --git a/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll b/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll deleted file mode 100644 index 43a26c123e78f..0000000000000 --- a/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc -fast-isel -fast-isel-abort=1 -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s -; RUN: llc -fast-isel -fast-isel-abort=1 -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+avx512vl < %s | FileCheck %s - -; Verify that fast-isel correctly expands float-half conversions. - -define i16 @test_fp32_to_fp16(float %a) { -; CHECK-LABEL: test_fp32_to_fp16: -; CHECK: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: retq -entry: - %0 = call i16 @llvm.convert.to.fp16.f32(float %a) - ret i16 %0 -} - -define float @test_fp16_to_fp32(i32 %a) { -; CHECK-LABEL: test_fp16_to_fp32: -; CHECK: movzwl %di, %eax -; CHECK-NEXT: vmovd %eax, %xmm0 -; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-NEXT: retq -entry: - %0 = trunc i32 %a to i16 - %1 = call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 -} - -declare i16 @llvm.convert.to.fp16.f32(float) -declare float @llvm.convert.from.fp16.f32(i16)