Skip to content

DAG: Use fast variants of fast math libcalls #147481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/arsenm/arm/start-moving-runtime-libcalls-into-tablegen
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions llvm/include/llvm/IR/RuntimeLibcalls.td
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {

foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
def ADD_#FPTy : RuntimeLibcall;
def FAST_ADD_#FPTy : RuntimeLibcall;

def SUB_#FPTy : RuntimeLibcall;
def FAST_SUB_#FPTy : RuntimeLibcall;

def MUL_#FPTy : RuntimeLibcall;
def FAST_MUL_#FPTy : RuntimeLibcall;

def DIV_#FPTy : RuntimeLibcall;
def FAST_DIV_#FPTy : RuntimeLibcall;

def REM_#FPTy : RuntimeLibcall;
def FMA_#FPTy : RuntimeLibcall;
def POWI_#FPTy : RuntimeLibcall;

def SQRT_#FPTy : RuntimeLibcall;
def FAST_SQRT_#FPTy : RuntimeLibcall;

def CBRT_#FPTy : RuntimeLibcall;
def LOG_#FPTy : RuntimeLibcall;
def LOG_FINITE_#FPTy : RuntimeLibcall;
Expand Down Expand Up @@ -1411,27 +1422,26 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;

// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;

def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;

def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;

def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;

def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;

def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;

// This is the only fast library function for sqrtd.
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;

def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
: RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;
Expand Down
125 changes: 99 additions & 26 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,19 @@ class SelectionDAGLegalize {
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);

/// Expand \p Node to a floating-point libcall, optionally using a fast
/// variant.
///
/// Each Call_* argument is a pair of libcalls for one floating-point type:
/// the first member is the fast variant and the second member is the regular
/// one. The fast variant is used only when \p IsFast is true and the target
/// lowering reports a supported implementation for it; otherwise the regular
/// variant is used. \p Results is passed through to ExpandFPLibCall.
void
ExpandFastFPLibCall(SDNode *Node, bool IsFast,
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
SmallVectorImpl<SDValue> &Results);

SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
Expand Down Expand Up @@ -2229,6 +2236,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}

/// Expand \p Node into a floating-point runtime library call, preferring the
/// fast variant of the call when that is permitted and available.
///
/// Every Call_* argument is a {fast, regular} pair of libcalls for a single
/// floating-point type. The libcall matching the node's result type is picked
/// from the fast members only if \p IsFast is set and TLI reports an
/// implementation for it; in all other cases it is picked from the regular
/// members. The chosen libcall is handed to ExpandFPLibCall along with
/// \p Results.
void SelectionDAGLegalize::ExpandFastFPLibCall(
    SDNode *Node, bool IsFast,
    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
    SmallVectorImpl<SDValue> &Results) {
  const EVT VT = Node->getSimpleValueType(0);

  // FIXME: Probably should define fast to respect nan/inf and only be
  // approximate functions.
  if (IsFast) {
    const RTLIB::Libcall FastLC =
        RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
                            Call_F128.first, Call_PPCF128.first);

    // Only commit to the fast variant when an implementation exists for it.
    if (TLI.getLibcallImpl(FastLC) != RTLIB::Unsupported) {
      ExpandFPLibCall(Node, FastLC, Results);
      return;
    }
  }

  // Either the fast variant was not requested, or there is no fast
  // implementation: use the regular libcall.
  const RTLIB::Libcall RegularLC =
      RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
                          Call_F80.second, Call_F128.second,
                          Call_PPCF128.second);
  ExpandFPLibCall(Node, RegularLC, Results);
}

SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
Expand Down Expand Up @@ -4515,6 +4553,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return true;
}

/// Return if we can use the FAST_* variant of a math libcall for the node.
/// FIXME: This is just guessing, we probably should have unique specific sets
/// flags required per libcall.
static bool canUseFastMathLibcall(const SDNode *Node) {
// FIXME: Probably should define fast to respect nan/inf and only be
// approximate functions.

SDNodeFlags Flags = Node->getFlags();
return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
Flags.hasNoInfs() && Flags.hasNoSignedZeros();
}

void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
Expand Down Expand Up @@ -4635,11 +4685,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
break;
case ISD::FSQRT:
case ISD::STRICT_FSQRT:
ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128, Results);
case ISD::STRICT_FSQRT: {
// FIXME: Probably should define fast to respect nan/inf and only be
// approximate functions.
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
{RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
{RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
{RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
{RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
{RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
Results);
break;
}
case ISD::FCBRT:
ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
Expand Down Expand Up @@ -4876,11 +4933,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::LLRINT_PPCF128, Results);
break;
case ISD::FDIV:
case ISD::STRICT_FDIV:
ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
RTLIB::DIV_F80, RTLIB::DIV_F128,
RTLIB::DIV_PPCF128, Results);
case ISD::STRICT_FDIV: {
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
{RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
{RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
{RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
{RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
{RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
break;
}
case ISD::FREM:
case ISD::STRICT_FREM:
ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
Expand All @@ -4894,17 +4955,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMA_PPCF128, Results);
break;
case ISD::FADD:
case ISD::STRICT_FADD:
ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128,
RTLIB::ADD_PPCF128, Results);
case ISD::STRICT_FADD: {
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
{RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
{RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
{RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
{RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
{RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
break;
}
case ISD::FMUL:
case ISD::STRICT_FMUL:
ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
RTLIB::MUL_F80, RTLIB::MUL_F128,
RTLIB::MUL_PPCF128, Results);
case ISD::STRICT_FMUL: {
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
{RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
{RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
{RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
{RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
{RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
break;
}
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
Expand Down Expand Up @@ -5077,11 +5146,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FSUB:
case ISD::STRICT_FSUB:
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
RTLIB::SUB_F80, RTLIB::SUB_F128,
RTLIB::SUB_PPCF128, Results);
case ISD::STRICT_FSUB: {
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
{RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
{RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
{RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
{RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
{RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
break;
}
case ISD::SREM:
Results.push_back(ExpandIntLibCall(Node, true,
RTLIB::SREM_I8,
Expand Down
41 changes: 15 additions & 26 deletions llvm/lib/IR/RuntimeLibcalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ using namespace RTLIB;
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS

static cl::opt<bool>
HexagonEnableFastMathRuntimeCalls("hexagon-fast-math", cl::Hidden,
cl::desc("Enable Fast Math processing"));

static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
FloatABI::ABIType FloatABIType,
EABI EABIVersion) {
Expand Down Expand Up @@ -268,32 +264,25 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
setLibcallImpl(RTLIB::UREM_I32, RTLIB::__hexagon_umodsi3);
setLibcallImpl(RTLIB::UREM_I64, RTLIB::__hexagon_umoddi3);

const bool FastMath = HexagonEnableFastMathRuntimeCalls;
// This is the only fast library function for sqrtd.
if (FastMath)
setLibcallImpl(RTLIB::SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);

// Prefix is: nothing for "slow-math",
// "fast2_" for V5+ fast-math double-precision
// (actually, keep fast-math and fast-math2 separate for now)
if (FastMath) {
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_fast_adddf3);
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_fast_subdf3);
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_fast_muldf3);
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_fast_divdf3);
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_fast_divsf3);
} else {
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
}

if (FastMath)
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
else
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
setLibcallImpl(RTLIB::FAST_ADD_F64, RTLIB::__hexagon_fast_adddf3);
setLibcallImpl(RTLIB::FAST_SUB_F64, RTLIB::__hexagon_fast_subdf3);
setLibcallImpl(RTLIB::FAST_MUL_F64, RTLIB::__hexagon_fast_muldf3);
setLibcallImpl(RTLIB::FAST_DIV_F64, RTLIB::__hexagon_fast_divdf3);
setLibcallImpl(RTLIB::FAST_DIV_F32, RTLIB::__hexagon_fast_divsf3);
setLibcallImpl(RTLIB::FAST_SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
// This is the only fast library function for sqrtd.
setLibcallImpl(RTLIB::FAST_SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);

setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);

setLibcallImpl(
RTLIB::HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES,
Expand Down
Loading