-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[ARM] Introduce intrinsics for MVE fma under strict-fp. #169771
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-llvm-ir Author: David Green (davemgreen) ChangesSimilar to #169156, this adds an @arm.mve.fma intrinsic for strict-fp. A Builder class is added to act as the common subclass of IRBuilder and IRInt. Patch is 96.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169771.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 2e5e1d93be096..51b7dd16e5195 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -167,7 +167,9 @@ multiclass FMA<bit add> {
// second multiply input.
defvar m2_cg = !if(add, (id $m2), (fneg $m2));
- defvar unpred_cg = (IRIntBase<"fma", [Vector]> $m1, m2_cg, $addend);
+ defvar fma = strictFPAlt<IRIntBase<"fma", [Vector]>,
+ IRInt<"fma", [Vector]>>;
+ defvar unpred_cg = (fma $m1, m2_cg, $addend);
defvar pred_cg = (IRInt<"fma_predicated", [Vector, Predicate]>
$m1, m2_cg, $addend, $pred);
@@ -723,7 +725,7 @@ multiclass compare_with_pred<string condname, dag arguments,
NameOverride<"vcmp" # condname # "q_m" # suffix>;
}
-multiclass compare<string condname, IRBuilder cmpop> {
+multiclass compare<string condname, Builder cmpop> {
// Make all four variants of a comparison: the vector/vector and
// vector/scalar forms, each using compare_with_pred to make a
// predicated and unpredicated version.
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index eeca9153dd742..3714262898476 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -34,7 +34,8 @@ class IRBuilderAddrParam<int index_> : IRBuilderParam<index_>;
class IRBuilderIntParam<int index_, string type_> : IRBuilderParam<index_> {
string type = type_;
}
-class IRBuilderBase {
+class Builder {}
+class IRBuilderBase : Builder {
// The prefix of the function call, including an open parenthesis.
string prefix;
@@ -166,7 +167,7 @@ def address;
// Another node class you can use in the codegen dag. This one corresponds to
// an IR intrinsic function, which has to be specialized to a particular list
// of types.
-class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
+class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> : Builder {
string intname = name_; // base name of the intrinsic
list<Type> params = params_; // list of parameter types
@@ -214,8 +215,8 @@ def bitsize;
// strictFPAlt allows a node to have different code generation under strict-fp.
// TODO: The standard node can be IRBuilderBase or IRIntBase.
-class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> {
- IRBuilderBase standard = standard_;
+class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
+ Builder standard = standard_;
IRIntBase strictfp = strictfp_;
}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
index 768d397cb5611..3ab84459e0515 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
@@ -1,15 +1,22 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_mve.h>
-// CHECK-LABEL: @test_vfmaq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x half> [[A:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x half> [[A:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
#ifdef POLYMORPHIC
@@ -19,10 +26,15 @@ float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmaq_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x float> [[A:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x float> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
#ifdef POLYMORPHIC
@@ -32,12 +44,19 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmaq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_n_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
#ifdef POLYMORPHIC
@@ -47,12 +66,19 @@ float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmaq_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_n_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
#ifdef POLYMORPHIC
@@ -62,12 +88,19 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmasq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmasq_n_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmasq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
#ifdef POLYMORPHIC
@@ -77,12 +110,19 @@ float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmasq_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmasq_n_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmasq_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vfmasq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
#ifdef POLYMORPHIC
@@ -92,11 +132,17 @@ float32x4_t test_vfmasq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmsq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[A:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vfmsq_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vfmsq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP1]]
//
float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
#ifdef POLYMORPHIC
@@ -106,11 +152,17 @@ float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vfmsq_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[A:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vfmsq_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vfmsq_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP1]]
//
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
#ifdef POLYMORPHIC
@@ -312,11 +364,17 @@ uint32x4_t test_vmlasq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vqdmlahq_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]])
-// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_n_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: ret <16 x i8> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP1]]
//
int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
#ifdef POLYMORPHIC
@@ -326,11 +384,17 @@ int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vqdmlahq_n_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]])
-// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_n_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_n_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP1]]
//
int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
#ifdef POLYMORPHIC
@@ -340,10 +404,15 @@ int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vqdmlahq_n_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_n_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_n_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
#ifdef POLYMORPHIC
@@ -353,11 +422,17 @@ int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vqdmlashq_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]])
-// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_n_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: ret <16 x i8> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP1]]
//
int8x16_t test_vqdmlashq_n_s8(int8x16_t m1, int8x16_t m2, int8_t add) {
#ifdef POLYMORPHIC
@@ -367,11 +4...
[truncated]
|
|
✅ With the latest revision this PR passed the undef deprecator. |
Similar to llvm#169156, this adds an @arm.mve.fma intrinsic for strict-fp. A Builder class is added to act as the common subclass of IRBuilder and IRInt.
8a6d43c to
b4db38a
Compare
nasherm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/25571 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/42/builds/7179 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/136/builds/6132 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/19272 Here is the relevant piece of the build log for the reference |
Similar to #169156, this adds an @arm.mve.fma intrinsic for strict-fp. A Builder class is added to act as the common subclass of IRBuilder and IRInt.