From 627bcb3bde64a780ed2b9aaaa9267d97c9679f9c Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <10b.priyanshu@gmail.com> Date: Wed, 26 Nov 2025 17:45:00 +0530 Subject: [PATCH 01/33] Add CIR sqrt builtin support for X86 --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 344 +++++++++++++++++- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 66 +++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 323 +++++++++++++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 14 + .../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 +++ 5 files changed, 772 insertions(+), 21 deletions(-) create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index e612d6a0ba886..291b035e6204c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -802,8 +802,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [ //===----------------------------------------------------------------------===// defvar CIR_YieldableScopes = [ - "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp", - "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp" + "ArrayCtor", "ArrayDtor", "AwaitOp", "CaseOp", "DoWhileOp", "ForOp", + "GlobalOp", "IfOp", "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp" ]; def CIR_YieldOp : CIR_Op<"yield", [ @@ -1640,6 +1640,82 @@ def CIR_CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> { let isLLVMLoweringRecursive = true; } +//===----------------------------------------------------------------------===// +// BinOpOverflowOp +//===----------------------------------------------------------------------===// + +def CIR_BinOpOverflowKind : CIR_I32EnumAttr< + "BinOpOverflowKind", "checked binary arithmetic operation kind", [ + I32EnumAttrCase<"Add", 0, "add">, + I32EnumAttrCase<"Sub", 1, "sub">, + I32EnumAttrCase<"Mul", 2, "mul"> +]>; + +def CIR_BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> { + let summary = "Perform binary integral arithmetic with overflow checking"; + let description = [{ + `cir.binop.overflow` performs binary arithmetic operations with overflow + checking on integral operands. + + The `kind` argument specifies the kind of arithmetic operation to perform. + It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments + specify the input operands of the arithmetic operation. The types of `lhs` + and `rhs` must be the same. + + `cir.binop.overflow` produces two SSA values. `result` is the result of the + arithmetic operation truncated to its specified type. `overflow` is a + boolean value indicating whether overflow happens during the operation. + + The exact semantic of this operation is as follows: + + - `lhs` and `rhs` are promoted to an imaginary integral type that has + infinite precision. + - The arithmetic operation is performed on the promoted operands. + - The infinite-precision result is truncated to the type of `result`. The + truncated result is assigned to `result`. + - If the truncated result is equal to the un-truncated result, `overflow` + is assigned to false. Otherwise, `overflow` is assigned to true. 
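+
+    For example, a checked signed 32-bit addition looks like this (a sketch;
+    the type spellings follow the usual CIR conventions):
+
+    ```mlir
+    %result, %overflow = cir.binop.overflow(add, %lhs, %rhs)
+        : !s32i, (!s32i, !cir.bool)
+    ```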
+  }];
+
+  let arguments = (ins
+    CIR_BinOpOverflowKind:$kind,
+    CIR_IntType:$lhs,
+    CIR_IntType:$rhs
+  );
+
+  let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow);
+
+  let assemblyFormat = [{
+    `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,`
+    `(` qualified(type($result)) `,` qualified(type($overflow)) `)`
+    attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins "cir::IntType":$resultTy,
+                   "cir::BinOpOverflowKind":$kind,
+                   "mlir::Value":$lhs,
+                   "mlir::Value":$rhs), [{
+      auto overflowTy = cir::BoolType::get($_builder.getContext());
+      build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs);
+    }]>
+  ];
+
+  let extraLLVMLoweringPatternDecl = [{
+    static std::string getLLVMIntrinName(cir::BinOpOverflowKind opKind,
+                                         bool isSigned, unsigned width);
+
+    struct EncompassedTypeInfo {
+      bool sign;
+      unsigned width;
+    };
+
+    static EncompassedTypeInfo computeEncompassedTypeWidth(
+        cir::IntType operandTy, cir::IntType resultTy);
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // BinOp
 //===----------------------------------------------------------------------===//
@@ -2533,7 +2609,9 @@ def CIR_FuncOp : CIR_Op<"func", [
     OptionalAttr<DictArrayAttr>:$res_attrs,
     OptionalAttr<FlatSymbolRefAttr>:$aliasee,
     CIR_OptionalPriorityAttr:$global_ctor_priority,
-    CIR_OptionalPriorityAttr:$global_dtor_priority);
+    CIR_OptionalPriorityAttr:$global_dtor_priority,
+    OptionalAttr:$cxx_special_member
+  );
 
   let regions = (region AnyRegion:$body);
 
@@ -2572,7 +2650,32 @@ def CIR_FuncOp : CIR_Op<"func", [
     //===------------------------------------------------------------------===//
 
     bool isDeclaration();
-  }];
+
+    //===------------------------------------------------------------------===//
+    // C++ Special Member Functions
+    //===------------------------------------------------------------------===//
+
+    /// Returns true if this function is a C++ special member function.
+    bool isCXXSpecialMemberFunction();
+
+    bool isCxxConstructor();
+    bool isCxxDestructor();
+
+    /// Returns true if this function is a copy or move assignment operator.
+    bool isCxxSpecialAssignment();
+
+    /// Returns the kind of constructor this function represents, if any.
+    std::optional getCxxConstructorKind();
+
+    /// Returns the kind of assignment operator (move, copy) this function
+    /// represents, if any.
+    std::optional getCxxSpecialAssignKind();
+
+    /// Returns true if the function is a trivial C++ special member
+    /// function, such as a trivial default constructor, copy/move
+    /// constructor, copy/move assignment operator, or destructor.
+    bool isCxxTrivialMemberFunction();
+  }];
 
   let hasCustomAssemblyFormat = 1;
   let hasVerifier = 1;
@@ -2752,6 +2855,100 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> {
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// AwaitOp
+//===----------------------------------------------------------------------===//
+
+def CIR_AwaitKind : CIR_I32EnumAttr<"AwaitKind", "await kind", [
+  I32EnumAttrCase<"Init", 0, "init">,
+  I32EnumAttrCase<"User", 1, "user">,
+  I32EnumAttrCase<"Yield", 2, "yield">,
+  I32EnumAttrCase<"Final", 3, "final">
+]>;
+
+def CIR_AwaitOp : CIR_Op<"await", [
+  DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+  RecursivelySpeculatable, NoRegionArguments
+]> {
+  let summary = "Wraps C++ co_await implicit logic";
+  let description = [{
+    The under-the-hood effect of using C++ `co_await expr` roughly
+    translates to:
+
+    ```c++
+    // co_await expr;
+
+    auto &&x = CommonExpr();
+    if (!x.await_ready()) {
+       ...
+       x.await_suspend(...);
+       ...
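+       // (suspended here: control returns to the caller/resumer until
+       //  the coroutine is resumed)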
+    }
+    x.await_resume();
+    ```
+
+    `cir.await` represents this logic by using 3 regions:
+      - ready: covers veto power from x.await_ready()
+      - suspend: wraps actual x.await_suspend() logic
+      - resume: handles x.await_resume()
+
+    Breaking this up into regions allows individual scrutiny of conditions
+    which might lead to folding some of them out. Lowerings coming out
+    of CIR, e.g. LLVM, should use the `suspend` region to track more
+    lower level codegen (e.g. intrinsic emission for coro.save/coro.suspend).
+
+    There are also 4 flavors of `cir.await` available:
+    - `init`: compiler generated initial suspend via implicit `co_await`.
+    - `user`: also known as normal, representing a user written `co_await`.
+    - `yield`: user written `co_yield` expressions.
+    - `final`: compiler generated final suspend via implicit `co_await`.
+
+    ```mlir
+    cir.scope {
+      ... // auto &&x = CommonExpr();
+      cir.await(user, ready : {
+        ... // x.await_ready()
+      }, suspend : {
+        ... // x.await_suspend()
+      }, resume : {
+        ... // x.await_resume()
+      })
+    }
+    ```
+
+    Note that resolution of the common expression is assumed to happen
+    as part of the enclosing await scope.
+  }];
+
+  let arguments = (ins CIR_AwaitKind:$kind);
+  let regions = (region SizedRegion<1>:$ready,
+                        SizedRegion<1>:$suspend,
+                        SizedRegion<1>:$resume);
+  let assemblyFormat = [{
+    `(` $kind `,`
+    `ready` `:` $ready `,`
+    `suspend` `:` $suspend `,`
+    `resume` `:` $resume `,`
+    `)`
+    attr-dict
+  }];
+
+  let skipDefaultBuilders = 1;
+  let builders = [
+    OpBuilder<(ins
+      "cir::AwaitKind":$kind,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$readyBuilder,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$suspendBuilder,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$resumeBuilder
+    )>
+  ];
+
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // CopyOp
 //===----------------------------------------------------------------------===//
@@ -2988,6 +3185,39 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
   let hasCustomAssemblyFormat = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
+  let summary = "Floating-point square root";
+
+  let description = [{
+    The `cir.sqrt` operation computes the element-wise square root of its input.
+
+    The input must be either:
+      • a floating-point scalar type, or
+      • a vector whose element type is floating-point.
+
+    The result type must match the input type exactly.
+ + Examples: + // scalar + %r = cir.sqrt %x : !cir.fp64 + + // vector + %v = cir.sqrt %vec : !cir.vector + }]; + + // input and output types: float or vector-of-float + let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input); + let results = (outs CIR_AnyFloatOrVecOfFloatType:$result); + + let assemblyFormat = [{ + $input `:` type($input) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // UnreachableOp //===----------------------------------------------------------------------===// @@ -4018,6 +4248,72 @@ def CIR_RotateOp : CIR_Op<"rotate", [Pure, SameOperandsAndResultType]> { let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// FPClass Test Flags +//===----------------------------------------------------------------------===// + +def FPClassTestEnum : CIR_I32EnumAttr<"FPClassTest", "floating-point class test flags", [ + // Basic flags + I32EnumAttrCase<"SignalingNaN", 1, "fcSNan">, + I32EnumAttrCase<"QuietNaN", 2, "fcQNan">, + I32EnumAttrCase<"NegativeInfinity", 4, "fcNegInf">, + I32EnumAttrCase<"NegativeNormal", 8, "fcNegNormal">, + I32EnumAttrCase<"NegativeSubnormal", 16, "fcNegSubnormal">, + I32EnumAttrCase<"NegativeZero", 32, "fcNegZero">, + I32EnumAttrCase<"PositiveZero", 64, "fcPosZero">, + I32EnumAttrCase<"PositiveSubnormal", 128, "fcPosSubnormal">, + I32EnumAttrCase<"PositiveNormal", 256, "fcPosNormal">, + I32EnumAttrCase<"PositiveInfinity", 512, "fcPosInf">, + + // Composite flags + I32EnumAttrCase<"Nan", 3, "fcNan">, // fcSNan | fcQNan + I32EnumAttrCase<"Infinity", 516, "fcInf">, // fcPosInf | fcNegInf + I32EnumAttrCase<"Normal", 264, "fcNormal">, // fcPosNormal | fcNegNormal + I32EnumAttrCase<"Subnormal", 144, "fcSubnormal">, // fcPosSubnormal | fcNegSubnormal + I32EnumAttrCase<"Zero", 96, "fcZero">, // fcPosZero | fcNegZero + I32EnumAttrCase<"PositiveFinite", 448, "fcPosFinite">,// fcPosNormal | fcPosSubnormal | fcPosZero + I32EnumAttrCase<"NegativeFinite", 56, "fcNegFinite">, // fcNegNormal | fcNegSubnormal | fcNegZero + I32EnumAttrCase<"Finite", 504, "fcFinite">, // fcPosFinite | fcNegFinite + I32EnumAttrCase<"Positive", 960, "fcPositive">, // fcPosFinite | fcPosInf + I32EnumAttrCase<"Negative", 60, "fcNegative">, // fcNegFinite | fcNegInf + I32EnumAttrCase<"All", 1023, "fcAllFlags">, // fcNan | fcInf | fcFinite +]> { + let cppNamespace = "::cir"; +} + +def CIR_IsFPClassOp : CIR_Op<"is_fp_class"> { + let summary = "Corresponding to the `__builtin_fpclassify` builtin function in clang"; + + let description = [{ + The `cir.is_fp_class` operation takes a floating-point value as its first + argument and a bitfield of flags as its second argument. The operation + returns a boolean value indicating whether the floating-point value + satisfies the given flags. 
+
+    The flags must be a compile-time constant, and the values are:
+
+    | Bit # | floating-point class |
+    | ----- | -------------------- |
+    | 0     | Signaling NaN        |
+    | 1     | Quiet NaN            |
+    | 2     | Negative infinity    |
+    | 3     | Negative normal      |
+    | 4     | Negative subnormal   |
+    | 5     | Negative zero        |
+    | 6     | Positive zero        |
+    | 7     | Positive subnormal   |
+    | 8     | Positive normal      |
+    | 9     | Positive infinity    |
+  }];
+
+  let arguments = (ins CIR_AnyFloatType:$src,
+                       FPClassTestEnum:$flags);
+  let results = (outs CIR_BoolType:$result);
+  let assemblyFormat = [{
+    $src `,` $flags `:` functional-type($src, $result) attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Assume Operations
 //===----------------------------------------------------------------------===//
@@ -4202,7 +4498,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
     When the `min` attribute is present, the operation returns the minimum
     guaranteed accessible size. When absent (max mode), it returns the maximum
     possible object size. Corresponds to `llvm.objectsize`'s `min` argument.
-    
+
     The `dynamic` attribute determines if the value should be evaluated at
     runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
 
@@ -4658,6 +4954,44 @@ def CIR_TryOp : CIR_Op<"try",[
   let hasLLVMLowering = false;
 }
 
+//===----------------------------------------------------------------------===//
+// Exception related: EhInflightOp
+//===----------------------------------------------------------------------===//
+
+def CIR_EhInflightOp : CIR_Op<"eh.inflight_exception"> {
+  let summary = "Materialize the catch clause formal parameter";
+  let description = [{
+    `cir.eh.inflight_exception` returns two values:
+    - `exception_ptr`: the exception pointer for the in-flight exception
+    - `type_id`: the type info index for the exception type
+
+    This operation is expected to be the first operation in the unwind
+    destination basic blocks of a `cir.try_call` operation.
+
+    The `cleanup` attribute indicates that cleanup code must be run before the
+    values produced by this operation are used to dispatch the exception. This
+    cleanup code must be executed even if the exception is not caught. This
+    helps CIR pass down more accurate information for LLVM lowering to
+    landingpads.
+
+    Example:
+
+    ```mlir
+    %exception_ptr, %type_id = cir.eh.inflight_exception
+    %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+    %exception_ptr, %type_id = cir.eh.inflight_exception cleanup
+    ```
+  }];
+
+  let arguments = (ins UnitAttr:$cleanup,
+                       OptionalAttr<ArrayAttr>:$catch_type_list);
+  let results = (outs CIR_VoidPtrType:$exception_ptr, CIR_UInt32:$type_id);
+  let assemblyFormat = [{
+    (`cleanup` $cleanup^)?
+    ($catch_type_list^)?
+ attr-dict + }]; +} + //===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index ee6900141647f..e91a9e4db229a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -121,20 +121,36 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy); case X86::BI_mm_prefetch: case X86::BI__rdtsc: - case X86::BI__builtin_ia32_rdtscp: + case X86::BI__builtin_ia32_rdtscp: { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: - case X86::BI__builtin_ia32_lzcnt_u64: + case X86::BI__builtin_ia32_lzcnt_u64: { + mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc())); + return emitIntrinsicCallOp(*this, expr, "ctlz", ops[0].getType(), + mlir::ValueRange{ops[0], isZeroPoison}); + } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: - case X86::BI__builtin_ia32_tzcnt_u64: + case X86::BI__builtin_ia32_tzcnt_u64: { + mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc())); + return emitIntrinsicCallOp(*this, expr, "cttz", ops[0].getType(), + mlir::ValueRange{ops[0], isZeroPoison}); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; + // The x86 definition of "undef" is not the same as the LLVM definition + // (PR32176). We leave optimizing away an unnecessary zero constant to the + // IR optimizer and backend. + // TODO: If we had a "freeze" IR instruction to generate a fixed undef + // value, we should use that here instead of a zero. 
+    return builder.getNullValue(convertType(expr->getType()),
+                                getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_vec_ext_v4hi:
   case X86::BI__builtin_ia32_vec_ext_v16qi:
   case X86::BI__builtin_ia32_vec_ext_v8hi:
@@ -169,10 +185,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vec_set_v16hi:
   case X86::BI__builtin_ia32_vec_set_v8si:
   case X86::BI__builtin_ia32_vec_set_v4di:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI_mm_setcsr:
-  case X86::BI__builtin_ia32_ldmxcsr:
+  case X86::BI__builtin_ia32_ldmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+    builder.createStore(loc, ops[0], tmp);
+    return emitIntrinsicCallOp(*this, expr, "x86.sse.ldmxcsr",
+                               builder.getVoidTy(), tmp.getPointer());
+  }
   case X86::BI_mm_getcsr:
-  case X86::BI__builtin_ia32_stmxcsr:
+  case X86::BI__builtin_ia32_stmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getType(), loc);
+    emitIntrinsicCallOp(*this, expr, "x86.sse.stmxcsr", builder.getVoidTy(),
+                        tmp.getPointer());
+    return builder.createLoad(loc, tmp);
+  }
   case X86::BI__builtin_ia32_xsave:
   case X86::BI__builtin_ia32_xsave64:
   case X86::BI__builtin_ia32_xrstor:
@@ -681,10 +713,24 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtsh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
+    errorNYI("masked round sqrt builtins");
+    return {};
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps:
+  case X86::BI__builtin_ia32_sqrtps: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
+    mlir::Value arg = emitScalarExpr(expr->getArg(0));
+    mlir::Type argTy = arg.getType();
+    if (auto vecTy = argTy.dyn_cast<cir::VectorType>()) {
+      assert(vecTy.getNumElements() == 4 &&
+             vecTy.getElementType().isa<cir::SingleType>() &&
+             "__builtin_ia32_sqrtps expects <4 x float> / __m128");
+    }
+    auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
+    return sqrt.getResult();
+  }
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
   case X86::BI__builtin_ia32_sqrtph512:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d43a462a25092..937c66082ca40 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,6 +30,7 @@ #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Transforms/DialectConversion.h" +#include "clang/Basic/LLVM.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" @@ -44,6 +45,96 @@ using namespace cir; using namespace llvm; +using namespace mlir; + +static std::string getLLVMIntrinsicNameForType(Type llvmTy) { + std::string s; + { + llvm::raw_string_ostream os(s); + llvm::Type *unused = nullptr; + os << llvmTy; + } + if (auto vecTy = llvmTy.dyn_cast()) { + } + return s; +} + +// Actual lowering +LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite( + cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const { + + Location loc = op.getLoc(); + MLIRContext *ctx = rewriter.getContext(); + + Type cirResTy = op.getResult().getType(); + Type llvmResTy = getTypeConverter()->convertType(cirResTy); + if (!llvmResTy) + return op.emitOpError( + "expected LLVM dialect result type for cir.sqrt lowering"); + + Value operand = adaptor.getInput(); + Value llvmOperand = operand; + if (operand.getType() != llvmResTy) { + llvmOperand = rewriter.create(loc, llvmResTy, operand); + } + + // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result + std::string intrinsicName = "llvm.sqrt."; + std::string suffix; + + // If the CIR result type is a vector, include the 'vN' part in the suffix. + if (auto vec = cirResTy.dyn_cast()) { + Type elt = vec.getElementType(); + if (auto f = elt.dyn_cast()) { + unsigned width = f.getWidth(); + unsigned n = vec.getNumElements(); + if (width == 32) + suffix = "v" + std::to_string(n) + "f32"; + else if (width == 64) + suffix = "v" + std::to_string(n) + "f64"; + else if (width == 16) + suffix = "v" + std::to_string(n) + "f16"; + else + return op.emitOpError("unsupported float width for sqrt"); + } else { + return op.emitOpError("vector element must be floating point for sqrt"); + } + } else if (auto f = cirResTy.dyn_cast()) { + // Scalar float + unsigned width = f.getWidth(); + if (width == 32) + suffix = "f32"; + else if (width == 64) + suffix = "f64"; + else if (width == 16) + suffix = "f16"; + else + return op.emitOpError("unsupported float width for sqrt"); + } else { + return op.emitOpError("unsupported type for cir.sqrt lowering"); + } + + intrinsicName += suffix; + + // Ensure the llvm intrinsic function exists at module scope. Insert it at + // the start of the module body using an insertion guard. 
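+  // For a 4 x float result, the declaration emitted below is roughly:
+  //   llvm.func @llvm.sqrt.v4f32(vector<4xf32>) -> vector<4xf32>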
+ ModuleOp module = op->getParentOfType(); + if (!module.lookupSymbol(intrinsicName)) { + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(module.getBody()); + auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy}, + /*isVarArg=*/false); + rewriter.create(loc, intrinsicName, llvmFnType); + } + + // Create the call and replace cir.sqrt + auto callee = SymbolRefAttr::get(ctx, intrinsicName); + rewriter.replaceOpWithNewOp(op, llvmResTy, callee, + ArrayRef{llvmOperand}); + + return mlir::success(); +} namespace cir { namespace direct { @@ -284,7 +375,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow, memoryEffect = mlir::LLVM::MemoryEffectsAttr::get( callOp->getContext(), /*other=*/ModRefInfo::Ref, /*argMem=*/ModRefInfo::Ref, - /*inaccessibleMem=*/ModRefInfo::Ref); + /*inaccessibleMem=*/ModRefInfo::Ref, + /*errnoMem=*/ModRefInfo::Ref, + /*targetMem0=*/ModRefInfo::Ref, + /*targetMem1=*/ModRefInfo::Ref); noUnwind = true; willReturn = true; break; @@ -293,7 +387,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow, memoryEffect = mlir::LLVM::MemoryEffectsAttr::get( callOp->getContext(), /*other=*/ModRefInfo::NoModRef, /*argMem=*/ModRefInfo::NoModRef, - /*inaccessibleMem=*/ModRefInfo::NoModRef); + /*inaccessibleMem=*/ModRefInfo::NoModRef, + /*errnoMem=*/ModRefInfo::NoModRef, + /*targetMem0=*/ModRefInfo::NoModRef, + /*targetMem1=*/ModRefInfo::NoModRef); noUnwind = true; willReturn = true; break; @@ -670,6 +767,18 @@ mlir::LogicalResult CIRToLLVMASinOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMIsFPClassOpLowering::matchAndRewrite( + cir::IsFPClassOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Value src = adaptor.getSrc(); + cir::FPClassTest flags = adaptor.getFlags(); + mlir::IntegerType retTy = rewriter.getI1Type(); + + rewriter.replaceOpWithNewOp( + op, retTy, src, static_cast(flags)); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite( cir::AssumeOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -1995,7 +2104,6 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( // attributes are available on cir.global ops. This duplicates code // in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go // away when the placeholders are no longer needed. - assert(!cir::MissingFeatures::opGlobalConstant()); const bool isConst = op.getConstant(); assert(!cir::MissingFeatures::addressSpace()); const unsigned addrSpace = 0; @@ -2055,8 +2163,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. 
- assert(!cir::MissingFeatures::opGlobalConstant()); - const bool isConst = false; + const bool isConst = op.getConstant(); assert(!cir::MissingFeatures::addressSpace()); const unsigned addrSpace = 0; const bool isDsoLocal = op.getDsoLocal(); @@ -2570,6 +2677,120 @@ mlir::LogicalResult CIRToLLVMCmpOpLowering::matchAndRewrite( return cmpOp.emitError() << "unsupported type for CmpOp: " << type; } +mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite( + cir::BinOpOverflowOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Location loc = op.getLoc(); + cir::BinOpOverflowKind arithKind = op.getKind(); + cir::IntType operandTy = op.getLhs().getType(); + cir::IntType resultTy = op.getResult().getType(); + + EncompassedTypeInfo encompassedTyInfo = + computeEncompassedTypeWidth(operandTy, resultTy); + mlir::IntegerType encompassedLLVMTy = + rewriter.getIntegerType(encompassedTyInfo.width); + + mlir::Value lhs = adaptor.getLhs(); + mlir::Value rhs = adaptor.getRhs(); + if (operandTy.getWidth() < encompassedTyInfo.width) { + if (operandTy.isSigned()) { + lhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, lhs); + rhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, rhs); + } else { + lhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, lhs); + rhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, rhs); + } + } + + std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign, + encompassedTyInfo.width); + auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName); + + mlir::IntegerType overflowLLVMTy = rewriter.getI1Type(); + auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral( + rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy}); + + auto callLLVMIntrinOp = mlir::LLVM::CallIntrinsicOp::create( + rewriter, loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs}); + mlir::Value intrinRet = callLLVMIntrinOp.getResult(0); + + mlir::Value result = mlir::LLVM::ExtractValueOp::create( + rewriter, loc, intrinRet, ArrayRef{0}) + .getResult(); + mlir::Value overflow = mlir::LLVM::ExtractValueOp::create( + rewriter, loc, intrinRet, ArrayRef{1}) + .getResult(); + + if (resultTy.getWidth() < encompassedTyInfo.width) { + mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy); + auto truncResult = + mlir::LLVM::TruncOp::create(rewriter, loc, resultLLVMTy, result); + + // Extend the truncated result back to the encompassing type to check for + // any overflows during the truncation. 
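+    // For example, for a !u8 result computed at i32 width: trunc the i32
+    // value to i8, extend it back to i32, and compare against the original
+    // (an inequality means the truncation overflowed).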
+ mlir::Value truncResultExt; + if (resultTy.isSigned()) + truncResultExt = mlir::LLVM::SExtOp::create( + rewriter, loc, encompassedLLVMTy, truncResult); + else + truncResultExt = mlir::LLVM::ZExtOp::create( + rewriter, loc, encompassedLLVMTy, truncResult); + auto truncOverflow = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result); + + result = truncResult; + overflow = mlir::LLVM::OrOp::create(rewriter, loc, overflow, truncOverflow); + } + + mlir::Type boolLLVMTy = + getTypeConverter()->convertType(op.getOverflow().getType()); + if (boolLLVMTy != rewriter.getI1Type()) + overflow = mlir::LLVM::ZExtOp::create(rewriter, loc, boolLLVMTy, overflow); + + rewriter.replaceOp(op, mlir::ValueRange{result, overflow}); + + return mlir::success(); +} + +std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName( + cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) { + // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}` + + std::string name = "llvm."; + + if (isSigned) + name.push_back('s'); + else + name.push_back('u'); + + switch (opKind) { + case cir::BinOpOverflowKind::Add: + name.append("add."); + break; + case cir::BinOpOverflowKind::Sub: + name.append("sub."); + break; + case cir::BinOpOverflowKind::Mul: + name.append("mul."); + break; + } + + name.append("with.overflow.i"); + name.append(std::to_string(width)); + + return name; +} + +CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo +CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth( + cir::IntType operandTy, cir::IntType resultTy) { + bool sign = operandTy.getIsSigned() || resultTy.getIsSigned(); + unsigned width = + std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()), + resultTy.getWidth() + (sign && resultTy.isUnsigned())); + return {sign, width}; +} + mlir::LogicalResult CIRToLLVMShiftOpLowering::matchAndRewrite( cir::ShiftOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -3100,6 +3321,90 @@ mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite( return mlir::success(); } +static mlir::LLVM::LLVMStructType +getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) { + // Create the landing pad type: struct { ptr, i32 } + mlir::MLIRContext *ctx = rewriter.getContext(); + auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx); + llvm::SmallVector structFields = {llvmPtr, rewriter.getI32Type()}; + return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields); +} + +mlir::LogicalResult CIRToLLVMEhInflightOpLowering::matchAndRewrite( + cir::EhInflightOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto llvmFn = op->getParentOfType(); + assert(llvmFn && "expected LLVM function parent"); + mlir::Block *entryBlock = &llvmFn.getRegion().front(); + assert(entryBlock->isEntryBlock()); + + mlir::ArrayAttr catchListAttr = op.getCatchTypeListAttr(); + mlir::SmallVector catchSymAddrs; + + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + mlir::Location loc = op.getLoc(); + + // %landingpad = landingpad { ptr, i32 } + // Note that since llvm.landingpad has to be the first operation on the + // block, any needed value for its operands has to be added somewhere else. 
+  if (catchListAttr) {
+    // catch ptr @_ZTIi
+    // catch ptr @_ZTIPKc
+    for (mlir::Attribute catchAttr : catchListAttr) {
+      auto symAttr = cast<mlir::FlatSymbolRefAttr>(catchAttr);
+      // Generate `llvm.mlir.addressof` for each symbol, and place those
+      // operations in the LLVM function entry basic block.
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(entryBlock);
+      mlir::Value addrOp = mlir::LLVM::AddressOfOp::create(
+          rewriter, loc, llvmPtrTy, symAttr.getValue());
+      catchSymAddrs.push_back(addrOp);
+    }
+  } else if (!op.getCleanup()) {
+    // We need to emit a catch-all only if cleanup is not set, because when a
+    // catch-all handler is present, there is no case in which the unwind
+    // would proceed past the handler.
+    mlir::OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToStart(entryBlock);
+    mlir::Value nullOp = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+    catchSymAddrs.push_back(nullOp);
+  }
+
+  // %slot = extractvalue { ptr, i32 } %x, 0
+  // %selector = extractvalue { ptr, i32 } %x, 1
+  mlir::LLVM::LLVMStructType llvmLandingPadStructTy =
+      getLLVMLandingPadStructTy(rewriter);
+  auto landingPadOp = mlir::LLVM::LandingpadOp::create(
+      rewriter, loc, llvmLandingPadStructTy, catchSymAddrs);
+
+  if (op.getCleanup())
+    landingPadOp.setCleanup(true);
+
+  mlir::Value slot =
+      mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 0);
+  mlir::Value selector =
+      mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 1);
+  rewriter.replaceOp(op, mlir::ValueRange{slot, selector});
+
+  // Landing pads are required to be in LLVM functions with a personality
+  // attribute.
+  // TODO(cir): for now hardcode personality creation in order to start
+  // adding exception tests; once we annotate CIR with such information,
+  // change it to be in FuncOp lowering instead.
+  mlir::OpBuilder::InsertionGuard guard(rewriter);
+  // Insert personality decl before the current function.
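+  // The declaration built below is roughly:
+  //   declare i32 @__gxx_personality_v0(...)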
+ rewriter.setInsertionPoint(llvmFn); + auto personalityFnTy = + mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {}, + /*isVarArg=*/true); + + const StringRef fnName = "__gxx_personality_v0"; + createLLVMFuncOpIfNotExist(rewriter, op, fnName, personalityFnTy); + llvmFn.setPersonality(fnName); + + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite( cir::TrapOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -3843,6 +4148,12 @@ mlir::LogicalResult CIRToLLVMBlockAddressOpLowering::matchAndRewrite( return mlir::failure(); } +mlir::LogicalResult CIRToLLVMAwaitOpLowering::matchAndRewrite( + cir::AwaitOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + return mlir::failure(); +} + std::unique_ptr createConvertCIRToLLVMPass() { return std::make_unique(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 0591de545b81d..be6a380372efe 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -12,11 +12,25 @@ #ifndef CLANG_CIR_LOWERTOLLVM_H #define CLANG_CIR_LOWERTOLLVM_H +#include "mlir/Conversion/PatternRewriter.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Transforms/DialectConversion.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +namespace cir { +class SqrtOp; +} + +class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override; +}; + namespace cir { namespace direct { diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c new file mode 100644 index 0000000000000..6e1dace82928c --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c @@ -0,0 +1,46 @@ +// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.) 
+// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+  return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+  return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+  return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+  return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+  return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+  return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt

From 4a39fd7185cd294b96a4faadc2fa21f2a4d53b6b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Sat, 29 Nov 2025 09:59:40 +0530
Subject: [PATCH 02/33] Implement sqrt builtins for all vector sizes

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 20 ++----
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c  | 67 +++++++++++++++++++
 2 files changed, 73 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 45c0de322925a..f8a139ec7a8e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -786,24 +786,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
-    mlir::Value arg = emitScalarExpr(expr->getArg(0));
-    mlir::Type argTy = arg.getType();
-    if (auto vecTy = argTy.dyn_cast<cir::VectorType>()) {
-      assert(vecTy.getNumElements() == 4 &&
-             vecTy.getElementType().isa<cir::SingleType>() &&
-             "__builtin_ia32_sqrtps expects <4 x float> / __m128");
-    }
-    auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
-    return sqrt.getResult();
-  }
+  case X86::BI__builtin_ia32_sqrtps:
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
   case X86::BI__builtin_ia32_sqrtph512:
   case X86::BI__builtin_ia32_sqrtps512:
-  case X86::BI__builtin_ia32_sqrtpd512:
+  case X86::BI__builtin_ia32_sqrtpd512: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value arg = ops[0];
+    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+  }
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
   case X86::BI__builtin_ia32_pmuludq512:
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
new file mode 100644
index 0000000000000..ef5cb954e3efe
--- /dev/null
+++
b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c @@ -0,0 +1,67 @@ +// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.) +// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s + +#include + +// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit) +__m128 test_sqrtps(__m128 x) { + return __builtin_ia32_sqrtps(x); +} +// CHECK-LABEL: cir.func @test_sqrtps +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit) +__m256 test_sqrtps256(__m256 x) { + return __builtin_ia32_sqrtps256(x); +} +// CHECK-LABEL: cir.func @test_sqrtps256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit) +__m512 test_sqrtps512(__m512 x) { + return __builtin_ia32_sqrtps512(x); +} +// CHECK-LABEL: cir.func @test_sqrtps512 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit) +__m128d test_sqrtpd(__m128d x) { + return __builtin_ia32_sqrtpd(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit) +__m256d test_sqrtpd256(__m256d x) { + return __builtin_ia32_sqrtpd256(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit) +__m512d test_sqrtpd512(__m512d x) { + return __builtin_ia32_sqrtpd512(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd512 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit) +__m128h test_sqrtph(__m128h x) { + return __builtin_ia32_sqrtph(x); +} +// CHECK-LABEL: cir.func @test_sqrtph +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit) +__m256h test_sqrtph256(__m256h x) { + return __builtin_ia32_sqrtph256(x); +} +// CHECK-LABEL: cir.func @test_sqrtph256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit) +__m512h test_sqrtph512(__m512h x) { + return __builtin_ia32_sqrtph512(x); +} +// CHECK-LABEL: cir.func @test_sqrtph512 +// CHECK: cir.sqrt \ No newline at end of file From ef3fd9711494e864190932566bcfe46231b95c51 Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <10b.priyanshu@gmail.com> Date: Sun, 30 Nov 2025 11:44:23 +0530 Subject: [PATCH 03/33] Test file renamed --- .../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 ------------------- 1 file changed, 46 deletions(-) delete mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c deleted file mode 100644 index 6e1dace82928c..0000000000000 --- a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c +++ /dev/null @@ -1,46 +0,0 @@ -// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.) 
-// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
-
-#include <immintrin.h>
-
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
-  return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
-  return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
-  return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
-  return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
-  return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
-  return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt

From 97056731fce0a5e5c2185e16986e0189cec95c7b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Tue, 2 Dec 2025 21:29:57 +0530
Subject: [PATCH 04/33] Add sqrt changes patch

---
 my-sqrt-changes.patch | Bin 0 -> 12058 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 my-sqrt-changes.patch

diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
new file mode 100644
index 0000000000000000000000000000000000000000..87c0ca69ac8abe6aaa684ffbbce3c65e342f6066
GIT binary patch
literal 12058
[12058 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

From 21119e5ae7529285662c0e9dc6c0024e07a5899b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Wed, 3 Dec 2025 19:19:33 +0530
Subject: [PATCH 05/33] group with other floating point ops

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td |  41 ++--------
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp   |   4 +-
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c |  80 +++++--------------
 3 files changed, 31 insertions(+), 94 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2dc71c68f8a94..dc9e3c6a486d6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3275,39 +3275,6 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
   let hasCustomAssemblyFormat = 1;
 }
 
-//===----------------------------------------------------------------------===//
-// SqrtOp
-//===----------------------------------------------------------------------===//
-
-def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
-  let summary = "Floating-point square root";
-
-  let description = [{
-    The `cir.sqrt` operation computes the element-wise square root of its input.
-
-    The input must be either:
-      • a floating-point scalar type, or
-      • a vector whose element type is floating-point.
-
-    The result type must match the input type exactly.
-
-    Examples:
-      // scalar
-      %r = cir.sqrt %x : !cir.fp64
-
-      // vector
-      %v = cir.sqrt %vec : !cir.vector
-  }];
-
-  // input and output types: float or vector-of-float
-  let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
-  let results = (outs CIR_AnyFloatOrVecOfFloatType:$result);
-
-  let assemblyFormat = [{
-    $input `:` type($input) attr-dict
-  }];
-}
-
 //===----------------------------------------------------------------------===//
 // UnreachableOp
 //===----------------------------------------------------------------------===//
@@ -4664,6 +4631,14 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
 // Floating Point Ops
 //===----------------------------------------------------------------------===//
 
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+  let summary = "Floating-point square root operation";
+
+  let description = [{
+    Computes the square root of a floating-point value or vector.
+ }]; +} + class CIR_UnaryFPToFPBuiltinOp : CIR_Op { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index f8a139ec7a8e0..35ba0f48ce6d8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -781,14 +781,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: - errorNYI("masked round sqrt builtins"); - return {}; case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps256: case X86::BI__builtin_ia32_sqrtps: case X86::BI__builtin_ia32_sqrtph256: case X86::BI__builtin_ia32_sqrtph: + errorNYI("Unimplemented builtin"); + return {}; case X86::BI__builtin_ia32_sqrtph512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c index ef5cb954e3efe..97993cabf0ebf 100644 --- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c +++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c @@ -1,67 +1,29 @@ -// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.) -// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s - #include +// Test X86-specific sqrt builtins -// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit) -__m128 test_sqrtps(__m128 x) { - return __builtin_ia32_sqrtps(x); -} -// CHECK-LABEL: cir.func @test_sqrtps -// CHECK: cir.sqrt - -// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit) -__m256 test_sqrtps256(__m256 x) { - return __builtin_ia32_sqrtps256(x); -} -// CHECK-LABEL: cir.func @test_sqrtps256 -// CHECK: cir.sqrt - -// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit) -__m512 test_sqrtps512(__m512 x) { - return __builtin_ia32_sqrtps512(x); -} -// CHECK-LABEL: cir.func @test_sqrtps512 -// CHECK: cir.sqrt - -// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit) -__m128d test_sqrtpd(__m128d x) { - return __builtin_ia32_sqrtpd(x); -} -// CHECK-LABEL: cir.func @test_sqrtpd -// CHECK: cir.sqrt - -// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit) -__m256d test_sqrtpd256(__m256d x) { - return __builtin_ia32_sqrtpd256(x); -} -// CHECK-LABEL: cir.func @test_sqrtpd256 -// CHECK: cir.sqrt - -// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit) -__m512d test_sqrtpd512(__m512d x) { - return __builtin_ia32_sqrtpd512(x); -} -// CHECK-LABEL: cir.func @test_sqrtpd512 -// CHECK: cir.sqrt +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit) -__m128h test_sqrtph(__m128h x) { - return __builtin_ia32_sqrtph(x); +// Test __builtin_ia32_sqrtph512 +__m512h test_sqrtph512(__m512h a) { + return __builtin_ia32_sqrtph512(a); } -// CHECK-LABEL: cir.func @test_sqrtph -// CHECK: cir.sqrt +// CHECK: cir.func @test_sqrtph512 +// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector +// CHECK: cir.return [[RES]] -// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit) -__m256h test_sqrtph256(__m256h x) { - return __builtin_ia32_sqrtph256(x); +// Test __builtin_ia32_sqrtps512 +__m512 test_sqrtps512(__m512 a) { + return __builtin_ia32_sqrtps512(a); } 
-// CHECK-LABEL: cir.func @test_sqrtph256 -// CHECK: cir.sqrt +// CHECK: cir.func @test_sqrtps512 +// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector +// CHECK: cir.return [[RES]] -// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit) -__m512h test_sqrtph512(__m512h x) { - return __builtin_ia32_sqrtph512(x); +// Test __builtin_ia32_sqrtpd512 +__m512d test_sqrtpd512(__m512d a) { + return __builtin_ia32_sqrtpd512(a); } -// CHECK-LABEL: cir.func @test_sqrtph512 -// CHECK: cir.sqrt \ No newline at end of file +// CHECK: cir.func @test_sqrtpd512 +// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector +// CHECK: cir.return [[RES]] \ No newline at end of file From 90878ec8d8d6b5b46286c419c4187f01215b6e4b Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <10b.priyanshu@gmail.com> Date: Wed, 3 Dec 2025 19:25:20 +0530 Subject: [PATCH 06/33] place the implementation with other floating point ops --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index dc9e3c6a486d6..fa10848f4397a 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4631,14 +4631,6 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> { // Floating Point Ops //===----------------------------------------------------------------------===// -def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> { - let summary = "Floating-point square root operation"; - - let description = [{ - Computes the square root of a floating-point value or vector. - }]; -} - class CIR_UnaryFPToFPBuiltinOp : CIR_Op { @@ -4650,6 +4642,14 @@ class CIR_UnaryFPToFPBuiltinOp let llvmOp = llvmOpName; } +def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> { + let summary = "Floating-point square root operation"; + + let description = [{ + Computes the square root of a floating-point value or vector. 
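+
+    Example (a sketch; the printed form follows the shared
+    CIR_UnaryFPToFPBuiltinOp assembly format):
+
+    ```mlir
+    %r = cir.sqrt %x : !cir.double
+    ```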
+ }]; +} + def CIR_ACosOp : CIR_UnaryFPToFPBuiltinOp<"acos", "ACosOp"> { let summary = "Computes the arcus cosine of the specified value"; let description = [{ From 3529f40b48025f543a2b3ced9d6aa63a2241283f Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Wed, 3 Dec 2025 19:30:58 +0530 Subject: [PATCH 07/33] Update clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 5514a4cd0876d..709e3026e51f1 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -45,30 +45,30 @@ using namespace cir; using namespace llvm; -using namespace mlir; -static std::string getLLVMIntrinsicNameForType(Type llvmTy) { + +static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) { std::string s; { llvm::raw_string_ostream os(s); llvm::Type *unused = nullptr; os << llvmTy; } - if (auto vecTy = llvmTy.dyn_cast()) { + if (auto vecTy = llvmTy.dyn_cast()) { } return s; } // Actual lowering -LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite( +mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite( cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor, - ConversionPatternRewriter &rewriter) const { + mlir::ConversionPatternRewriter &rewriter) const { - Location loc = op.getLoc(); - MLIRContext *ctx = rewriter.getContext(); + mlir::Location loc = op.getLoc(); + mlir::MLIRContext *ctx = rewriter.getContext(); - Type cirResTy = op.getResult().getType(); - Type llvmResTy = getTypeConverter()->convertType(cirResTy); + mlir::Type cirResTy = op.getResult().getType(); + mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy); if (!llvmResTy) return op.emitOpError( "expected LLVM dialect result type for cir.sqrt lowering"); From 92d0ac3ed203e38e244c0afabb5f3524d1772645 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Wed, 3 Dec 2025 19:44:10 +0530 Subject: [PATCH 08/33] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 35ba0f48ce6d8..eb9ac260f225d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_sqrtpd512: { mlir::Location loc = getLoc(expr->getExprLoc()); mlir::Value arg = ops[0]; - return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult(); + return builder.create(loc, arg.getType(), arg).getResult(); } case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: From 0385662da0847396b4096ddad2c90bcf1c593d0f Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Wed, 3 Dec 2025 19:45:45 +0530 Subject: [PATCH 09/33] Update clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 1 - 1 file changed, 1 deletion(-) diff --git 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 709e3026e51f1..a80103764a60a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -51,7 +51,6 @@ static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
   std::string s;
   {
     llvm::raw_string_ostream os(s);
-    llvm::Type *unused = nullptr;
     os << llvmTy;
   }
   if (auto vecTy = llvmTy.dyn_cast()) {

From ddcb7b8476e796d9945bbde26a39e567853da34e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Wed, 3 Dec 2025 19:51:05 +0530
Subject: [PATCH 10/33] update clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.cpp

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..c17980f7ffbf7 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -54,8 +54,6 @@ static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
     llvm::Type *unused = nullptr;
     os << llvmTy;
   }
-  if (auto vecTy = llvmTy.dyn_cast()) {
-  }
   return s;
 }

From 233efad67b9677f6e77034e6868905070708765a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Wed, 3 Dec 2025 19:56:02 +0530
Subject: [PATCH 11/33] Update clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index be6a380372efe..1f69b7d66f25e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,7 +12,6 @@
 #ifndef CLANG_CIR_LOWERTOLLVM_H
 #define CLANG_CIR_LOWERTOLLVM_H
 
-#include "mlir/Conversion/PatternRewriter.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Transforms/DialectConversion.h"

From 9d940bc80e60470e6f5dcc82d74e45dd361acdc2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Wed, 3 Dec 2025 19:57:44 +0530
Subject: [PATCH 12/33] Update clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..5dfef939126d0 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -120,8 +120,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
   if (!module.lookupSymbol(intrinsicName)) {
     OpBuilder::InsertionGuard guard(rewriter);
     rewriter.setInsertionPointToStart(module.getBody());
-    auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
-                                                    /*isVarArg=*/false);
+    auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
+                                                  /*isVarArg=*/false);
     rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
   }

From 51bbccad4f784a4c44d6562ccef36caaf2f1b521 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 16:18:27 +0530
Subject: [PATCH 13/33] Remove BOM character

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..0395f905c866b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//===-- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From e5789b65fc43637493e07979a9ac56dfd9cbee37 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 16:29:05 +0530
Subject: [PATCH 14/33] Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..11f042737d658 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From 8937b12959c7a4336b6857c1a57b54e6c99d5457 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 16:31:14 +0530
Subject: [PATCH 15/33] Apply suggestion from @andykaylor

Co-authored-by: Andy Kaylor
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index eb9ac260f225d..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtpd512: {
     mlir::Location loc = getLoc(expr->getExprLoc());
     mlir::Value arg = ops[0];
-    return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
+    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
   }
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:

From 8a02c504acf42c81bd0c53df89e296480b74c05b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 16:44:08 +0530
Subject: [PATCH 16/33] add description

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index fa10848f4397a..06eb7d6689362 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4647,6 +4647,19 @@ def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
 
   let description = [{
     Computes the square root of a floating-point value or vector.
+
+    The input must be either:
+      • a floating-point scalar type, or
+      • a vector whose element type is floating-point.
+
+    The result type must match the input type exactly.
+
+    Examples:
+      // scalar
+      %r = cir.sqrt %x : !cir.fp64
+
+      // vector
+      %v = cir.sqrt %vec : !cir.vector
   }];
 }

From 82a9395517d79c79653194939c107234e1628d05 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 17:17:40 +0530
Subject: [PATCH 17/33] Remove undefined sqrt builtin cases

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..0b796e4e3a860 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,12 +781,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtsh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
-  case X86::BI__builtin_ia32_sqrtpd256:
-  case X86::BI__builtin_ia32_sqrtpd:
-  case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps:
-  case X86::BI__builtin_ia32_sqrtph256:
-  case X86::BI__builtin_ia32_sqrtph:
     errorNYI("Unimplemented builtin");
     return {};
   case X86::BI__builtin_ia32_sqrtph512:

From 6bd328210bcd68abe14e36895f6d587a54b99ed2 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 17:25:40 +0530
Subject: [PATCH 18/33] Remove unused getLLVMIntrinsicNameForType function

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..8b8b756a7f691 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -46,16 +46,6 @@
 using namespace cir;
 using namespace llvm;
 
-
-static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
-  std::string s;
-  {
-    llvm::raw_string_ostream os(s);
-    os << llvmTy;
-  }
-  return s;
-}
-
 // Actual lowering

From 8232ce8a4de0e8c179d42739b354695987be458f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 17:37:11 +0530
Subject: [PATCH 19/33] Removed braces

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 8b8b756a7f691..4cbea38a30c50 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -62,9 +62,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
 
   Value operand = adaptor.getInput();
   Value llvmOperand = operand;
-  if (operand.getType() != llvmResTy) {
+  if (operand.getType() != llvmResTy)
     llvmOperand = rewriter.create(loc, llvmResTy, operand);
-  }
 
   // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
   std::string intrinsicName = "llvm.sqrt.";
   std::string suffix;
       suffix = "v" + std::to_string(n) + "f16";
     else
       return op.emitOpError("unsupported float width for sqrt");
-    } else {
+    } else
       return op.emitOpError("vector element must be floating point for sqrt");
-    }
   } else if (auto f = cirResTy.dyn_cast()) {
     // Scalar float
     unsigned width = f.getWidth();
@@ -98,9 +96,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
       suffix = "f16";
     else
       return op.emitOpError("unsupported float width for sqrt");
-  } else {
+  } else
     return op.emitOpError("unsupported type for cir.sqrt lowering");
-  }
 
   intrinsicName += suffix;

From bc8e4ccfc22731aaee790659d8b5072ab36be7a7 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 17:39:45 +0530
Subject: [PATCH 20/33] Update clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Co-authored-by: Andy Kaylor
---
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 74 +------------------
 1 file changed, 4 insertions(+), 70 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 4cbea38a30c50..e7a6e8677569a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -47,77 +47,11 @@
 using namespace cir;
 using namespace llvm;
 
 // Actual lowering
-mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
-    cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
-    mlir::ConversionPatternRewriter &rewriter) const {
-
-  mlir::Location loc = op.getLoc();
-  mlir::MLIRContext *ctx = rewriter.getContext();
-
-  mlir::Type cirResTy = op.getResult().getType();
-  mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
-  if (!llvmResTy)
-    return op.emitOpError(
-        "expected LLVM dialect result type for cir.sqrt lowering");
-
-  Value operand = adaptor.getInput();
-  Value llvmOperand = operand;
-  if (operand.getType() != llvmResTy)
-    llvmOperand = rewriter.create(loc, llvmResTy, operand);
-
-  // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
-  std::string intrinsicName = "llvm.sqrt.";
-  std::string suffix;
-
-  // If the CIR result type is a vector, include the 'vN' part in the suffix.
-  if (auto vec = cirResTy.dyn_cast()) {
-    Type elt = vec.getElementType();
-    if (auto f = elt.dyn_cast()) {
-      unsigned width = f.getWidth();
-      unsigned n = vec.getNumElements();
-      if (width == 32)
-        suffix = "v" + std::to_string(n) + "f32";
-      else if (width == 64)
-        suffix = "v" + std::to_string(n) + "f64";
-      else if (width == 16)
-        suffix = "v" + std::to_string(n) + "f16";
-      else
-        return op.emitOpError("unsupported float width for sqrt");
-    } else
-      return op.emitOpError("vector element must be floating point for sqrt");
-  } else if (auto f = cirResTy.dyn_cast()) {
-    // Scalar float
-    unsigned width = f.getWidth();
-    if (width == 32)
-      suffix = "f32";
-    else if (width == 64)
-      suffix = "f64";
-    else if (width == 16)
-      suffix = "f16";
-    else
-      return op.emitOpError("unsupported float width for sqrt");
-  } else
-    return op.emitOpError("unsupported type for cir.sqrt lowering");
-
-  intrinsicName += suffix;
-
-  // Ensure the llvm intrinsic function exists at module scope. Insert it at
-  // the start of the module body using an insertion guard.
-  ModuleOp module = op->getParentOfType<ModuleOp>();
-  if (!module.lookupSymbol(intrinsicName)) {
-    OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(module.getBody());
-    auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
-                                                  /*isVarArg=*/false);
-    rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
-  }
-
-  // Create the call and replace cir.sqrt
-  auto callee = SymbolRefAttr::get(ctx, intrinsicName);
-  rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
-                                            ArrayRef<mlir::Value>{llvmOperand});
-
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
+                                                  adaptor.getSrc());
   return mlir::success();
+
 }

From 92847619f4b000f6dcefe307543dcf6e7b917a14 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 19:37:22 +0530
Subject: [PATCH 21/33] update clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.h

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 1f69b7d66f25e..0591de545b81d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -17,19 +17,6 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 
-namespace cir {
-class SqrtOp;
-}
-
-class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
-public:
-  using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
-
-  mlir::LogicalResult
-  matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
-                  mlir::ConversionPatternRewriter &rewriter) const override;
-};
-
 namespace cir {
 namespace direct {

From 8647b5c719a7d91c3dbd3954b022621c3b550aaf Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 19:41:21 +0530
Subject: [PATCH 22/33] Update clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c

Co-authored-by: Andy Kaylor
---
 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index 97993cabf0ebf..bf496f2ea733d 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -2,7 +2,11 @@
 // Test X86-specific sqrt builtins
 
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
 
 // Test __builtin_ia32_sqrtph512
 __m512h test_sqrtph512(__m512h a) {

From 4bac65a58020456624d39efb64f27d1301c4bb23 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 19:48:11 +0530
Subject: [PATCH 23/33] Update test

---
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index bf496f2ea733d..a3de192f9e142 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -12,22 +12,34 @@ __m512h test_sqrtph512(__m512h a) {
   return __builtin_ia32_sqrtph512(a);
 }
-// CHECK: cir.func @test_sqrtph512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtph512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtph512
+// LLVM: call <32 x half> @llvm.sqrt.v32f16
+// OGCG: define {{.*}} @test_sqrtph512
+// OGCG: call <32 x half> @llvm.sqrt.v32f16
 
 // Test __builtin_ia32_sqrtps512
 __m512 test_sqrtps512(__m512 a) {
   return __builtin_ia32_sqrtps512(a);
 }
-// CIR: cir.func @test_sqrtps512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtps512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtps512
+// LLVM: call <16 x float> @llvm.sqrt.v16f32
+// OGCG: define {{.*}} @test_sqrtps512
+// OGCG: call <16 x float> @llvm.sqrt.v16f32
 
 // Test __builtin_ia32_sqrtpd512
 __m512d test_sqrtpd512(__m512d a) {
   return __builtin_ia32_sqrtpd512(a);
 }
-// CHECK: cir.func @test_sqrtpd512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector
-// CHECK: cir.return [[RES]]
\ No newline at end of file
+// CIR: cir.func @test_sqrtpd512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtpd512
+// LLVM: call <8 x double> @llvm.sqrt.v8f64
+// OGCG: define {{.*}} @test_sqrtpd512
+// OGCG: call <8 x double> @llvm.sqrt.v8f64
\ No newline at end of file

From b1ff2abd50b55470361f721053fc72a9080c20d6 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Thu, 4 Dec 2025 20:25:55 +0530
Subject: [PATCH 24/33] update clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index e7a6e8677569a..846fc5c07f798 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From 884300615cd4900e44af48016cd895005821e41f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Fri, 5 Dec 2025 19:06:33 +0530
Subject: [PATCH 25/33] Remove unused include

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 846fc5c07f798..08573c0ae83bb 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,7 +30,6 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "clang/Basic/LLVM.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"

From ed82423252e2b452efc4d3265166c08e797b259e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Fri, 5 Dec 2025 19:19:21 +0530
Subject: [PATCH 26/33] Move sqrt lowering with other floating point operations

---
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 08573c0ae83bb..34a1ca3f10c01 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,14 +45,6 @@
 using namespace cir;
 using namespace llvm;
 
-// Actual lowering
-  mlir::Type resTy = typeConverter->convertType(op.getType());
-  rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
-                                                  adaptor.getSrc());
-  return mlir::success();
-
-}
-
 namespace cir {
 namespace direct {
@@ -194,6 +186,14 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult SqrtOpLowering::matchAndRewrite(
+    cir::SqrtOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMCosOpLowering::matchAndRewrite(
     cir::CosOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {

From 961c9f95a70b1368b2e33adeae0aa63fd2b9ae8c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Sat, 6 Dec 2025 09:31:10 +0530
Subject: [PATCH 27/33] Remove BOM character

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 34a1ca3f10c01..ffaebdcd9f062 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
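Note (illustrative, not part of any patch above): after PATCH 26 the lowering no longer declares and calls a hand-built llvm.sqrt.* function; it rewrites cir.sqrt directly into the MLIR LLVM-dialect sqrt intrinsic op, and the LLVM IR translation produces the type-mangled intrinsic. A minimal sketch of the round trip, assuming the replaceOpWithNewOp target is mlir::LLVM::SqrtOp as in the hunk above and using the 16 x float case exercised by the tests (exact CIR type syntax here is an assumption, not taken from the series):

    // CIR produced by emitX86BuiltinExpr for __builtin_ia32_sqrtps512:
    %0 = cir.sqrt %arg : !cir.vector<!cir.float x 16>
    // LLVM dialect after SqrtOpLowering runs:
    %0 = llvm.intr.sqrt(%arg) : (vector<16xf32>) -> vector<16xf32>
    // The LLVM IR translation then emits a call to @llvm.sqrt.v16f32,
    // which is what the LLVM/OGCG FileCheck lines in cir-sqrt-builtins.c match.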
From 4dd8aa07aba47bbfdaf39ef36c5f4f951fb7673a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Sat, 6 Dec 2025 11:41:11 +0530
Subject: [PATCH 28/33] Delete my-sqrt-changes.patch

---
 my-sqrt-changes.patch | Bin 12058 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 my-sqrt-changes.patch

diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
deleted file mode 100644
index 87c0ca69ac8abe6aaa684ffbbce3c65e342f6066..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[12058 bytes of base85-encoded binary delta omitted]

From cc5ffa1eee82b86f897ac3cabed6f5b39f28ed61 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu@gmail.com>
Date: Sun, 7 Dec 2025 17:01:19 +0530
Subject: [PATCH 29/33] Update errorNYI call

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3140 ++++++++++----------
 my-sqrt-changes.patch                      |  Bin 12058 -> 0 bytes
 2 files changed, 1571 insertions(+), 1569 deletions(-)
 delete mode 100644 my-sqrt-changes.patch

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index d99dfad0f6c45..080a696b868cf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1,1569 +1,1571 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
-// call to be later resolved.
-// -//===----------------------------------------------------------------------===// - -#include "CIRGenBuilder.h" -#include "CIRGenFunction.h" -#include "CIRGenModule.h" -#include "mlir/IR/Location.h" -#include "mlir/IR/ValueRange.h" -#include "clang/Basic/Builtins.h" -#include "clang/Basic/TargetBuiltins.h" -#include "clang/CIR/Dialect/IR/CIRTypes.h" -#include "clang/CIR/MissingFeatures.h" - -using namespace clang; -using namespace clang::CIRGen; - -template -static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder, - mlir::Location loc, const StringRef str, - const mlir::Type &resTy, - Operands &&...op) { - return cir::LLVMIntrinsicCallOp::create(builder, loc, - builder.getStringAttr(str), resTy, - std::forward(op)...) - .getResult(); -} - -// OG has unordered comparison as a form of optimization in addition to -// ordered comparison, while CIR doesn't. -// -// This means that we can't encode the comparison code of UGT (unordered -// greater than), at least not at the CIR level. -// -// The boolean shouldInvert compensates for this. -// For example: to get to the comparison code UGT, we pass in -// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT. - -// There are several ways to support this otherwise: -// - register extra CmpOpKind for unordered comparison types and build the -// translation code for -// to go from CIR -> LLVM dialect. Notice we get this naturally with -// shouldInvert, benefiting from existing infrastructure, albeit having to -// generate an extra `not` at CIR). -// - Just add extra comparison code to a new VecCmpOpKind instead of -// cluttering CmpOpKind. -// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered -// comparison -// - Just emit the intrinsics call instead of calling this helper, see how the -// LLVM lowering handles this. -static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder, - llvm::SmallVector &ops, - mlir::Location loc, cir::CmpOpKind pred, - bool shouldInvert) { - assert(!cir::MissingFeatures::cgFPOptionsRAII()); - // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]); - mlir::Value bitCast = builder.createBitcast( - shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType()); - return bitCast; -} - -static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc, - mlir::Value mask, unsigned numElems) { - auto maskTy = cir::VectorType::get( - builder.getUIntNTy(1), cast(mask.getType()).getWidth()); - mlir::Value maskVec = builder.createBitcast(mask, maskTy); - - // If we have less than 8 elements, then the starting mask was an i8 and - // we need to extract down to the right number of elements. - if (numElems < 8) { - SmallVector indices; - mlir::Type i32Ty = builder.getSInt32Ty(); - for (auto i : llvm::seq(0, numElems)) - indices.push_back(cir::IntAttr::get(i32Ty, i)); - - maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices); - } - return maskVec; -} - -// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins. -// -// The vector is split into lanes of 8 word elements (16 bits). The lower or -// upper half of each lane, controlled by `isLow`, is shuffled in the following -// way: The immediate is truncated to 8 bits, separated into 4 2-bit fields. The -// i-th field's value represents the resulting index of the i-th element in the -// half lane after shuffling. 
The other half of the lane remains unchanged. -static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder, - const mlir::Value vec, - const mlir::Value immediate, - const mlir::Location loc, - const bool isLow) { - uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate); - - auto vecTy = cast(vec.getType()); - unsigned numElts = vecTy.getSize(); - - unsigned firstHalfStart = isLow ? 0 : 4; - unsigned secondHalfStart = 4 - firstHalfStart; - - // Splat the 8-bits of immediate 4 times to help the loop wrap around. - imm = (imm & 0xff) * 0x01010101; - - int64_t indices[32]; - for (unsigned l = 0; l != numElts; l += 8) { - for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) { - indices[l + i] = l + (imm & 3) + firstHalfStart; - imm >>= 2; - } - for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i) - indices[l + i] = l + i; - } - - return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts)); -} - -// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins. -// The shuffle mask is written to outIndices. -static void -computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec, - uint32_t imm, const bool isShufP, - llvm::SmallVectorImpl &outIndices) { - auto vecTy = cast(vec.getType()); - unsigned numElts = vecTy.getSize(); - unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128; - unsigned numLaneElts = numElts / numLanes; - - // Splat the 8-bits of immediate 4 times to help the loop wrap around. - imm = (imm & 0xff) * 0x01010101; - - for (unsigned l = 0; l != numElts; l += numLaneElts) { - for (unsigned i = 0; i != numLaneElts; ++i) { - uint32_t idx = imm % numLaneElts; - imm /= numLaneElts; - if (isShufP && i >= (numLaneElts / 2)) - idx += numElts; - outIndices[l + i] = l + idx; - } - } - - outIndices.resize(numElts); -} - -static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, - mlir::Location loc, - const std::string &intrinsicName, - SmallVectorImpl &ops) { - - auto intTy = cast(ops[0].getType()); - unsigned numElts = intTy.getWidth(); - mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); - mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); - mlir::Type vecTy = lhsVec.getType(); - mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy, - mlir::ValueRange{lhsVec, rhsVec}); - return builder.createBitcast(resVec, ops[0].getType()); -} - -static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder, - mlir::Location loc, - const std::string &intrinsicName, - SmallVectorImpl &ops) { - unsigned numElems = cast(ops[0].getType()).getWidth(); - - // Convert both operands to mask vectors. - mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems); - mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems); - - mlir::Type i32Ty = builder.getSInt32Ty(); - - // Create indices for extracting the first half of each vector. - SmallVector halfIndices; - for (auto i : llvm::seq(0, numElems / 2)) - halfIndices.push_back(cir::IntAttr::get(i32Ty, i)); - - // Extract first half of each vector. This gives better codegen than - // doing it in a single shuffle. - mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices); - mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices); - - // Create indices for concatenating the vectors. - // NOTE: Operands are swapped to match the intrinsic definition. - // After the half extraction, both vectors have numElems/2 elements. 
- // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] - // select from rhsHalf, and indices [numElems/2..numElems-1] select from - // lhsHalf. - SmallVector concatIndices; - for (auto i : llvm::seq(0, numElems)) - concatIndices.push_back(cir::IntAttr::get(i32Ty, i)); - - // Concat the vectors (RHS first, then LHS). - mlir::Value res = - builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices); - return builder.createBitcast(res, ops[0].getType()); -} - -static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder, - mlir::Location loc, - cir::BinOpKind binOpKind, - SmallVectorImpl &ops, - bool invertLHS = false) { - unsigned numElts = cast(ops[0].getType()).getWidth(); - mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts); - mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts); - - if (invertLHS) - lhs = builder.createNot(lhs); - return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs), - ops[0].getType()); -} - -static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc, - const std::string &intrinsicName, - SmallVectorImpl &ops) { - auto intTy = cast(ops[0].getType()); - unsigned numElts = intTy.getWidth(); - mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); - mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); - mlir::Type resTy = builder.getSInt32Ty(); - return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy, - mlir::ValueRange{lhsVec, rhsVec}); -} - -static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc, - mlir::Value vec, mlir::Value value, - mlir::Value indexOp) { - unsigned numElts = cast(vec.getType()).getSize(); - - uint64_t index = - indexOp.getDefiningOp().getIntValue().getZExtValue(); - - index &= numElts - 1; - - cir::ConstantOp indexVal = builder.getUInt64(index, loc); - - return cir::VecInsertOp::create(builder, loc, vec, value, indexVal); -} - -static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder, - mlir::Location location, mlir::Value &op0, - mlir::Value &op1, mlir::Value &amt, - bool isRight) { - mlir::Type op0Ty = op0.getType(); - - // Amount may be scalar immediate, in which case create a splat vector. - // Funnel shifts amounts are treated as modulo and types are all power-of-2 - // so we only care about the lowest log2 bits anyway. - if (amt.getType() != op0Ty) { - auto vecTy = mlir::cast(op0Ty); - uint64_t numElems = vecTy.getSize(); - - auto amtTy = mlir::cast(amt.getType()); - auto vecElemTy = mlir::cast(vecTy.getElementType()); - - // If signed, cast to the same width but unsigned first to - // ensure zero-extension when casting to a bigger unsigned `vecElemeTy`. - if (amtTy.isSigned()) { - cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth()); - amt = builder.createIntCast(amt, unsignedAmtTy); - } - cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth()); - amt = builder.createIntCast(amt, unsignedVecElemType); - amt = cir::VecSplatOp::create( - builder, location, cir::VectorType::get(unsignedVecElemType, numElems), - amt); - } - - const StringRef intrinsicName = isRight ? 
"fshr" : "fshl"; - return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty, - mlir::ValueRange{op0, op1, amt}); -} - -static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, - bool isSigned, - SmallVectorImpl &ops, - unsigned opTypePrimitiveSizeInBits) { - mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(), - opTypePrimitiveSizeInBits / 64); - mlir::Value lhs = builder.createBitcast(loc, ops[0], ty); - mlir::Value rhs = builder.createBitcast(loc, ops[1], ty); - if (isSigned) { - cir::ConstantOp shiftAmt = - builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32)); - cir::VecSplatOp shiftSplatVecOp = - cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult()); - mlir::Value shiftSplatValue = shiftSplatVecOp.getResult(); - // In CIR, right-shift operations are automatically lowered to either an - // arithmetic or logical shift depending on the operand type. The purpose - // of the shifts here is to propagate the sign bit of the 32-bit input - // into the upper bits of each vector lane. - lhs = builder.createShift(loc, lhs, shiftSplatValue, true); - lhs = builder.createShift(loc, lhs, shiftSplatValue, false); - rhs = builder.createShift(loc, rhs, shiftSplatValue, true); - rhs = builder.createShift(loc, rhs, shiftSplatValue, false); - } else { - cir::ConstantOp maskScalar = builder.getConstant( - loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff)); - cir::VecSplatOp mask = - cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult()); - // Clear the upper bits - lhs = builder.createAnd(loc, lhs, mask); - rhs = builder.createAnd(loc, rhs, mask); - } - return builder.createMul(loc, lhs, rhs); -} - -static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, - llvm::SmallVector ops, - bool isSigned) { - mlir::Value op0 = ops[0]; - mlir::Value op1 = ops[1]; - - cir::VectorType ty = cast(op0.getType()); - cir::IntType elementTy = cast(ty.getElementType()); - - uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7; - - cir::CmpOpKind pred; - switch (imm) { - case 0x0: - pred = cir::CmpOpKind::lt; - break; - case 0x1: - pred = cir::CmpOpKind::le; - break; - case 0x2: - pred = cir::CmpOpKind::gt; - break; - case 0x3: - pred = cir::CmpOpKind::ge; - break; - case 0x4: - pred = cir::CmpOpKind::eq; - break; - case 0x5: - pred = cir::CmpOpKind::ne; - break; - case 0x6: - return builder.getNullValue(ty, loc); // FALSE - case 0x7: { - llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth()); - return cir::VecSplatOp::create( - builder, loc, ty, - builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE - } - default: - llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); - } - - if ((!isSigned && elementTy.isSigned()) || - (isSigned && elementTy.isUnsigned())) { - elementTy = elementTy.isSigned() ? 
builder.getUIntNTy(elementTy.getWidth()) - : builder.getSIntNTy(elementTy.getWidth()); - ty = cir::VectorType::get(elementTy, ty.getSize()); - op0 = builder.createBitcast(op0, ty); - op1 = builder.createBitcast(op1, ty); - } - - return builder.createVecCompare(loc, pred, op0, op1); -} - -mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, - const CallExpr *expr) { - if (builtinID == Builtin::BI__builtin_cpu_is) { - cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is"); - return {}; - } - if (builtinID == Builtin::BI__builtin_cpu_supports) { - cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports"); - return {}; - } - if (builtinID == Builtin::BI__builtin_cpu_init) { - cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init"); - return {}; - } - - // Handle MSVC intrinsics before argument evaluation to prevent double - // evaluation. - assert(!cir::MissingFeatures::msvcBuiltins()); - - // Find out if any arguments are required to be integer constant expressions. - assert(!cir::MissingFeatures::handleBuiltinICEArguments()); - - // The operands of the builtin call - llvm::SmallVector ops; - - // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit - // is required to be a constant integer expression. - unsigned iceArguments = 0; - ASTContext::GetBuiltinTypeError error; - getContext().GetBuiltinType(builtinID, error, &iceArguments); - assert(error == ASTContext::GE_None && "Error while getting builtin type."); - - for (auto [idx, arg] : llvm::enumerate(expr->arguments())) - ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg)); - - CIRGenBuilderTy &builder = getBuilder(); - mlir::Type voidTy = builder.getVoidTy(); - - switch (builtinID) { - default: - return {}; - case X86::BI_mm_clflush: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.clflush", voidTy, ops[0]); - case X86::BI_mm_lfence: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.lfence", voidTy); - case X86::BI_mm_pause: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.pause", voidTy); - case X86::BI_mm_mfence: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.mfence", voidTy); - case X86::BI_mm_sfence: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse.sfence", voidTy); - case X86::BI_mm_prefetch: - case X86::BI__rdtsc: - case X86::BI__builtin_ia32_rdtscp: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - } - case X86::BI__builtin_ia32_lzcnt_u16: - case X86::BI__builtin_ia32_lzcnt_u32: - case X86::BI__builtin_ia32_lzcnt_u64: { - mlir::Location loc = getLoc(expr->getExprLoc()); - mlir::Value isZeroPoison = builder.getFalse(loc); - return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(), - mlir::ValueRange{ops[0], isZeroPoison}); - } - case X86::BI__builtin_ia32_tzcnt_u16: - case X86::BI__builtin_ia32_tzcnt_u32: - case X86::BI__builtin_ia32_tzcnt_u64: { - mlir::Location loc = getLoc(expr->getExprLoc()); - mlir::Value isZeroPoison = builder.getFalse(loc); - return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(), - mlir::ValueRange{ops[0], isZeroPoison}); - } - case X86::BI__builtin_ia32_undef128: - case X86::BI__builtin_ia32_undef256: - case X86::BI__builtin_ia32_undef512: - // The x86 definition of "undef" is not the same as the LLVM definition - // (PR32176). 
We leave optimizing away an unnecessary zero constant to the - // IR optimizer and backend. - // TODO: If we had a "freeze" IR instruction to generate a fixed undef - // value, we should use that here instead of a zero. - return builder.getNullValue(convertType(expr->getType()), - getLoc(expr->getExprLoc())); - case X86::BI__builtin_ia32_vec_ext_v4hi: - case X86::BI__builtin_ia32_vec_ext_v16qi: - case X86::BI__builtin_ia32_vec_ext_v8hi: - case X86::BI__builtin_ia32_vec_ext_v4si: - case X86::BI__builtin_ia32_vec_ext_v4sf: - case X86::BI__builtin_ia32_vec_ext_v2di: - case X86::BI__builtin_ia32_vec_ext_v32qi: - case X86::BI__builtin_ia32_vec_ext_v16hi: - case X86::BI__builtin_ia32_vec_ext_v8si: - case X86::BI__builtin_ia32_vec_ext_v4di: { - unsigned numElts = cast(ops[0].getType()).getSize(); - - uint64_t index = getZExtIntValueFromConstOp(ops[1]); - index &= numElts - 1; - - cir::ConstantOp indexVal = - builder.getUInt64(index, getLoc(expr->getExprLoc())); - - // These builtins exist so we can ensure the index is an ICE and in range. - // Otherwise we could just do this in the header file. - return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()), - ops[0], indexVal); - } - case X86::BI__builtin_ia32_vec_set_v4hi: - case X86::BI__builtin_ia32_vec_set_v16qi: - case X86::BI__builtin_ia32_vec_set_v8hi: - case X86::BI__builtin_ia32_vec_set_v4si: - case X86::BI__builtin_ia32_vec_set_v2di: - case X86::BI__builtin_ia32_vec_set_v32qi: - case X86::BI__builtin_ia32_vec_set_v16hi: - case X86::BI__builtin_ia32_vec_set_v8si: - case X86::BI__builtin_ia32_vec_set_v4di: { - return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1], - ops[2]); - } - case X86::BI__builtin_ia32_kunpckhi: - return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kunpackb", ops); - case X86::BI__builtin_ia32_kunpcksi: - return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kunpackw", ops); - case X86::BI__builtin_ia32_kunpckdi: - return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kunpackd", ops); - case X86::BI_mm_setcsr: - case X86::BI__builtin_ia32_ldmxcsr: { - mlir::Location loc = getLoc(expr->getExprLoc()); - Address tmp = createMemTemp(expr->getArg(0)->getType(), loc); - builder.createStore(loc, ops[0], tmp); - return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr", - builder.getVoidTy(), tmp.getPointer()); - } - case X86::BI_mm_getcsr: - case X86::BI__builtin_ia32_stmxcsr: { - mlir::Location loc = getLoc(expr->getExprLoc()); - Address tmp = createMemTemp(expr->getType(), loc); - emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(), - tmp.getPointer()); - return builder.createLoad(loc, tmp); - } - case X86::BI__builtin_ia32_xsave: - case X86::BI__builtin_ia32_xsave64: - case X86::BI__builtin_ia32_xrstor: - case X86::BI__builtin_ia32_xrstor64: - case X86::BI__builtin_ia32_xsaveopt: - case X86::BI__builtin_ia32_xsaveopt64: - case X86::BI__builtin_ia32_xrstors: - case X86::BI__builtin_ia32_xrstors64: - case X86::BI__builtin_ia32_xsavec: - case X86::BI__builtin_ia32_xsavec64: - case X86::BI__builtin_ia32_xsaves: - case X86::BI__builtin_ia32_xsaves64: - case X86::BI__builtin_ia32_xsetbv: - case X86::BI_xsetbv: - case X86::BI__builtin_ia32_xgetbv: - case X86::BI_xgetbv: - case X86::BI__builtin_ia32_storedqudi128_mask: - case X86::BI__builtin_ia32_storedqusi128_mask: - case X86::BI__builtin_ia32_storedquhi128_mask: - case X86::BI__builtin_ia32_storedquqi128_mask: - case X86::BI__builtin_ia32_storeupd128_mask: - 
case X86::BI__builtin_ia32_storeups128_mask: - case X86::BI__builtin_ia32_storedqudi256_mask: - case X86::BI__builtin_ia32_storedqusi256_mask: - case X86::BI__builtin_ia32_storedquhi256_mask: - case X86::BI__builtin_ia32_storedquqi256_mask: - case X86::BI__builtin_ia32_storeupd256_mask: - case X86::BI__builtin_ia32_storeups256_mask: - case X86::BI__builtin_ia32_storedqudi512_mask: - case X86::BI__builtin_ia32_storedqusi512_mask: - case X86::BI__builtin_ia32_storedquhi512_mask: - case X86::BI__builtin_ia32_storedquqi512_mask: - case X86::BI__builtin_ia32_storeupd512_mask: - case X86::BI__builtin_ia32_storeups512_mask: - case X86::BI__builtin_ia32_storesbf16128_mask: - case X86::BI__builtin_ia32_storesh128_mask: - case X86::BI__builtin_ia32_storess128_mask: - case X86::BI__builtin_ia32_storesd128_mask: - case X86::BI__builtin_ia32_cvtmask2b128: - case X86::BI__builtin_ia32_cvtmask2b256: - case X86::BI__builtin_ia32_cvtmask2b512: - case X86::BI__builtin_ia32_cvtmask2w128: - case X86::BI__builtin_ia32_cvtmask2w256: - case X86::BI__builtin_ia32_cvtmask2w512: - case X86::BI__builtin_ia32_cvtmask2d128: - case X86::BI__builtin_ia32_cvtmask2d256: - case X86::BI__builtin_ia32_cvtmask2d512: - case X86::BI__builtin_ia32_cvtmask2q128: - case X86::BI__builtin_ia32_cvtmask2q256: - case X86::BI__builtin_ia32_cvtmask2q512: - case X86::BI__builtin_ia32_cvtb2mask128: - case X86::BI__builtin_ia32_cvtb2mask256: - case X86::BI__builtin_ia32_cvtb2mask512: - case X86::BI__builtin_ia32_cvtw2mask128: - case X86::BI__builtin_ia32_cvtw2mask256: - case X86::BI__builtin_ia32_cvtw2mask512: - case X86::BI__builtin_ia32_cvtd2mask128: - case X86::BI__builtin_ia32_cvtd2mask256: - case X86::BI__builtin_ia32_cvtd2mask512: - case X86::BI__builtin_ia32_cvtq2mask128: - case X86::BI__builtin_ia32_cvtq2mask256: - case X86::BI__builtin_ia32_cvtq2mask512: - case X86::BI__builtin_ia32_cvtdq2ps512_mask: - case X86::BI__builtin_ia32_cvtqq2ps512_mask: - case X86::BI__builtin_ia32_cvtqq2pd512_mask: - case X86::BI__builtin_ia32_vcvtw2ph512_mask: - case X86::BI__builtin_ia32_vcvtdq2ph512_mask: - case X86::BI__builtin_ia32_vcvtqq2ph512_mask: - case X86::BI__builtin_ia32_cvtudq2ps512_mask: - case X86::BI__builtin_ia32_cvtuqq2ps512_mask: - case X86::BI__builtin_ia32_cvtuqq2pd512_mask: - case X86::BI__builtin_ia32_vcvtuw2ph512_mask: - case X86::BI__builtin_ia32_vcvtudq2ph512_mask: - case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: - case X86::BI__builtin_ia32_vfmaddsh3_mask: - case X86::BI__builtin_ia32_vfmaddss3_mask: - case X86::BI__builtin_ia32_vfmaddsd3_mask: - case X86::BI__builtin_ia32_vfmaddsh3_maskz: - case X86::BI__builtin_ia32_vfmaddss3_maskz: - case X86::BI__builtin_ia32_vfmaddsd3_maskz: - case X86::BI__builtin_ia32_vfmaddsh3_mask3: - case X86::BI__builtin_ia32_vfmaddss3_mask3: - case X86::BI__builtin_ia32_vfmaddsd3_mask3: - case X86::BI__builtin_ia32_vfmsubsh3_mask3: - case X86::BI__builtin_ia32_vfmsubss3_mask3: - case X86::BI__builtin_ia32_vfmsubsd3_mask3: - case X86::BI__builtin_ia32_vfmaddph512_mask: - case X86::BI__builtin_ia32_vfmaddph512_maskz: - case X86::BI__builtin_ia32_vfmaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddps512_mask: - case X86::BI__builtin_ia32_vfmaddps512_maskz: - case X86::BI__builtin_ia32_vfmaddps512_mask3: - case X86::BI__builtin_ia32_vfmsubps512_mask3: - case X86::BI__builtin_ia32_vfmaddpd512_mask: - case X86::BI__builtin_ia32_vfmaddpd512_maskz: - case X86::BI__builtin_ia32_vfmaddpd512_mask3: - case X86::BI__builtin_ia32_vfmsubpd512_mask3: - case X86::BI__builtin_ia32_vfmsubph512_mask3: - case 
X86::BI__builtin_ia32_vfmaddsubph512_mask: - case X86::BI__builtin_ia32_vfmaddsubph512_maskz: - case X86::BI__builtin_ia32_vfmaddsubph512_mask3: - case X86::BI__builtin_ia32_vfmsubaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddsubps512_mask: - case X86::BI__builtin_ia32_vfmaddsubps512_maskz: - case X86::BI__builtin_ia32_vfmaddsubps512_mask3: - case X86::BI__builtin_ia32_vfmsubaddps512_mask3: - case X86::BI__builtin_ia32_vfmaddsubpd512_mask: - case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: - case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: - case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: - case X86::BI__builtin_ia32_movdqa32store128_mask: - case X86::BI__builtin_ia32_movdqa64store128_mask: - case X86::BI__builtin_ia32_storeaps128_mask: - case X86::BI__builtin_ia32_storeapd128_mask: - case X86::BI__builtin_ia32_movdqa32store256_mask: - case X86::BI__builtin_ia32_movdqa64store256_mask: - case X86::BI__builtin_ia32_storeaps256_mask: - case X86::BI__builtin_ia32_storeapd256_mask: - case X86::BI__builtin_ia32_movdqa32store512_mask: - case X86::BI__builtin_ia32_movdqa64store512_mask: - case X86::BI__builtin_ia32_storeaps512_mask: - case X86::BI__builtin_ia32_storeapd512_mask: - case X86::BI__builtin_ia32_loadups128_mask: - case X86::BI__builtin_ia32_loadups256_mask: - case X86::BI__builtin_ia32_loadups512_mask: - case X86::BI__builtin_ia32_loadupd128_mask: - case X86::BI__builtin_ia32_loadupd256_mask: - case X86::BI__builtin_ia32_loadupd512_mask: - case X86::BI__builtin_ia32_loaddquqi128_mask: - case X86::BI__builtin_ia32_loaddquqi256_mask: - case X86::BI__builtin_ia32_loaddquqi512_mask: - case X86::BI__builtin_ia32_loaddquhi128_mask: - case X86::BI__builtin_ia32_loaddquhi256_mask: - case X86::BI__builtin_ia32_loaddquhi512_mask: - case X86::BI__builtin_ia32_loaddqusi128_mask: - case X86::BI__builtin_ia32_loaddqusi256_mask: - case X86::BI__builtin_ia32_loaddqusi512_mask: - case X86::BI__builtin_ia32_loaddqudi128_mask: - case X86::BI__builtin_ia32_loaddqudi256_mask: - case X86::BI__builtin_ia32_loaddqudi512_mask: - case X86::BI__builtin_ia32_loadsbf16128_mask: - case X86::BI__builtin_ia32_loadsh128_mask: - case X86::BI__builtin_ia32_loadss128_mask: - case X86::BI__builtin_ia32_loadsd128_mask: - case X86::BI__builtin_ia32_loadaps128_mask: - case X86::BI__builtin_ia32_loadaps256_mask: - case X86::BI__builtin_ia32_loadaps512_mask: - case X86::BI__builtin_ia32_loadapd128_mask: - case X86::BI__builtin_ia32_loadapd256_mask: - case X86::BI__builtin_ia32_loadapd512_mask: - case X86::BI__builtin_ia32_movdqa32load128_mask: - case X86::BI__builtin_ia32_movdqa32load256_mask: - case X86::BI__builtin_ia32_movdqa32load512_mask: - case X86::BI__builtin_ia32_movdqa64load128_mask: - case X86::BI__builtin_ia32_movdqa64load256_mask: - case X86::BI__builtin_ia32_movdqa64load512_mask: - case X86::BI__builtin_ia32_expandloaddf128_mask: - case X86::BI__builtin_ia32_expandloaddf256_mask: - case X86::BI__builtin_ia32_expandloaddf512_mask: - case X86::BI__builtin_ia32_expandloadsf128_mask: - case X86::BI__builtin_ia32_expandloadsf256_mask: - case X86::BI__builtin_ia32_expandloadsf512_mask: - case X86::BI__builtin_ia32_expandloaddi128_mask: - case X86::BI__builtin_ia32_expandloaddi256_mask: - case X86::BI__builtin_ia32_expandloaddi512_mask: - case X86::BI__builtin_ia32_expandloadsi128_mask: - case X86::BI__builtin_ia32_expandloadsi256_mask: - case X86::BI__builtin_ia32_expandloadsi512_mask: - case X86::BI__builtin_ia32_expandloadhi128_mask: - case X86::BI__builtin_ia32_expandloadhi256_mask: - case 
X86::BI__builtin_ia32_expandloadhi512_mask: - case X86::BI__builtin_ia32_expandloadqi128_mask: - case X86::BI__builtin_ia32_expandloadqi256_mask: - case X86::BI__builtin_ia32_expandloadqi512_mask: - case X86::BI__builtin_ia32_compressstoredf128_mask: - case X86::BI__builtin_ia32_compressstoredf256_mask: - case X86::BI__builtin_ia32_compressstoredf512_mask: - case X86::BI__builtin_ia32_compressstoresf128_mask: - case X86::BI__builtin_ia32_compressstoresf256_mask: - case X86::BI__builtin_ia32_compressstoresf512_mask: - case X86::BI__builtin_ia32_compressstoredi128_mask: - case X86::BI__builtin_ia32_compressstoredi256_mask: - case X86::BI__builtin_ia32_compressstoredi512_mask: - case X86::BI__builtin_ia32_compressstoresi128_mask: - case X86::BI__builtin_ia32_compressstoresi256_mask: - case X86::BI__builtin_ia32_compressstoresi512_mask: - case X86::BI__builtin_ia32_compressstorehi128_mask: - case X86::BI__builtin_ia32_compressstorehi256_mask: - case X86::BI__builtin_ia32_compressstorehi512_mask: - case X86::BI__builtin_ia32_compressstoreqi128_mask: - case X86::BI__builtin_ia32_compressstoreqi256_mask: - case X86::BI__builtin_ia32_compressstoreqi512_mask: - case X86::BI__builtin_ia32_expanddf128_mask: - case X86::BI__builtin_ia32_expanddf256_mask: - case X86::BI__builtin_ia32_expanddf512_mask: - case X86::BI__builtin_ia32_expandsf128_mask: - case X86::BI__builtin_ia32_expandsf256_mask: - case X86::BI__builtin_ia32_expandsf512_mask: - case X86::BI__builtin_ia32_expanddi128_mask: - case X86::BI__builtin_ia32_expanddi256_mask: - case X86::BI__builtin_ia32_expanddi512_mask: - case X86::BI__builtin_ia32_expandsi128_mask: - case X86::BI__builtin_ia32_expandsi256_mask: - case X86::BI__builtin_ia32_expandsi512_mask: - case X86::BI__builtin_ia32_expandhi128_mask: - case X86::BI__builtin_ia32_expandhi256_mask: - case X86::BI__builtin_ia32_expandhi512_mask: - case X86::BI__builtin_ia32_expandqi128_mask: - case X86::BI__builtin_ia32_expandqi256_mask: - case X86::BI__builtin_ia32_expandqi512_mask: - case X86::BI__builtin_ia32_compressdf128_mask: - case X86::BI__builtin_ia32_compressdf256_mask: - case X86::BI__builtin_ia32_compressdf512_mask: - case X86::BI__builtin_ia32_compresssf128_mask: - case X86::BI__builtin_ia32_compresssf256_mask: - case X86::BI__builtin_ia32_compresssf512_mask: - case X86::BI__builtin_ia32_compressdi128_mask: - case X86::BI__builtin_ia32_compressdi256_mask: - case X86::BI__builtin_ia32_compressdi512_mask: - case X86::BI__builtin_ia32_compresssi128_mask: - case X86::BI__builtin_ia32_compresssi256_mask: - case X86::BI__builtin_ia32_compresssi512_mask: - case X86::BI__builtin_ia32_compresshi128_mask: - case X86::BI__builtin_ia32_compresshi256_mask: - case X86::BI__builtin_ia32_compresshi512_mask: - case X86::BI__builtin_ia32_compressqi128_mask: - case X86::BI__builtin_ia32_compressqi256_mask: - case X86::BI__builtin_ia32_compressqi512_mask: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_gather3div2df: - case X86::BI__builtin_ia32_gather3div2di: - case X86::BI__builtin_ia32_gather3div4df: - case X86::BI__builtin_ia32_gather3div4di: - case X86::BI__builtin_ia32_gather3div4sf: - case X86::BI__builtin_ia32_gather3div4si: - case X86::BI__builtin_ia32_gather3div8sf: - case X86::BI__builtin_ia32_gather3div8si: - case X86::BI__builtin_ia32_gather3siv2df: - case X86::BI__builtin_ia32_gather3siv2di: - case X86::BI__builtin_ia32_gather3siv4df: - case 
-  case X86::BI__builtin_ia32_gather3siv4sf:
-  case X86::BI__builtin_ia32_gather3siv4si:
-  case X86::BI__builtin_ia32_gather3siv8sf:
-  case X86::BI__builtin_ia32_gather3siv8si:
-  case X86::BI__builtin_ia32_gathersiv8df:
-  case X86::BI__builtin_ia32_gathersiv16sf:
-  case X86::BI__builtin_ia32_gatherdiv8df:
-  case X86::BI__builtin_ia32_gatherdiv16sf:
-  case X86::BI__builtin_ia32_gathersiv8di:
-  case X86::BI__builtin_ia32_gathersiv16si:
-  case X86::BI__builtin_ia32_gatherdiv8di:
-  case X86::BI__builtin_ia32_gatherdiv16si: {
-    StringRef intrinsicName;
-    switch (builtinID) {
-    default:
-      llvm_unreachable("Unexpected builtin");
-    case X86::BI__builtin_ia32_gather3div2df:
-      intrinsicName = "x86.avx512.mask.gather3div2.df";
-      break;
-    case X86::BI__builtin_ia32_gather3div2di:
-      intrinsicName = "x86.avx512.mask.gather3div2.di";
-      break;
-    case X86::BI__builtin_ia32_gather3div4df:
-      intrinsicName = "x86.avx512.mask.gather3div4.df";
-      break;
-    case X86::BI__builtin_ia32_gather3div4di:
-      intrinsicName = "x86.avx512.mask.gather3div4.di";
-      break;
-    case X86::BI__builtin_ia32_gather3div4sf:
-      intrinsicName = "x86.avx512.mask.gather3div4.sf";
-      break;
-    case X86::BI__builtin_ia32_gather3div4si:
-      intrinsicName = "x86.avx512.mask.gather3div4.si";
-      break;
-    case X86::BI__builtin_ia32_gather3div8sf:
-      intrinsicName = "x86.avx512.mask.gather3div8.sf";
-      break;
-    case X86::BI__builtin_ia32_gather3div8si:
-      intrinsicName = "x86.avx512.mask.gather3div8.si";
-      break;
-    case X86::BI__builtin_ia32_gather3siv2df:
-      intrinsicName = "x86.avx512.mask.gather3siv2.df";
-      break;
-    case X86::BI__builtin_ia32_gather3siv2di:
-      intrinsicName = "x86.avx512.mask.gather3siv2.di";
-      break;
-    case X86::BI__builtin_ia32_gather3siv4df:
-      intrinsicName = "x86.avx512.mask.gather3siv4.df";
-      break;
-    case X86::BI__builtin_ia32_gather3siv4di:
-      intrinsicName = "x86.avx512.mask.gather3siv4.di";
-      break;
-    case X86::BI__builtin_ia32_gather3siv4sf:
-      intrinsicName = "x86.avx512.mask.gather3siv4.sf";
-      break;
-    case X86::BI__builtin_ia32_gather3siv4si:
-      intrinsicName = "x86.avx512.mask.gather3siv4.si";
-      break;
-    case X86::BI__builtin_ia32_gather3siv8sf:
-      intrinsicName = "x86.avx512.mask.gather3siv8.sf";
-      break;
-    case X86::BI__builtin_ia32_gather3siv8si:
-      intrinsicName = "x86.avx512.mask.gather3siv8.si";
-      break;
-    case X86::BI__builtin_ia32_gathersiv8df:
-      intrinsicName = "x86.avx512.mask.gather.dpd.512";
-      break;
-    case X86::BI__builtin_ia32_gathersiv16sf:
-      intrinsicName = "x86.avx512.mask.gather.dps.512";
-      break;
-    case X86::BI__builtin_ia32_gatherdiv8df:
-      intrinsicName = "x86.avx512.mask.gather.qpd.512";
-      break;
-    case X86::BI__builtin_ia32_gatherdiv16sf:
-      intrinsicName = "x86.avx512.mask.gather.qps.512";
-      break;
-    case X86::BI__builtin_ia32_gathersiv8di:
-      intrinsicName = "x86.avx512.mask.gather.dpq.512";
-      break;
-    case X86::BI__builtin_ia32_gathersiv16si:
-      intrinsicName = "x86.avx512.mask.gather.dpi.512";
-      break;
-    case X86::BI__builtin_ia32_gatherdiv8di:
-      intrinsicName = "x86.avx512.mask.gather.qpq.512";
-      break;
-    case X86::BI__builtin_ia32_gatherdiv16si:
-      intrinsicName = "x86.avx512.mask.gather.qpi.512";
-      break;
-    }
-
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    unsigned minElts =
-        std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
-                 cast<cir::VectorType>(ops[2].getType()).getSize());
-    ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
-    return emitIntrinsicCallOp(builder, loc, intrinsicName,
-                               convertType(expr->getType()), ops);
-  }
-  case X86::BI__builtin_ia32_scattersiv8df:
-  case X86::BI__builtin_ia32_scattersiv16sf:
-  case X86::BI__builtin_ia32_scatterdiv8df:
-  case X86::BI__builtin_ia32_scatterdiv16sf:
-  case X86::BI__builtin_ia32_scattersiv8di:
-  case X86::BI__builtin_ia32_scattersiv16si:
-  case X86::BI__builtin_ia32_scatterdiv8di:
-  case X86::BI__builtin_ia32_scatterdiv16si:
-  case X86::BI__builtin_ia32_scatterdiv2df:
-  case X86::BI__builtin_ia32_scatterdiv2di:
-  case X86::BI__builtin_ia32_scatterdiv4df:
-  case X86::BI__builtin_ia32_scatterdiv4di:
-  case X86::BI__builtin_ia32_scatterdiv4sf:
-  case X86::BI__builtin_ia32_scatterdiv4si:
-  case X86::BI__builtin_ia32_scatterdiv8sf:
-  case X86::BI__builtin_ia32_scatterdiv8si:
-  case X86::BI__builtin_ia32_scattersiv2df:
-  case X86::BI__builtin_ia32_scattersiv2di:
-  case X86::BI__builtin_ia32_scattersiv4df:
-  case X86::BI__builtin_ia32_scattersiv4di:
-  case X86::BI__builtin_ia32_scattersiv4sf:
-  case X86::BI__builtin_ia32_scattersiv4si:
-  case X86::BI__builtin_ia32_scattersiv8sf:
-  case X86::BI__builtin_ia32_scattersiv8si: {
-    llvm::StringRef intrinsicName;
-    switch (builtinID) {
-    default:
-      llvm_unreachable("Unexpected builtin");
-    case X86::BI__builtin_ia32_scattersiv8df:
-      intrinsicName = "x86.avx512.mask.scatter.dpd.512";
-      break;
-    case X86::BI__builtin_ia32_scattersiv16sf:
-      intrinsicName = "x86.avx512.mask.scatter.dps.512";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv8df:
-      intrinsicName = "x86.avx512.mask.scatter.qpd.512";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv16sf:
-      intrinsicName = "x86.avx512.mask.scatter.qps.512";
-      break;
-    case X86::BI__builtin_ia32_scattersiv8di:
-      intrinsicName = "x86.avx512.mask.scatter.dpq.512";
-      break;
-    case X86::BI__builtin_ia32_scattersiv16si:
-      intrinsicName = "x86.avx512.mask.scatter.dpi.512";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv8di:
-      intrinsicName = "x86.avx512.mask.scatter.qpq.512";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv16si:
-      intrinsicName = "x86.avx512.mask.scatter.qpi.512";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv2df:
-      intrinsicName = "x86.avx512.mask.scatterdiv2.df";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv2di:
-      intrinsicName = "x86.avx512.mask.scatterdiv2.di";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv4df:
-      intrinsicName = "x86.avx512.mask.scatterdiv4.df";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv4di:
-      intrinsicName = "x86.avx512.mask.scatterdiv4.di";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv4sf:
-      intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv4si:
-      intrinsicName = "x86.avx512.mask.scatterdiv4.si";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv8sf:
-      intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
-      break;
-    case X86::BI__builtin_ia32_scatterdiv8si:
-      intrinsicName = "x86.avx512.mask.scatterdiv8.si";
-      break;
-    case X86::BI__builtin_ia32_scattersiv2df:
-      intrinsicName = "x86.avx512.mask.scattersiv2.df";
-      break;
-    case X86::BI__builtin_ia32_scattersiv2di:
-      intrinsicName = "x86.avx512.mask.scattersiv2.di";
-      break;
-    case X86::BI__builtin_ia32_scattersiv4df:
-      intrinsicName = "x86.avx512.mask.scattersiv4.df";
-      break;
-    case X86::BI__builtin_ia32_scattersiv4di:
-      intrinsicName = "x86.avx512.mask.scattersiv4.di";
-      break;
-    case X86::BI__builtin_ia32_scattersiv4sf:
-      intrinsicName = "x86.avx512.mask.scattersiv4.sf";
-      break;
-    case X86::BI__builtin_ia32_scattersiv4si:
-      intrinsicName = "x86.avx512.mask.scattersiv4.si";
-      break;
-    case X86::BI__builtin_ia32_scattersiv8sf:
-      intrinsicName = "x86.avx512.mask.scattersiv8.sf";
-      break;
-    case X86::BI__builtin_ia32_scattersiv8si:
-      intrinsicName = "x86.avx512.mask.scattersiv8.si";
-      break;
-    }
-
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    unsigned minElts =
-        std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
-                 cast<cir::VectorType>(ops[3].getType()).getSize());
-    ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);
-
-    return emitIntrinsicCallOp(builder, loc, intrinsicName,
-                               convertType(expr->getType()), ops);
-  }
-  case X86::BI__builtin_ia32_vextractf128_pd256:
-  case X86::BI__builtin_ia32_vextractf128_ps256:
-  case X86::BI__builtin_ia32_vextractf128_si256:
-  case X86::BI__builtin_ia32_extract128i256:
-  case X86::BI__builtin_ia32_extractf64x4_mask:
-  case X86::BI__builtin_ia32_extractf32x4_mask:
-  case X86::BI__builtin_ia32_extracti64x4_mask:
-  case X86::BI__builtin_ia32_extracti32x4_mask:
-  case X86::BI__builtin_ia32_extractf32x8_mask:
-  case X86::BI__builtin_ia32_extracti32x8_mask:
-  case X86::BI__builtin_ia32_extractf32x4_256_mask:
-  case X86::BI__builtin_ia32_extracti32x4_256_mask:
-  case X86::BI__builtin_ia32_extractf64x2_256_mask:
-  case X86::BI__builtin_ia32_extracti64x2_256_mask:
-  case X86::BI__builtin_ia32_extractf64x2_512_mask:
-  case X86::BI__builtin_ia32_extracti64x2_512_mask:
-  case X86::BI__builtin_ia32_vinsertf128_pd256:
-  case X86::BI__builtin_ia32_vinsertf128_ps256:
-  case X86::BI__builtin_ia32_vinsertf128_si256:
-  case X86::BI__builtin_ia32_insert128i256:
-  case X86::BI__builtin_ia32_insertf64x4:
-  case X86::BI__builtin_ia32_insertf32x4:
-  case X86::BI__builtin_ia32_inserti64x4:
-  case X86::BI__builtin_ia32_inserti32x4:
-  case X86::BI__builtin_ia32_insertf32x8:
-  case X86::BI__builtin_ia32_inserti32x8:
-  case X86::BI__builtin_ia32_insertf32x4_256:
-  case X86::BI__builtin_ia32_inserti32x4_256:
-  case X86::BI__builtin_ia32_insertf64x2_256:
-  case X86::BI__builtin_ia32_inserti64x2_256:
-  case X86::BI__builtin_ia32_insertf64x2_512:
-  case X86::BI__builtin_ia32_inserti64x2_512:
-  case X86::BI__builtin_ia32_pmovqd512_mask:
-  case X86::BI__builtin_ia32_pmovwb512_mask:
-  case X86::BI__builtin_ia32_pblendw128:
-  case X86::BI__builtin_ia32_blendpd:
-  case X86::BI__builtin_ia32_blendps:
-  case X86::BI__builtin_ia32_blendpd256:
-  case X86::BI__builtin_ia32_blendps256:
-  case X86::BI__builtin_ia32_pblendw256:
-  case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
-  case X86::BI__builtin_ia32_pshuflw:
-  case X86::BI__builtin_ia32_pshuflw256:
-  case X86::BI__builtin_ia32_pshuflw512:
-    return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
-                         true);
-  case X86::BI__builtin_ia32_pshufhw:
-  case X86::BI__builtin_ia32_pshufhw256:
-  case X86::BI__builtin_ia32_pshufhw512:
-    return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
-                         false);
-  case X86::BI__builtin_ia32_pshufd:
-  case X86::BI__builtin_ia32_pshufd256:
-  case X86::BI__builtin_ia32_pshufd512:
-  case X86::BI__builtin_ia32_vpermilpd:
-  case X86::BI__builtin_ia32_vpermilps:
-  case X86::BI__builtin_ia32_vpermilpd256:
-  case X86::BI__builtin_ia32_vpermilps256:
-  case X86::BI__builtin_ia32_vpermilpd512:
-  case X86::BI__builtin_ia32_vpermilps512: {
-    const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
-
-    llvm::SmallVector<int64_t, 16> mask(16);
-    computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
-
-    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
-  }
-  case X86::BI__builtin_ia32_shufpd:
-  case X86::BI__builtin_ia32_shufpd256:
-  case X86::BI__builtin_ia32_shufpd512:
-  case X86::BI__builtin_ia32_shufps:
-  case X86::BI__builtin_ia32_shufps256:
-  case X86::BI__builtin_ia32_shufps512: {
-    const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
-
-    llvm::SmallVector<int64_t, 16> mask(16);
-    computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
-
-    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
-                                    mask);
-  }
-  case X86::BI__builtin_ia32_permdi256:
-  case X86::BI__builtin_ia32_permdf256:
-  case X86::BI__builtin_ia32_permdi512:
-  case X86::BI__builtin_ia32_permdf512:
-  case X86::BI__builtin_ia32_palignr128:
-  case X86::BI__builtin_ia32_palignr256:
-  case X86::BI__builtin_ia32_palignr512:
-  case X86::BI__builtin_ia32_alignd128:
-  case X86::BI__builtin_ia32_alignd256:
-  case X86::BI__builtin_ia32_alignd512:
-  case X86::BI__builtin_ia32_alignq128:
-  case X86::BI__builtin_ia32_alignq256:
-  case X86::BI__builtin_ia32_alignq512:
-  case X86::BI__builtin_ia32_shuf_f32x4_256:
-  case X86::BI__builtin_ia32_shuf_f64x2_256:
-  case X86::BI__builtin_ia32_shuf_i32x4_256:
-  case X86::BI__builtin_ia32_shuf_i64x2_256:
-  case X86::BI__builtin_ia32_shuf_f32x4:
-  case X86::BI__builtin_ia32_shuf_f64x2:
-  case X86::BI__builtin_ia32_shuf_i32x4:
-  case X86::BI__builtin_ia32_shuf_i64x2:
-  case X86::BI__builtin_ia32_vperm2f128_pd256:
-  case X86::BI__builtin_ia32_vperm2f128_ps256:
-  case X86::BI__builtin_ia32_vperm2f128_si256:
-  case X86::BI__builtin_ia32_permti256:
-  case X86::BI__builtin_ia32_pslldqi128_byteshift:
-  case X86::BI__builtin_ia32_pslldqi256_byteshift:
-  case X86::BI__builtin_ia32_pslldqi512_byteshift:
-  case X86::BI__builtin_ia32_psrldqi128_byteshift:
-  case X86::BI__builtin_ia32_psrldqi256_byteshift:
-  case X86::BI__builtin_ia32_psrldqi512_byteshift:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
-  case X86::BI__builtin_ia32_kshiftliqi:
-  case X86::BI__builtin_ia32_kshiftlihi:
-  case X86::BI__builtin_ia32_kshiftlisi:
-  case X86::BI__builtin_ia32_kshiftlidi: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    unsigned shiftVal =
-        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
-        0xff;
-    unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
-    if (shiftVal >= numElems)
-      return builder.getNullValue(ops[0].getType(), loc);
-
-    mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
-
-    SmallVector<mlir::Attribute> indices;
-    mlir::Type i32Ty = builder.getSInt32Ty();
-    for (auto i : llvm::seq<unsigned>(0, numElems))
-      indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
-
-    mlir::Value zero = builder.getNullValue(in.getType(), loc);
-    mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
-    return builder.createBitcast(sv, ops[0].getType());
-  }
-  case X86::BI__builtin_ia32_kshiftriqi:
-  case X86::BI__builtin_ia32_kshiftrihi:
-  case X86::BI__builtin_ia32_kshiftrisi:
-  case X86::BI__builtin_ia32_kshiftridi: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    unsigned shiftVal =
-        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
-        0xff;
-    unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
-    if (shiftVal >= numElems)
-      return builder.getNullValue(ops[0].getType(), loc);
-
-    mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
-
-    SmallVector<mlir::Attribute> indices;
-    mlir::Type i32Ty = builder.getSInt32Ty();
-    for (auto i : llvm::seq<unsigned>(0, numElems))
-      indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
-
-    mlir::Value zero = builder.getNullValue(in.getType(), loc);
-    mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
-    return builder.createBitcast(sv, ops[0].getType());
-  }
-  case X86::BI__builtin_ia32_vprotbi:
-  case X86::BI__builtin_ia32_vprotwi:
-  case X86::BI__builtin_ia32_vprotdi:
-  case X86::BI__builtin_ia32_vprotqi:
-  case X86::BI__builtin_ia32_prold128:
-  case X86::BI__builtin_ia32_prold256:
-  case X86::BI__builtin_ia32_prold512:
-  case X86::BI__builtin_ia32_prolq128:
-  case X86::BI__builtin_ia32_prolq256:
-  case X86::BI__builtin_ia32_prolq512:
-    return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
-                              ops[0], ops[1], false);
-  case X86::BI__builtin_ia32_prord128:
-  case X86::BI__builtin_ia32_prord256:
-  case X86::BI__builtin_ia32_prord512:
-  case X86::BI__builtin_ia32_prorq128:
-  case X86::BI__builtin_ia32_prorq256:
-  case X86::BI__builtin_ia32_prorq512:
-    return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
-                              ops[0], ops[1], true);
-  case X86::BI__builtin_ia32_selectb_128:
-  case X86::BI__builtin_ia32_selectb_256:
-  case X86::BI__builtin_ia32_selectb_512:
-  case X86::BI__builtin_ia32_selectw_128:
-  case X86::BI__builtin_ia32_selectw_256:
-  case X86::BI__builtin_ia32_selectw_512:
-  case X86::BI__builtin_ia32_selectd_128:
-  case X86::BI__builtin_ia32_selectd_256:
-  case X86::BI__builtin_ia32_selectd_512:
-  case X86::BI__builtin_ia32_selectq_128:
-  case X86::BI__builtin_ia32_selectq_256:
-  case X86::BI__builtin_ia32_selectq_512:
-  case X86::BI__builtin_ia32_selectph_128:
-  case X86::BI__builtin_ia32_selectph_256:
-  case X86::BI__builtin_ia32_selectph_512:
-  case X86::BI__builtin_ia32_selectpbf_128:
-  case X86::BI__builtin_ia32_selectpbf_256:
-  case X86::BI__builtin_ia32_selectpbf_512:
-  case X86::BI__builtin_ia32_selectps_128:
-  case X86::BI__builtin_ia32_selectps_256:
-  case X86::BI__builtin_ia32_selectps_512:
-  case X86::BI__builtin_ia32_selectpd_128:
-  case X86::BI__builtin_ia32_selectpd_256:
-  case X86::BI__builtin_ia32_selectpd_512:
-  case X86::BI__builtin_ia32_selectsh_128:
-  case X86::BI__builtin_ia32_selectsbf_128:
-  case X86::BI__builtin_ia32_selectss_128:
-  case X86::BI__builtin_ia32_selectsd_128:
-  case X86::BI__builtin_ia32_cmpb128_mask:
-  case X86::BI__builtin_ia32_cmpb256_mask:
-  case X86::BI__builtin_ia32_cmpb512_mask:
-  case X86::BI__builtin_ia32_cmpw128_mask:
-  case X86::BI__builtin_ia32_cmpw256_mask:
-  case X86::BI__builtin_ia32_cmpw512_mask:
-  case X86::BI__builtin_ia32_cmpd128_mask:
-  case X86::BI__builtin_ia32_cmpd256_mask:
-  case X86::BI__builtin_ia32_cmpd512_mask:
-  case X86::BI__builtin_ia32_cmpq128_mask:
-  case X86::BI__builtin_ia32_cmpq256_mask:
-  case X86::BI__builtin_ia32_cmpq512_mask:
-  case X86::BI__builtin_ia32_ucmpb128_mask:
-  case X86::BI__builtin_ia32_ucmpb256_mask:
-  case X86::BI__builtin_ia32_ucmpb512_mask:
-  case X86::BI__builtin_ia32_ucmpw128_mask:
-  case X86::BI__builtin_ia32_ucmpw256_mask:
-  case X86::BI__builtin_ia32_ucmpw512_mask:
-  case X86::BI__builtin_ia32_ucmpd128_mask:
-  case X86::BI__builtin_ia32_ucmpd256_mask:
-  case X86::BI__builtin_ia32_ucmpd512_mask:
-  case X86::BI__builtin_ia32_ucmpq128_mask:
-  case X86::BI__builtin_ia32_ucmpq256_mask:
-  case X86::BI__builtin_ia32_ucmpq512_mask:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
-  case X86::BI__builtin_ia32_vpcomb:
-  case X86::BI__builtin_ia32_vpcomw:
-  case X86::BI__builtin_ia32_vpcomd:
-  case X86::BI__builtin_ia32_vpcomq:
-    return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
-  case X86::BI__builtin_ia32_vpcomub:
-  case X86::BI__builtin_ia32_vpcomuw:
-  case X86::BI__builtin_ia32_vpcomud:
-  case X86::BI__builtin_ia32_vpcomuq:
-    return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
-  case X86::BI__builtin_ia32_kortestcqi:
-  case X86::BI__builtin_ia32_kortestchi:
-  case X86::BI__builtin_ia32_kortestcsi:
-  case X86::BI__builtin_ia32_kortestcdi: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
-    mlir::Value allOnesOp =
-        builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
-    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
-    mlir::Value cmp =
-        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
-    return builder.createCast(cir::CastKind::bool_to_int, cmp,
-                              cgm.convertType(expr->getType()));
-  }
-  case X86::BI__builtin_ia32_kortestzqi:
-  case X86::BI__builtin_ia32_kortestzhi:
-  case X86::BI__builtin_ia32_kortestzsi:
-  case X86::BI__builtin_ia32_kortestzdi: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
-    mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
-    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
-    mlir::Value cmp =
-        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
-    return builder.createCast(cir::CastKind::bool_to_int, cmp,
-                              cgm.convertType(expr->getType()));
-  }
-  case X86::BI__builtin_ia32_ktestcqi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestc.b", ops);
-  case X86::BI__builtin_ia32_ktestzqi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestz.b", ops);
-  case X86::BI__builtin_ia32_ktestchi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestc.w", ops);
-  case X86::BI__builtin_ia32_ktestzhi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestz.w", ops);
-  case X86::BI__builtin_ia32_ktestcsi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestc.d", ops);
-  case X86::BI__builtin_ia32_ktestzsi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestz.d", ops);
-  case X86::BI__builtin_ia32_ktestcdi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestc.q", ops);
-  case X86::BI__builtin_ia32_ktestzdi:
-    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
-                           "x86.avx512.ktestz.q", ops);
-  case X86::BI__builtin_ia32_kaddqi:
-    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
-                               "x86.avx512.kadd.b", ops);
-  case X86::BI__builtin_ia32_kaddhi:
-    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
-                               "x86.avx512.kadd.w", ops);
-  case X86::BI__builtin_ia32_kaddsi:
-    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
-                               "x86.avx512.kadd.d", ops);
-  case X86::BI__builtin_ia32_kadddi:
-    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
-                               "x86.avx512.kadd.q", ops);
-  case X86::BI__builtin_ia32_kandqi:
-  case X86::BI__builtin_ia32_kandhi:
-  case X86::BI__builtin_ia32_kandsi:
-  case X86::BI__builtin_ia32_kanddi:
-    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
-                            cir::BinOpKind::And, ops);
-  case X86::BI__builtin_ia32_kandnqi:
-  case X86::BI__builtin_ia32_kandnhi:
-  case X86::BI__builtin_ia32_kandnsi:
-  case X86::BI__builtin_ia32_kandndi:
-    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
-                            cir::BinOpKind::And, ops, true);
-  case X86::BI__builtin_ia32_korqi:
-  case X86::BI__builtin_ia32_korhi:
-  case X86::BI__builtin_ia32_korsi:
-  case X86::BI__builtin_ia32_kordi:
-    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
-                            cir::BinOpKind::Or, ops);
-  case X86::BI__builtin_ia32_kxnorqi:
-  case X86::BI__builtin_ia32_kxnorhi:
-  case X86::BI__builtin_ia32_kxnorsi:
-  case X86::BI__builtin_ia32_kxnordi:
-    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
-                            cir::BinOpKind::Xor, ops, true);
-  case X86::BI__builtin_ia32_kxorqi:
-  case X86::BI__builtin_ia32_kxorhi:
-  case X86::BI__builtin_ia32_kxorsi:
-  case X86::BI__builtin_ia32_kxordi:
-    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
-                            cir::BinOpKind::Xor, ops);
-  case X86::BI__builtin_ia32_knotqi:
-  case X86::BI__builtin_ia32_knothi:
-  case X86::BI__builtin_ia32_knotsi:
-  case X86::BI__builtin_ia32_knotdi: {
-    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
-    unsigned numElts = intTy.getWidth();
-    mlir::Value resVec =
-        getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
-    return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
-  }
-  case X86::BI__builtin_ia32_kmovb:
-  case X86::BI__builtin_ia32_kmovw:
-  case X86::BI__builtin_ia32_kmovd:
-  case X86::BI__builtin_ia32_kmovq: {
-    // Bitcast to vXi1 type and then back to integer. This gets the mask
-    // register type into the IR, but might be optimized out depending on
-    // what's around it.
-    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
-    unsigned numElts = intTy.getWidth();
-    mlir::Value resVec =
-        getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
-    return builder.createBitcast(resVec, ops[0].getType());
-  }
-  case X86::BI__builtin_ia32_sqrtsh_round_mask:
-  case X86::BI__builtin_ia32_sqrtsd_round_mask:
-  case X86::BI__builtin_ia32_sqrtss_round_mask:
-    errorNYI("Unimplemented builtin");
-    return {};
-  case X86::BI__builtin_ia32_sqrtph512:
-  case X86::BI__builtin_ia32_sqrtps512:
-  case X86::BI__builtin_ia32_sqrtpd512: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    mlir::Value arg = ops[0];
-    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
-  }
-  case X86::BI__builtin_ia32_pmuludq128:
-  case X86::BI__builtin_ia32_pmuludq256:
-  case X86::BI__builtin_ia32_pmuludq512: {
-    unsigned opTypePrimitiveSizeInBits =
-        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
-    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
-                        ops, opTypePrimitiveSizeInBits);
-  }
-  case X86::BI__builtin_ia32_pmuldq128:
-  case X86::BI__builtin_ia32_pmuldq256:
-  case X86::BI__builtin_ia32_pmuldq512: {
-    unsigned opTypePrimitiveSizeInBits =
-        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
-    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
-                        ops, opTypePrimitiveSizeInBits);
-  }
-  case X86::BI__builtin_ia32_pternlogd512_mask:
-  case X86::BI__builtin_ia32_pternlogq512_mask:
-  case X86::BI__builtin_ia32_pternlogd128_mask:
-  case X86::BI__builtin_ia32_pternlogd256_mask:
-  case X86::BI__builtin_ia32_pternlogq128_mask:
-  case X86::BI__builtin_ia32_pternlogq256_mask:
-  case X86::BI__builtin_ia32_pternlogd512_maskz:
-  case X86::BI__builtin_ia32_pternlogq512_maskz:
-  case X86::BI__builtin_ia32_pternlogd128_maskz:
-  case X86::BI__builtin_ia32_pternlogd256_maskz:
-  case X86::BI__builtin_ia32_pternlogq128_maskz:
-  case X86::BI__builtin_ia32_pternlogq256_maskz:
-  case X86::BI__builtin_ia32_vpshldd128:
-  case X86::BI__builtin_ia32_vpshldd256:
-  case X86::BI__builtin_ia32_vpshldd512:
-  case X86::BI__builtin_ia32_vpshldq128:
-  case X86::BI__builtin_ia32_vpshldq256:
-  case X86::BI__builtin_ia32_vpshldq512:
-  case X86::BI__builtin_ia32_vpshldw128:
-  case X86::BI__builtin_ia32_vpshldw256:
-  case X86::BI__builtin_ia32_vpshldw512:
-  case X86::BI__builtin_ia32_vpshrdd128:
-  case X86::BI__builtin_ia32_vpshrdd256:
-  case X86::BI__builtin_ia32_vpshrdd512:
-  case X86::BI__builtin_ia32_vpshrdq128:
-  case X86::BI__builtin_ia32_vpshrdq256:
-  case X86::BI__builtin_ia32_vpshrdq512:
-  case X86::BI__builtin_ia32_vpshrdw128:
-  case X86::BI__builtin_ia32_vpshrdw256:
-  case X86::BI__builtin_ia32_vpshrdw512:
-  case X86::BI__builtin_ia32_reduce_fadd_pd512:
-  case X86::BI__builtin_ia32_reduce_fadd_ps512:
-  case X86::BI__builtin_ia32_reduce_fadd_ph512:
-  case X86::BI__builtin_ia32_reduce_fadd_ph256:
-  case X86::BI__builtin_ia32_reduce_fadd_ph128:
-  case X86::BI__builtin_ia32_reduce_fmul_pd512:
-  case X86::BI__builtin_ia32_reduce_fmul_ps512:
-  case X86::BI__builtin_ia32_reduce_fmul_ph512:
-  case X86::BI__builtin_ia32_reduce_fmul_ph256:
-  case X86::BI__builtin_ia32_reduce_fmul_ph128:
-  case X86::BI__builtin_ia32_reduce_fmax_pd512:
-  case X86::BI__builtin_ia32_reduce_fmax_ps512:
-  case X86::BI__builtin_ia32_reduce_fmax_ph512:
-  case X86::BI__builtin_ia32_reduce_fmax_ph256:
-  case X86::BI__builtin_ia32_reduce_fmax_ph128:
-  case X86::BI__builtin_ia32_reduce_fmin_pd512:
-  case X86::BI__builtin_ia32_reduce_fmin_ps512:
-  case X86::BI__builtin_ia32_reduce_fmin_ph512:
-  case X86::BI__builtin_ia32_reduce_fmin_ph256:
-  case X86::BI__builtin_ia32_reduce_fmin_ph128:
-  case X86::BI__builtin_ia32_rdrand16_step:
-  case X86::BI__builtin_ia32_rdrand32_step:
-  case X86::BI__builtin_ia32_rdrand64_step:
-  case X86::BI__builtin_ia32_rdseed16_step:
-  case X86::BI__builtin_ia32_rdseed32_step:
-  case X86::BI__builtin_ia32_rdseed64_step:
-  case X86::BI__builtin_ia32_addcarryx_u32:
-  case X86::BI__builtin_ia32_addcarryx_u64:
-  case X86::BI__builtin_ia32_subborrow_u32:
-  case X86::BI__builtin_ia32_subborrow_u64:
-  case X86::BI__builtin_ia32_fpclassps128_mask:
-  case X86::BI__builtin_ia32_fpclassps256_mask:
-  case X86::BI__builtin_ia32_fpclassps512_mask:
-  case X86::BI__builtin_ia32_vfpclassbf16128_mask:
-  case X86::BI__builtin_ia32_vfpclassbf16256_mask:
-  case X86::BI__builtin_ia32_vfpclassbf16512_mask:
-  case X86::BI__builtin_ia32_fpclassph128_mask:
-  case X86::BI__builtin_ia32_fpclassph256_mask:
-  case X86::BI__builtin_ia32_fpclassph512_mask:
-  case X86::BI__builtin_ia32_fpclasspd128_mask:
-  case X86::BI__builtin_ia32_fpclasspd256_mask:
-  case X86::BI__builtin_ia32_fpclasspd512_mask:
-  case X86::BI__builtin_ia32_vp2intersect_q_512:
-  case X86::BI__builtin_ia32_vp2intersect_q_256:
-  case X86::BI__builtin_ia32_vp2intersect_q_128:
-  case X86::BI__builtin_ia32_vp2intersect_d_512:
-  case X86::BI__builtin_ia32_vp2intersect_d_256:
-  case X86::BI__builtin_ia32_vp2intersect_d_128:
-  case X86::BI__builtin_ia32_vpmultishiftqb128:
-  case X86::BI__builtin_ia32_vpmultishiftqb256:
-  case X86::BI__builtin_ia32_vpmultishiftqb512:
-  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
-  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
-  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
-  case X86::BI__builtin_ia32_cmpeqps:
-  case X86::BI__builtin_ia32_cmpeqpd:
-  case X86::BI__builtin_ia32_cmpltps:
-  case X86::BI__builtin_ia32_cmpltpd:
-  case X86::BI__builtin_ia32_cmpleps:
-  case X86::BI__builtin_ia32_cmplepd:
-  case X86::BI__builtin_ia32_cmpunordps:
-  case X86::BI__builtin_ia32_cmpunordpd:
-  case X86::BI__builtin_ia32_cmpneqps:
-  case X86::BI__builtin_ia32_cmpneqpd:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
-  case X86::BI__builtin_ia32_cmpnltps:
-  case X86::BI__builtin_ia32_cmpnltpd:
-    return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
-                          cir::CmpOpKind::lt, /*shouldInvert=*/true);
-  case X86::BI__builtin_ia32_cmpnleps:
-  case X86::BI__builtin_ia32_cmpnlepd:
-    return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
-                          cir::CmpOpKind::le, /*shouldInvert=*/true);
-  case X86::BI__builtin_ia32_cmpordps:
-  case X86::BI__builtin_ia32_cmpordpd:
-  case X86::BI__builtin_ia32_cmpph128_mask:
-  case X86::BI__builtin_ia32_cmpph256_mask:
-  case X86::BI__builtin_ia32_cmpph512_mask:
-  case X86::BI__builtin_ia32_cmpps128_mask:
-  case X86::BI__builtin_ia32_cmpps256_mask:
-  case X86::BI__builtin_ia32_cmpps512_mask:
-  case X86::BI__builtin_ia32_cmppd128_mask:
-  case X86::BI__builtin_ia32_cmppd256_mask:
-  case X86::BI__builtin_ia32_cmppd512_mask:
-  case X86::BI__builtin_ia32_vcmpbf16512_mask:
-  case X86::BI__builtin_ia32_vcmpbf16256_mask:
-  case X86::BI__builtin_ia32_vcmpbf16128_mask:
-  case X86::BI__builtin_ia32_cmpps:
-  case X86::BI__builtin_ia32_cmpps256:
-  case X86::BI__builtin_ia32_cmppd:
-  case X86::BI__builtin_ia32_cmppd256:
-  case X86::BI__builtin_ia32_cmpeqss:
-  case X86::BI__builtin_ia32_cmpltss:
-  case X86::BI__builtin_ia32_cmpless:
-  case X86::BI__builtin_ia32_cmpunordss:
-  case X86::BI__builtin_ia32_cmpneqss:
-  case X86::BI__builtin_ia32_cmpnltss:
-  case X86::BI__builtin_ia32_cmpnless:
-  case X86::BI__builtin_ia32_cmpordss:
-  case X86::BI__builtin_ia32_cmpeqsd:
-  case X86::BI__builtin_ia32_cmpltsd:
-  case X86::BI__builtin_ia32_cmplesd:
-  case X86::BI__builtin_ia32_cmpunordsd:
-  case X86::BI__builtin_ia32_cmpneqsd:
-  case X86::BI__builtin_ia32_cmpnltsd:
-  case X86::BI__builtin_ia32_cmpnlesd:
-  case X86::BI__builtin_ia32_cmpordsd:
-  case X86::BI__builtin_ia32_vcvtph2ps_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
-  case X86::BI__cpuid:
-  case X86::BI__cpuidex:
-  case X86::BI__emul:
-  case X86::BI__emulu:
-  case X86::BI__mulh:
-  case X86::BI__umulh:
-  case X86::BI_mul128:
-  case X86::BI_umul128:
-  case X86::BI__faststorefence:
-  case X86::BI__shiftleft128:
-  case X86::BI__shiftright128:
-  case X86::BI_ReadWriteBarrier:
-  case X86::BI_ReadBarrier:
-  case X86::BI_WriteBarrier:
-  case X86::BI_AddressOfReturnAddress:
-  case X86::BI__stosb:
-  case X86::BI__ud2:
-  case X86::BI__int2c:
-  case X86::BI__readfsbyte:
-  case X86::BI__readfsword:
-  case X86::BI__readfsdword:
-  case X86::BI__readfsqword:
-  case X86::BI__readgsbyte:
-  case X86::BI__readgsword:
-  case X86::BI__readgsdword:
-  case X86::BI__readgsqword:
-  case X86::BI__builtin_ia32_encodekey128_u32:
-  case X86::BI__builtin_ia32_encodekey256_u32:
-  case X86::BI__builtin_ia32_aesenc128kl_u8:
-  case X86::BI__builtin_ia32_aesdec128kl_u8:
-  case X86::BI__builtin_ia32_aesenc256kl_u8:
-  case X86::BI__builtin_ia32_aesdec256kl_u8:
-  case X86::BI__builtin_ia32_aesencwide128kl_u8:
-  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
-  case X86::BI__builtin_ia32_aesencwide256kl_u8:
-  case X86::BI__builtin_ia32_aesdecwide256kl_u8:
-  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
-  case X86::BI__builtin_ia32_vfmaddcph512_mask:
-  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
-  case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
-  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
-  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
-  case X86::BI__builtin_ia32_prefetchi:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
-  }
-}
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
+// call to be later resolved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenBuilder.h"
+#include "CIRGenFunction.h"
+#include "CIRGenModule.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/ValueRange.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+template <typename... Operands>
+static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
+                                       mlir::Location loc, const StringRef str,
+                                       const mlir::Type &resTy,
+                                       Operands &&...op) {
+  return cir::LLVMIntrinsicCallOp::create(builder, loc,
+                                          builder.getStringAttr(str), resTy,
+                                          std::forward<Operands>(op)...)
+      .getResult();
+}
+
+// OG has unordered comparison as a form of optimization in addition to
+// ordered comparison, while CIR doesn't.
+//
+// This means that we can't encode the comparison code of UGT (unordered
+// greater than), at least not at the CIR level.
+//
+// The boolean shouldInvert compensates for this. For example, to get the
+// comparison code UGT, we pass in emitVectorFCmp(OLE, shouldInvert = true),
+// since OLE is the inverse of UGT.
+//
+// There are several ways to support this otherwise:
+// - Register extra CmpOpKinds for the unordered comparison types and build
+//   the translation code to go from CIR -> LLVM dialect. Notice we get this
+//   naturally with shouldInvert, benefiting from existing infrastructure,
+//   albeit having to generate an extra `not` at the CIR level.
+// - Just add the extra comparison codes to a new VecCmpOpKind instead of
+//   cluttering CmpOpKind.
+// - Add a boolean in VecCmpOp to indicate whether it's doing an unordered or
+//   ordered comparison.
+// - Just emit the intrinsic call instead of calling this helper; see how the
+//   LLVM lowering handles this.
+static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
+                                  llvm::SmallVector<mlir::Value> &ops,
+                                  mlir::Location loc, cir::CmpOpKind pred,
+                                  bool shouldInvert) {
+  assert(!cir::MissingFeatures::cgFPOptionsRAII());
+  // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
+  assert(!cir::MissingFeatures::emitConstrainedFPCall());
+  mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
+  mlir::Value bitCast = builder.createBitcast(
+      shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
+  return bitCast;
+}
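[Usage sketch, illustrative only and not part of the patch; it mirrors the cmpnltps/cmpnltpd handling that appears later in this diff.]

    // Lower an SSE "not less than" compare: UGE == !(a OLT b). The inversion
    // is emitted as a cir.unary(not) over the cir.vec.cmp result, which the
    // LLVM lowering can fold back into a single unordered fcmp.
    mlir::Value res = emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
                                     cir::CmpOpKind::lt, /*shouldInvert=*/true);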
+static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
+                                   mlir::Value mask, unsigned numElems) {
+  auto maskTy = cir::VectorType::get(
+      builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
+  mlir::Value maskVec = builder.createBitcast(mask, maskTy);
+
+  // If we have less than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
+  if (numElems < 8) {
+    SmallVector<mlir::Attribute> indices;
+    mlir::Type i32Ty = builder.getSInt32Ty();
+    for (auto i : llvm::seq<unsigned>(0, numElems))
+      indices.push_back(cir::IntAttr::get(i32Ty, i));
+
+    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
+  }
+  return maskVec;
+}
+
+// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins.
+//
+// The vector is split into lanes of 8 word elements (16 bits). The lower or
+// upper half of each lane, controlled by `isLow`, is shuffled in the following
+// way: The immediate is truncated to 8 bits, separated into 4 2-bit fields.
+// The i-th field's value represents the resulting index of the i-th element in
+// the half lane after shuffling. The other half of the lane remains unchanged.
+static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
+                                       const mlir::Value vec,
+                                       const mlir::Value immediate,
+                                       const mlir::Location loc,
+                                       const bool isLow) {
+  uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
+
+  auto vecTy = cast<cir::VectorType>(vec.getType());
+  unsigned numElts = vecTy.getSize();
+
+  unsigned firstHalfStart = isLow ? 0 : 4;
+  unsigned secondHalfStart = 4 - firstHalfStart;
+
+  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+  imm = (imm & 0xff) * 0x01010101;
+
+  int64_t indices[32];
+  for (unsigned l = 0; l != numElts; l += 8) {
+    for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
+      indices[l + i] = l + (imm & 3) + firstHalfStart;
+      imm >>= 2;
+    }
+    for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
+      indices[l + i] = l + i;
+  }
+
+  return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
+}
+
+// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins.
+// The shuffle mask is written to outIndices.
+static void
+computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
+                           uint32_t imm, const bool isShufP,
+                           llvm::SmallVectorImpl<int64_t> &outIndices) {
+  auto vecTy = cast<cir::VectorType>(vec.getType());
+  unsigned numElts = vecTy.getSize();
+  unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
+  unsigned numLaneElts = numElts / numLanes;
+
+  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+  imm = (imm & 0xff) * 0x01010101;
+
+  for (unsigned l = 0; l != numElts; l += numLaneElts) {
+    for (unsigned i = 0; i != numLaneElts; ++i) {
+      uint32_t idx = imm % numLaneElts;
+      imm /= numLaneElts;
+      if (isShufP && i >= (numLaneElts / 2))
+        idx += numElts;
+      outIndices[l + i] = l + idx;
+    }
+  }
+
+  outIndices.resize(numElts);
+}
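[A standalone worked example of the immediate decoding above, assuming the same splat-and-divide scheme; illustrative only, not part of the patch.]

// Recompute the pshufd mask for imm = 0x1B on one 4 x i32 lane: the 2-bit
// fields of the immediate give each element's source index, so 0b00'01'10'11
// selects {3, 2, 1, 0}, i.e. a full lane reversal.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t imm = 0x1B;
  const unsigned numLaneElts = 4;
  int64_t mask[numLaneElts];
  imm = (imm & 0xff) * 0x01010101; // same splat trick as the helper above
  for (unsigned i = 0; i != numLaneElts; ++i) {
    mask[i] = imm % numLaneElts;
    imm /= numLaneElts;
  }
  std::printf("%lld %lld %lld %lld\n", (long long)mask[0], (long long)mask[1],
              (long long)mask[2], (long long)mask[3]); // prints: 3 2 1 0
  return 0;
}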
+ imm = (imm & 0xff) * 0x01010101; + + for (unsigned l = 0; l != numElts; l += numLaneElts) { + for (unsigned i = 0; i != numLaneElts; ++i) { + uint32_t idx = imm % numLaneElts; + imm /= numLaneElts; + if (isShufP && i >= (numLaneElts / 2)) + idx += numElts; + outIndices[l + i] = l + idx; + } + } + + outIndices.resize(numElts); +} + +static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, + mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl &ops) { + + auto intTy = cast(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); + mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); + mlir::Type vecTy = lhsVec.getType(); + mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy, + mlir::ValueRange{lhsVec, rhsVec}); + return builder.createBitcast(resVec, ops[0].getType()); +} + +static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder, + mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl &ops) { + unsigned numElems = cast(ops[0].getType()).getWidth(); + + // Convert both operands to mask vectors. + mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems); + mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems); + + mlir::Type i32Ty = builder.getSInt32Ty(); + + // Create indices for extracting the first half of each vector. + SmallVector halfIndices; + for (auto i : llvm::seq(0, numElems / 2)) + halfIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Extract first half of each vector. This gives better codegen than + // doing it in a single shuffle. + mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices); + mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices); + + // Create indices for concatenating the vectors. + // NOTE: Operands are swapped to match the intrinsic definition. + // After the half extraction, both vectors have numElems/2 elements. + // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] + // select from rhsHalf, and indices [numElems/2..numElems-1] select from + // lhsHalf. + SmallVector concatIndices; + for (auto i : llvm::seq(0, numElems)) + concatIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Concat the vectors (RHS first, then LHS). 
+static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
+                                    mlir::Location loc,
+                                    cir::BinOpKind binOpKind,
+                                    SmallVectorImpl<mlir::Value> &ops,
+                                    bool invertLHS = false) {
+  unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
+  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
+  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);
+
+  if (invertLHS)
+    lhs = builder.createNot(lhs);
+  return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
+                               ops[0].getType());
+}
+
+static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
+                                   const std::string &intrinsicName,
+                                   SmallVectorImpl<mlir::Value> &ops) {
+  auto intTy = cast<cir::IntType>(ops[0].getType());
+  unsigned numElts = intTy.getWidth();
+  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+  mlir::Type resTy = builder.getSInt32Ty();
+  return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
+                             mlir::ValueRange{lhsVec, rhsVec});
+}
+
+static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
+                                 mlir::Value vec, mlir::Value value,
+                                 mlir::Value indexOp) {
+  unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();
+
+  uint64_t index =
+      indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
+
+  index &= numElts - 1;
+
+  cir::ConstantOp indexVal = builder.getUInt64(index, loc);
+
+  return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
+}
+
+static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
+                                      mlir::Location location, mlir::Value &op0,
+                                      mlir::Value &op1, mlir::Value &amt,
+                                      bool isRight) {
+  mlir::Type op0Ty = op0.getType();
+
+  // Amount may be a scalar immediate, in which case create a splat vector.
+  // Funnel shift amounts are treated as modulo, and types are all power-of-2,
+  // so we only care about the lowest log2 bits anyway.
+  if (amt.getType() != op0Ty) {
+    auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
+    uint64_t numElems = vecTy.getSize();
+
+    auto amtTy = mlir::cast<cir::IntType>(amt.getType());
+    auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
+
+    // If signed, cast to the same width but unsigned first to
+    // ensure zero-extension when casting to a bigger unsigned `vecElemTy`.
+    if (amtTy.isSigned()) {
+      cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
+      amt = builder.createIntCast(amt, unsignedAmtTy);
+    }
+    cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
+    amt = builder.createIntCast(amt, unsignedVecElemType);
+    amt = cir::VecSplatOp::create(
+        builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
+        amt);
+  }
+
+  const StringRef intrinsicName = isRight ? "fshr" : "fshl";
+  return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
+                             mlir::ValueRange{op0, op1, amt});
+}
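[A scalar model of why the rotate builtins (prold/prolq and friends) can be emitted as funnel shifts with both value operands equal; illustrative only, not part of the patch.]

// fshl(v, v, amt) degenerates to a left rotate, shown here for 32-bit lanes.
static inline unsigned rotl32Model(unsigned v, unsigned amt) {
  amt &= 31; // funnel-shift amounts are taken modulo the element width
  return (v << amt) | (v >> ((32 - amt) & 31));
}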
"fshr" : "fshl"; + return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty, + mlir::ValueRange{op0, op1, amt}); +} + +static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, + bool isSigned, + SmallVectorImpl &ops, + unsigned opTypePrimitiveSizeInBits) { + mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(), + opTypePrimitiveSizeInBits / 64); + mlir::Value lhs = builder.createBitcast(loc, ops[0], ty); + mlir::Value rhs = builder.createBitcast(loc, ops[1], ty); + if (isSigned) { + cir::ConstantOp shiftAmt = + builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32)); + cir::VecSplatOp shiftSplatVecOp = + cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult()); + mlir::Value shiftSplatValue = shiftSplatVecOp.getResult(); + // In CIR, right-shift operations are automatically lowered to either an + // arithmetic or logical shift depending on the operand type. The purpose + // of the shifts here is to propagate the sign bit of the 32-bit input + // into the upper bits of each vector lane. + lhs = builder.createShift(loc, lhs, shiftSplatValue, true); + lhs = builder.createShift(loc, lhs, shiftSplatValue, false); + rhs = builder.createShift(loc, rhs, shiftSplatValue, true); + rhs = builder.createShift(loc, rhs, shiftSplatValue, false); + } else { + cir::ConstantOp maskScalar = builder.getConstant( + loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff)); + cir::VecSplatOp mask = + cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult()); + // Clear the upper bits + lhs = builder.createAnd(loc, lhs, mask); + rhs = builder.createAnd(loc, rhs, mask); + } + return builder.createMul(loc, lhs, rhs); +} + +static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, + llvm::SmallVector ops, + bool isSigned) { + mlir::Value op0 = ops[0]; + mlir::Value op1 = ops[1]; + + cir::VectorType ty = cast(op0.getType()); + cir::IntType elementTy = cast(ty.getElementType()); + + uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7; + + cir::CmpOpKind pred; + switch (imm) { + case 0x0: + pred = cir::CmpOpKind::lt; + break; + case 0x1: + pred = cir::CmpOpKind::le; + break; + case 0x2: + pred = cir::CmpOpKind::gt; + break; + case 0x3: + pred = cir::CmpOpKind::ge; + break; + case 0x4: + pred = cir::CmpOpKind::eq; + break; + case 0x5: + pred = cir::CmpOpKind::ne; + break; + case 0x6: + return builder.getNullValue(ty, loc); // FALSE + case 0x7: { + llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth()); + return cir::VecSplatOp::create( + builder, loc, ty, + builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE + } + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + if ((!isSigned && elementTy.isSigned()) || + (isSigned && elementTy.isUnsigned())) { + elementTy = elementTy.isSigned() ? 
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
+                                               const CallExpr *expr) {
+  if (builtinID == Builtin::BI__builtin_cpu_is) {
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
+    return {};
+  }
+  if (builtinID == Builtin::BI__builtin_cpu_supports) {
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
+    return {};
+  }
+  if (builtinID == Builtin::BI__builtin_cpu_init) {
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
+    return {};
+  }
+
+  // Handle MSVC intrinsics before argument evaluation to prevent double
+  // evaluation.
+  assert(!cir::MissingFeatures::msvcBuiltins());
+
+  // Find out if any arguments are required to be integer constant expressions.
+  assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+
+  // The operands of the builtin call.
+  llvm::SmallVector<mlir::Value> ops;
+
+  // `iceArguments` is a bitmap: bit i is set when the i-th argument is
+  // required to be a constant integer expression.
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None && "Error while getting builtin type.");
+
+  for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
+    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
+
+  CIRGenBuilderTy &builder = getBuilder();
+  mlir::Type voidTy = builder.getVoidTy();
+
+  switch (builtinID) {
+  default:
+    return {};
+  case X86::BI_mm_clflush:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.clflush", voidTy, ops[0]);
+  case X86::BI_mm_lfence:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.lfence", voidTy);
+  case X86::BI_mm_pause:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.pause", voidTy);
+  case X86::BI_mm_mfence:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.mfence", voidTy);
+  case X86::BI_mm_sfence:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse.sfence", voidTy);
+  case X86::BI_mm_prefetch:
+  case X86::BI__rdtsc:
+  case X86::BI__builtin_ia32_rdtscp: {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  }
+  case X86::BI__builtin_ia32_lzcnt_u16:
+  case X86::BI__builtin_ia32_lzcnt_u32:
+  case X86::BI__builtin_ia32_lzcnt_u64: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value isZeroPoison = builder.getFalse(loc);
+    return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
+  case X86::BI__builtin_ia32_tzcnt_u16:
+  case X86::BI__builtin_ia32_tzcnt_u32:
+  case X86::BI__builtin_ia32_tzcnt_u64: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value isZeroPoison = builder.getFalse(loc);
+    return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
+  case X86::BI__builtin_ia32_undef128:
+  case X86::BI__builtin_ia32_undef256:
+  case X86::BI__builtin_ia32_undef512:
+    // The x86 definition of "undef" is not the same as the LLVM definition
+    // (PR32176). We leave optimizing away an unnecessary zero constant to the
+    // IR optimizer and backend.
+    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+    // value, we should use that here instead of a zero.
+    return builder.getNullValue(convertType(expr->getType()),
+                                getLoc(expr->getExprLoc()));
+  case X86::BI__builtin_ia32_vec_ext_v4hi:
+  case X86::BI__builtin_ia32_vec_ext_v16qi:
+  case X86::BI__builtin_ia32_vec_ext_v8hi:
+  case X86::BI__builtin_ia32_vec_ext_v4si:
+  case X86::BI__builtin_ia32_vec_ext_v4sf:
+  case X86::BI__builtin_ia32_vec_ext_v2di:
+  case X86::BI__builtin_ia32_vec_ext_v32qi:
+  case X86::BI__builtin_ia32_vec_ext_v16hi:
+  case X86::BI__builtin_ia32_vec_ext_v8si:
+  case X86::BI__builtin_ia32_vec_ext_v4di: {
+    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+    uint64_t index = getZExtIntValueFromConstOp(ops[1]);
+    index &= numElts - 1;
+
+    cir::ConstantOp indexVal =
+        builder.getUInt64(index, getLoc(expr->getExprLoc()));
+
+    // These builtins exist so we can ensure the index is an ICE and in range.
+    // Otherwise we could just do this in the header file.
+    return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
+                                     ops[0], indexVal);
+  }
+  case X86::BI__builtin_ia32_vec_set_v4hi:
+  case X86::BI__builtin_ia32_vec_set_v16qi:
+  case X86::BI__builtin_ia32_vec_set_v8hi:
+  case X86::BI__builtin_ia32_vec_set_v4si:
+  case X86::BI__builtin_ia32_vec_set_v2di:
+  case X86::BI__builtin_ia32_vec_set_v32qi:
+  case X86::BI__builtin_ia32_vec_set_v16hi:
+  case X86::BI__builtin_ia32_vec_set_v8si:
+  case X86::BI__builtin_ia32_vec_set_v4di: {
+    return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
+                         ops[2]);
+  }
+  case X86::BI__builtin_ia32_kunpckhi:
+    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+                             "x86.avx512.kunpackb", ops);
+  case X86::BI__builtin_ia32_kunpcksi:
+    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+                             "x86.avx512.kunpackw", ops);
+  case X86::BI__builtin_ia32_kunpckdi:
+    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+                             "x86.avx512.kunpackd", ops);
+  case X86::BI_mm_setcsr:
+  case X86::BI__builtin_ia32_ldmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+    builder.createStore(loc, ops[0], tmp);
+    return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
+                               builder.getVoidTy(), tmp.getPointer());
+  }
+  case X86::BI_mm_getcsr:
+  case X86::BI__builtin_ia32_stmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getType(), loc);
+    emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
+                        tmp.getPointer());
+    return builder.createLoad(loc, tmp);
+  }
+  case X86::BI__builtin_ia32_xsave:
+  case X86::BI__builtin_ia32_xsave64:
+  case X86::BI__builtin_ia32_xrstor:
+  case X86::BI__builtin_ia32_xrstor64:
+  case X86::BI__builtin_ia32_xsaveopt:
+  case X86::BI__builtin_ia32_xsaveopt64:
+  case X86::BI__builtin_ia32_xrstors:
+  case X86::BI__builtin_ia32_xrstors64:
+  case X86::BI__builtin_ia32_xsavec:
+  case X86::BI__builtin_ia32_xsavec64:
+  case X86::BI__builtin_ia32_xsaves:
+  case X86::BI__builtin_ia32_xsaves64:
+  case X86::BI__builtin_ia32_xsetbv:
+  case X86::BI_xsetbv:
+  case X86::BI__builtin_ia32_xgetbv:
+  case X86::BI_xgetbv:
+  case X86::BI__builtin_ia32_storedqudi128_mask:
+  case X86::BI__builtin_ia32_storedqusi128_mask:
+  case X86::BI__builtin_ia32_storedquhi128_mask:
+  case X86::BI__builtin_ia32_storedquqi128_mask:
+  case X86::BI__builtin_ia32_storeupd128_mask:
+  case X86::BI__builtin_ia32_storeups128_mask:
+  case X86::BI__builtin_ia32_storedqudi256_mask:
+  case X86::BI__builtin_ia32_storedqusi256_mask:
+  case X86::BI__builtin_ia32_storedquhi256_mask:
+  case X86::BI__builtin_ia32_storedquqi256_mask:
+  case X86::BI__builtin_ia32_storeupd256_mask:
+  case X86::BI__builtin_ia32_storeups256_mask:
+  case X86::BI__builtin_ia32_storedqudi512_mask:
+  case X86::BI__builtin_ia32_storedqusi512_mask:
+  case X86::BI__builtin_ia32_storedquhi512_mask:
+  case X86::BI__builtin_ia32_storedquqi512_mask:
+  case X86::BI__builtin_ia32_storeupd512_mask:
+  case X86::BI__builtin_ia32_storeups512_mask:
+  case X86::BI__builtin_ia32_storesbf16128_mask:
+  case X86::BI__builtin_ia32_storesh128_mask:
+  case X86::BI__builtin_ia32_storess128_mask:
+  case X86::BI__builtin_ia32_storesd128_mask:
+  case X86::BI__builtin_ia32_cvtmask2b128:
+  case X86::BI__builtin_ia32_cvtmask2b256:
+  case X86::BI__builtin_ia32_cvtmask2b512:
+  case X86::BI__builtin_ia32_cvtmask2w128:
+  case X86::BI__builtin_ia32_cvtmask2w256:
+  case X86::BI__builtin_ia32_cvtmask2w512:
+  case X86::BI__builtin_ia32_cvtmask2d128:
+  case X86::BI__builtin_ia32_cvtmask2d256:
+  case X86::BI__builtin_ia32_cvtmask2d512:
+  case X86::BI__builtin_ia32_cvtmask2q128:
+  case X86::BI__builtin_ia32_cvtmask2q256:
+  case X86::BI__builtin_ia32_cvtmask2q512:
+  case X86::BI__builtin_ia32_cvtb2mask128:
+  case X86::BI__builtin_ia32_cvtb2mask256:
+  case X86::BI__builtin_ia32_cvtb2mask512:
+  case X86::BI__builtin_ia32_cvtw2mask128:
+  case X86::BI__builtin_ia32_cvtw2mask256:
+  case X86::BI__builtin_ia32_cvtw2mask512:
+  case X86::BI__builtin_ia32_cvtd2mask128:
+  case X86::BI__builtin_ia32_cvtd2mask256:
+  case X86::BI__builtin_ia32_cvtd2mask512:
+  case X86::BI__builtin_ia32_cvtq2mask128:
+  case X86::BI__builtin_ia32_cvtq2mask256:
+  case X86::BI__builtin_ia32_cvtq2mask512:
+  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+  case X86::BI__builtin_ia32_vfmaddsh3_mask:
+  case X86::BI__builtin_ia32_vfmaddss3_mask:
+  case X86::BI__builtin_ia32_vfmaddsd3_mask:
+  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
+  case X86::BI__builtin_ia32_vfmaddss3_maskz:
+  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
+  case X86::BI__builtin_ia32_vfmaddss3_mask3:
+  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
+  case X86::BI__builtin_ia32_vfmsubss3_mask3:
+  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+  case X86::BI__builtin_ia32_vfmaddph512_mask:
+  case X86::BI__builtin_ia32_vfmaddph512_maskz:
+  case X86::BI__builtin_ia32_vfmaddph512_mask3:
+  case X86::BI__builtin_ia32_vfmaddps512_mask:
+  case X86::BI__builtin_ia32_vfmaddps512_maskz:
+  case X86::BI__builtin_ia32_vfmaddps512_mask3:
+  case X86::BI__builtin_ia32_vfmsubps512_mask3:
+  case X86::BI__builtin_ia32_vfmaddpd512_mask:
+  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubph512_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
+  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+  case X86::BI__builtin_ia32_movdqa32store128_mask:
+  case X86::BI__builtin_ia32_movdqa64store128_mask:
+  case X86::BI__builtin_ia32_storeaps128_mask:
+  case X86::BI__builtin_ia32_storeapd128_mask:
+  case X86::BI__builtin_ia32_movdqa32store256_mask:
+  case X86::BI__builtin_ia32_movdqa64store256_mask:
+  case X86::BI__builtin_ia32_storeaps256_mask:
+  case X86::BI__builtin_ia32_storeapd256_mask:
+  case X86::BI__builtin_ia32_movdqa32store512_mask:
+  case X86::BI__builtin_ia32_movdqa64store512_mask:
+  case X86::BI__builtin_ia32_storeaps512_mask:
+  case X86::BI__builtin_ia32_storeapd512_mask:
+  case X86::BI__builtin_ia32_loadups128_mask:
+  case X86::BI__builtin_ia32_loadups256_mask:
+  case X86::BI__builtin_ia32_loadups512_mask:
+  case X86::BI__builtin_ia32_loadupd128_mask:
+  case X86::BI__builtin_ia32_loadupd256_mask:
+  case X86::BI__builtin_ia32_loadupd512_mask:
+  case X86::BI__builtin_ia32_loaddquqi128_mask:
+  case X86::BI__builtin_ia32_loaddquqi256_mask:
+  case X86::BI__builtin_ia32_loaddquqi512_mask:
+  case X86::BI__builtin_ia32_loaddquhi128_mask:
+  case X86::BI__builtin_ia32_loaddquhi256_mask:
+  case X86::BI__builtin_ia32_loaddquhi512_mask:
+  case X86::BI__builtin_ia32_loaddqusi128_mask:
+  case X86::BI__builtin_ia32_loaddqusi256_mask:
+  case X86::BI__builtin_ia32_loaddqusi512_mask:
+  case X86::BI__builtin_ia32_loaddqudi128_mask:
+  case X86::BI__builtin_ia32_loaddqudi256_mask:
+  case X86::BI__builtin_ia32_loaddqudi512_mask:
+  case X86::BI__builtin_ia32_loadsbf16128_mask:
+  case X86::BI__builtin_ia32_loadsh128_mask:
+  case X86::BI__builtin_ia32_loadss128_mask:
+  case X86::BI__builtin_ia32_loadsd128_mask:
+  case X86::BI__builtin_ia32_loadaps128_mask:
+  case X86::BI__builtin_ia32_loadaps256_mask:
+  case X86::BI__builtin_ia32_loadaps512_mask:
+  case X86::BI__builtin_ia32_loadapd128_mask:
+  case X86::BI__builtin_ia32_loadapd256_mask:
+  case X86::BI__builtin_ia32_loadapd512_mask:
+  case X86::BI__builtin_ia32_movdqa32load128_mask:
+  case X86::BI__builtin_ia32_movdqa32load256_mask:
+  case X86::BI__builtin_ia32_movdqa32load512_mask:
+  case X86::BI__builtin_ia32_movdqa64load128_mask:
+  case X86::BI__builtin_ia32_movdqa64load256_mask:
+  case X86::BI__builtin_ia32_movdqa64load512_mask:
+  case X86::BI__builtin_ia32_expandloaddf128_mask:
+  case X86::BI__builtin_ia32_expandloaddf256_mask:
+  case X86::BI__builtin_ia32_expandloaddf512_mask:
+  case X86::BI__builtin_ia32_expandloadsf128_mask:
+  case X86::BI__builtin_ia32_expandloadsf256_mask:
+  case X86::BI__builtin_ia32_expandloadsf512_mask:
+  case X86::BI__builtin_ia32_expandloaddi128_mask:
+  case X86::BI__builtin_ia32_expandloaddi256_mask:
+  case X86::BI__builtin_ia32_expandloaddi512_mask:
+  case X86::BI__builtin_ia32_expandloadsi128_mask:
+  case X86::BI__builtin_ia32_expandloadsi256_mask:
+  case X86::BI__builtin_ia32_expandloadsi512_mask:
+  case X86::BI__builtin_ia32_expandloadhi128_mask:
+  case X86::BI__builtin_ia32_expandloadhi256_mask:
+  case X86::BI__builtin_ia32_expandloadhi512_mask:
X86::BI__builtin_ia32_expandloadhi512_mask: + case X86::BI__builtin_ia32_expandloadqi128_mask: + case X86::BI__builtin_ia32_expandloadqi256_mask: + case X86::BI__builtin_ia32_expandloadqi512_mask: + case X86::BI__builtin_ia32_compressstoredf128_mask: + case X86::BI__builtin_ia32_compressstoredf256_mask: + case X86::BI__builtin_ia32_compressstoredf512_mask: + case X86::BI__builtin_ia32_compressstoresf128_mask: + case X86::BI__builtin_ia32_compressstoresf256_mask: + case X86::BI__builtin_ia32_compressstoresf512_mask: + case X86::BI__builtin_ia32_compressstoredi128_mask: + case X86::BI__builtin_ia32_compressstoredi256_mask: + case X86::BI__builtin_ia32_compressstoredi512_mask: + case X86::BI__builtin_ia32_compressstoresi128_mask: + case X86::BI__builtin_ia32_compressstoresi256_mask: + case X86::BI__builtin_ia32_compressstoresi512_mask: + case X86::BI__builtin_ia32_compressstorehi128_mask: + case X86::BI__builtin_ia32_compressstorehi256_mask: + case X86::BI__builtin_ia32_compressstorehi512_mask: + case X86::BI__builtin_ia32_compressstoreqi128_mask: + case X86::BI__builtin_ia32_compressstoreqi256_mask: + case X86::BI__builtin_ia32_compressstoreqi512_mask: + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case 
X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + intrinsicName = "x86.avx512.mask.gather3div2.df"; + break; + case X86::BI__builtin_ia32_gather3div2di: + intrinsicName = "x86.avx512.mask.gather3div2.di"; + break; + case X86::BI__builtin_ia32_gather3div4df: + intrinsicName = "x86.avx512.mask.gather3div4.df"; + break; + case X86::BI__builtin_ia32_gather3div4di: + intrinsicName = "x86.avx512.mask.gather3div4.di"; + break; + case X86::BI__builtin_ia32_gather3div4sf: + intrinsicName = "x86.avx512.mask.gather3div4.sf"; + break; + case X86::BI__builtin_ia32_gather3div4si: + intrinsicName = "x86.avx512.mask.gather3div4.si"; + break; + case X86::BI__builtin_ia32_gather3div8sf: + intrinsicName = "x86.avx512.mask.gather3div8.sf"; + break; + case X86::BI__builtin_ia32_gather3div8si: + intrinsicName = "x86.avx512.mask.gather3div8.si"; + break; + case X86::BI__builtin_ia32_gather3siv2df: + intrinsicName = "x86.avx512.mask.gather3siv2.df"; + break; + case X86::BI__builtin_ia32_gather3siv2di: + intrinsicName = "x86.avx512.mask.gather3siv2.di"; + break; + case X86::BI__builtin_ia32_gather3siv4df: + intrinsicName = "x86.avx512.mask.gather3siv4.df"; + break; + case X86::BI__builtin_ia32_gather3siv4di: + intrinsicName = "x86.avx512.mask.gather3siv4.di"; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + intrinsicName = "x86.avx512.mask.gather3siv4.sf"; + break; + case X86::BI__builtin_ia32_gather3siv4si: + intrinsicName = "x86.avx512.mask.gather3siv4.si"; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + intrinsicName = "x86.avx512.mask.gather3siv8.sf"; + break; + case X86::BI__builtin_ia32_gather3siv8si: + intrinsicName = "x86.avx512.mask.gather3siv8.si"; + break; + case X86::BI__builtin_ia32_gathersiv8df: + intrinsicName = "x86.avx512.mask.gather.dpd.512"; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + intrinsicName = "x86.avx512.mask.gather.dps.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + intrinsicName = "x86.avx512.mask.gather.qpd.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + intrinsicName = "x86.avx512.mask.gather.qps.512"; + break; + case X86::BI__builtin_ia32_gathersiv8di: + intrinsicName = "x86.avx512.mask.gather.dpq.512"; + break; + case X86::BI__builtin_ia32_gathersiv16si: + intrinsicName = "x86.avx512.mask.gather.dpi.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + intrinsicName = "x86.avx512.mask.gather.qpq.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + intrinsicName = "x86.avx512.mask.gather.qpi.512"; + break; + } + + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned minElts = + std::min(cast<cir::VectorType>(ops[0].getType()).getSize(), + cast<cir::VectorType>(ops[2].getType()).getSize()); + ops[3] = getMaskVecValue(builder, loc, ops[3], minElts); + return emitIntrinsicCallOp(builder, loc, intrinsicName, + convertType(expr->getType()), ops); + } + case X86::BI__builtin_ia32_scattersiv8df: + case 
X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: { + llvm::StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + intrinsicName = "x86.avx512.mask.scatter.dpd.512"; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + intrinsicName = "x86.avx512.mask.scatter.dps.512"; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + intrinsicName = "x86.avx512.mask.scatter.qpd.512"; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + intrinsicName = "x86.avx512.mask.scatter.qps.512"; + break; + case X86::BI__builtin_ia32_scattersiv8di: + intrinsicName = "x86.avx512.mask.scatter.dpq.512"; + break; + case X86::BI__builtin_ia32_scattersiv16si: + intrinsicName = "x86.avx512.mask.scatter.dpi.512"; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + intrinsicName = "x86.avx512.mask.scatter.qpq.512"; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + intrinsicName = "x86.avx512.mask.scatter.qpi.512"; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + intrinsicName = "x86.avx512.mask.scatterdiv2.df"; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + intrinsicName = "x86.avx512.mask.scatterdiv2.di"; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + intrinsicName = "x86.avx512.mask.scatterdiv4.df"; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + intrinsicName = "x86.avx512.mask.scatterdiv4.di"; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + intrinsicName = "x86.avx512.mask.scatterdiv4.sf"; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + intrinsicName = "x86.avx512.mask.scatterdiv4.si"; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + intrinsicName = "x86.avx512.mask.scatterdiv8.sf"; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + intrinsicName = "x86.avx512.mask.scatterdiv8.si"; + break; + case X86::BI__builtin_ia32_scattersiv2df: + intrinsicName = "x86.avx512.mask.scattersiv2.df"; + break; + case X86::BI__builtin_ia32_scattersiv2di: + intrinsicName = "x86.avx512.mask.scattersiv2.di"; + break; + case X86::BI__builtin_ia32_scattersiv4df: + intrinsicName = "x86.avx512.mask.scattersiv4.df"; + break; + case X86::BI__builtin_ia32_scattersiv4di: + intrinsicName = "x86.avx512.mask.scattersiv4.di"; + break; + case X86::BI__builtin_ia32_scattersiv4sf: + intrinsicName = "x86.avx512.mask.scattersiv4.sf"; + break; + case X86::BI__builtin_ia32_scattersiv4si: + intrinsicName = "x86.avx512.mask.scattersiv4.si"; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + intrinsicName = "x86.avx512.mask.scattersiv8.sf"; + break; + case 
X86::BI__builtin_ia32_scattersiv8si: + intrinsicName = "x86.avx512.mask.scattersiv8.si"; + break; + } + + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned minElts = + std::min(cast<cir::VectorType>(ops[2].getType()).getSize(), + cast<cir::VectorType>(ops[3].getType()).getSize()); + ops[1] = getMaskVecValue(builder, loc, ops[1], minElts); + + return emitIntrinsicCallOp(builder, loc, intrinsicName, + convertType(expr->getType()), ops); + } + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: + case X86::BI__builtin_ia32_pmovqd512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_blendps: + case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendw256: + case X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_pblendd256: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: + return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), + true); + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: + return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), + false); + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilpd512: + case X86::BI__builtin_ia32_vpermilps512: { + const uint32_t imm = getSExtIntValueFromConstOp(ops[1]); + + llvm::SmallVector<int64_t> mask(16); + computeFullLaneShuffleMask(*this, ops[0], imm, false, mask); + + return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask); + } + case X86::BI__builtin_ia32_shufpd: + case 
X86::BI__builtin_ia32_shufpd256: + case X86::BI__builtin_ia32_shufpd512: + case X86::BI__builtin_ia32_shufps: + case X86::BI__builtin_ia32_shufps256: + case X86::BI__builtin_ia32_shufps512: { + const uint32_t imm = getZExtIntValueFromConstOp(ops[2]); + + llvm::SmallVector<int64_t> mask(16); + computeFullLaneShuffleMask(*this, ops[0], imm, true, mask); + + return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1], + mask); + } + case X86::BI__builtin_ia32_permdi256: + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi512: + case X86::BI__builtin_ia32_permdf512: + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: + case X86::BI__builtin_ia32_shuf_f32x4_256: + case X86::BI__builtin_ia32_shuf_f64x2_256: + case X86::BI__builtin_ia32_shuf_i32x4_256: + case X86::BI__builtin_ia32_shuf_i64x2_256: + case X86::BI__builtin_ia32_shuf_f32x4: + case X86::BI__builtin_ia32_shuf_f64x2: + case X86::BI__builtin_ia32_shuf_i32x4: + case X86::BI__builtin_ia32_shuf_i64x2: + case X86::BI__builtin_ia32_vperm2f128_pd256: + case X86::BI__builtin_ia32_vperm2f128_ps256: + case X86::BI__builtin_ia32_vperm2f128_si256: + case X86::BI__builtin_ia32_permti256: + case X86::BI__builtin_ia32_pslldqi128_byteshift: + case X86::BI__builtin_ia32_pslldqi256_byteshift: + case X86::BI__builtin_ia32_pslldqi512_byteshift: + case X86::BI__builtin_ia32_psrldqi128_byteshift: + case X86::BI__builtin_ia32_psrldqi256_byteshift: + case X86::BI__builtin_ia32_psrldqi512_byteshift: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_kshiftliqi: + case X86::BI__builtin_ia32_kshiftlihi: + case X86::BI__builtin_ia32_kshiftlisi: + case X86::BI__builtin_ia32_kshiftlidi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned shiftVal = + ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() & + 0xff; + unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth(); + + if (shiftVal >= numElems) + return builder.getNullValue(ops[0].getType(), loc); + + mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems); + + SmallVector<mlir::Attribute> indices; + mlir::Type i32Ty = builder.getSInt32Ty(); + for (auto i : llvm::seq<unsigned>(0, numElems)) + indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal)); + + mlir::Value zero = builder.getNullValue(in.getType(), loc); + mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices); + return builder.createBitcast(sv, ops[0].getType()); + } + case X86::BI__builtin_ia32_kshiftriqi: + case X86::BI__builtin_ia32_kshiftrihi: + case X86::BI__builtin_ia32_kshiftrisi: + case X86::BI__builtin_ia32_kshiftridi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned shiftVal = + ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() & + 0xff; + unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth(); + + if (shiftVal >= numElems) + return builder.getNullValue(ops[0].getType(), loc); + + mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems); + + SmallVector<mlir::Attribute> indices; + mlir::Type i32Ty = builder.getSInt32Ty(); + for (auto i : llvm::seq<unsigned>(0, numElems)) + indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal)); + + mlir::Value zero = 
builder.getNullValue(in.getType(), loc); + mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices); + return builder.createBitcast(sv, ops[0].getType()); + } + case X86::BI__builtin_ia32_vprotbi: + case X86::BI__builtin_ia32_vprotwi: + case X86::BI__builtin_ia32_vprotdi: + case X86::BI__builtin_ia32_vprotqi: + case X86::BI__builtin_ia32_prold128: + case X86::BI__builtin_ia32_prold256: + case X86::BI__builtin_ia32_prold512: + case X86::BI__builtin_ia32_prolq128: + case X86::BI__builtin_ia32_prolq256: + case X86::BI__builtin_ia32_prolq512: + return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0], + ops[0], ops[1], false); + case X86::BI__builtin_ia32_prord128: + case X86::BI__builtin_ia32_prord256: + case X86::BI__builtin_ia32_prord512: + case X86::BI__builtin_ia32_prorq128: + case X86::BI__builtin_ia32_prorq256: + case X86::BI__builtin_ia32_prorq512: + return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0], + ops[0], ops[1], true); + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectph_128: + case X86::BI__builtin_ia32_selectph_256: + case X86::BI__builtin_ia32_selectph_512: + case X86::BI__builtin_ia32_selectpbf_128: + case X86::BI__builtin_ia32_selectpbf_256: + case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: + case X86::BI__builtin_ia32_selectsh_128: + case X86::BI__builtin_ia32_selectsbf_128: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: + case X86::BI__builtin_ia32_cmpb128_mask: + case X86::BI__builtin_ia32_cmpb256_mask: + case X86::BI__builtin_ia32_cmpb512_mask: + case X86::BI__builtin_ia32_cmpw128_mask: + case X86::BI__builtin_ia32_cmpw256_mask: + case X86::BI__builtin_ia32_cmpw512_mask: + case X86::BI__builtin_ia32_cmpd128_mask: + case X86::BI__builtin_ia32_cmpd256_mask: + case X86::BI__builtin_ia32_cmpd512_mask: + case X86::BI__builtin_ia32_cmpq128_mask: + case X86::BI__builtin_ia32_cmpq256_mask: + case X86::BI__builtin_ia32_cmpq512_mask: + case X86::BI__builtin_ia32_ucmpb128_mask: + case X86::BI__builtin_ia32_ucmpb256_mask: + case X86::BI__builtin_ia32_ucmpb512_mask: + case X86::BI__builtin_ia32_ucmpw128_mask: + case X86::BI__builtin_ia32_ucmpw256_mask: + case X86::BI__builtin_ia32_ucmpw512_mask: + case X86::BI__builtin_ia32_ucmpd128_mask: + case X86::BI__builtin_ia32_ucmpd256_mask: + case X86::BI__builtin_ia32_ucmpd512_mask: + case X86::BI__builtin_ia32_ucmpq128_mask: + case X86::BI__builtin_ia32_ucmpq256_mask: + case X86::BI__builtin_ia32_ucmpq512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_vpcomb: + case X86::BI__builtin_ia32_vpcomw: + case X86::BI__builtin_ia32_vpcomd: + case X86::BI__builtin_ia32_vpcomq: + return 
emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true); + case X86::BI__builtin_ia32_vpcomub: + case X86::BI__builtin_ia32_vpcomuw: + case X86::BI__builtin_ia32_vpcomud: + case X86::BI__builtin_ia32_vpcomuq: + return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false); + case X86::BI__builtin_ia32_kortestcqi: + case X86::BI__builtin_ia32_kortestchi: + case X86::BI__builtin_ia32_kortestcsi: + case X86::BI__builtin_ia32_kortestcdi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + cir::IntType ty = cast<cir::IntType>(ops[0].getType()); + mlir::Value allOnesOp = + builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth())); + mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops); + mlir::Value cmp = + cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp); + return builder.createCast(cir::CastKind::bool_to_int, cmp, + cgm.convertType(expr->getType())); + } + case X86::BI__builtin_ia32_kortestzqi: + case X86::BI__builtin_ia32_kortestzhi: + case X86::BI__builtin_ia32_kortestzsi: + case X86::BI__builtin_ia32_kortestzdi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + cir::IntType ty = cast<cir::IntType>(ops[0].getType()); + mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult(); + mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops); + mlir::Value cmp = + cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp); + return builder.createCast(cir::CastKind::bool_to_int, cmp, + cgm.convertType(expr->getType())); + } + case X86::BI__builtin_ia32_ktestcqi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.b", ops); + case X86::BI__builtin_ia32_ktestzqi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.b", ops); + case X86::BI__builtin_ia32_ktestchi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.w", ops); + case X86::BI__builtin_ia32_ktestzhi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.w", ops); + case X86::BI__builtin_ia32_ktestcsi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.d", ops); + case X86::BI__builtin_ia32_ktestzsi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.d", ops); + case X86::BI__builtin_ia32_ktestcdi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.q", ops); + case X86::BI__builtin_ia32_ktestzdi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.q", ops); + case X86::BI__builtin_ia32_kaddqi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.b", ops); + case X86::BI__builtin_ia32_kaddhi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.w", ops); + case X86::BI__builtin_ia32_kaddsi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.d", ops); + case X86::BI__builtin_ia32_kadddi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.q", ops); + case X86::BI__builtin_ia32_kandqi: + case X86::BI__builtin_ia32_kandhi: + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::And, ops); + case X86::BI__builtin_ia32_kandnqi: + case X86::BI__builtin_ia32_kandnhi: + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: + return emitX86MaskLogic(builder, 
getLoc(expr->getExprLoc()), + cir::BinOpKind::And, ops, true); + case X86::BI__builtin_ia32_korqi: + case X86::BI__builtin_ia32_korhi: + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::Or, ops); + case X86::BI__builtin_ia32_kxnorqi: + case X86::BI__builtin_ia32_kxnorhi: + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::Xor, ops, true); + case X86::BI__builtin_ia32_kxorqi: + case X86::BI__builtin_ia32_kxorhi: + case X86::BI__builtin_ia32_kxorsi: + case X86::BI__builtin_ia32_kxordi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::Xor, ops); + case X86::BI__builtin_ia32_knotqi: + case X86::BI__builtin_ia32_knothi: + case X86::BI__builtin_ia32_knotsi: + case X86::BI__builtin_ia32_knotdi: { + cir::IntType intTy = cast<cir::IntType>(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value resVec = + getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); + return builder.createBitcast(builder.createNot(resVec), ops[0].getType()); + } + case X86::BI__builtin_ia32_kmovb: + case X86::BI__builtin_ia32_kmovw: + case X86::BI__builtin_ia32_kmovd: + case X86::BI__builtin_ia32_kmovq: { + // Bitcast to vXi1 type and then back to integer. This gets the mask + // register type into the IR, but might be optimized out depending on + // what's around it. + cir::IntType intTy = cast<cir::IntType>(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value resVec = + getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); + return builder.createBitcast(resVec, ops[0].getType()); + } + case X86::BI__builtin_ia32_sqrtsh_round_mask: + case X86::BI__builtin_ia32_sqrtsd_round_mask: + case X86::BI__builtin_ia32_sqrtss_round_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_sqrtph512: + case X86::BI__builtin_ia32_sqrtps512: + case X86::BI__builtin_ia32_sqrtpd512: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value arg = ops[0]; + return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult(); + } + case X86::BI__builtin_ia32_pmuludq128: + case X86::BI__builtin_ia32_pmuludq256: + case X86::BI__builtin_ia32_pmuludq512: { + unsigned opTypePrimitiveSizeInBits = + cgm.getDataLayout().getTypeSizeInBits(ops[0].getType()); + return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false, + ops, opTypePrimitiveSizeInBits); + } + case X86::BI__builtin_ia32_pmuldq128: + case X86::BI__builtin_ia32_pmuldq256: + case X86::BI__builtin_ia32_pmuldq512: { + unsigned opTypePrimitiveSizeInBits = + cgm.getDataLayout().getTypeSizeInBits(ops[0].getType()); + return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true, + ops, opTypePrimitiveSizeInBits); + } + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case 
X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + case X86::BI__builtin_ia32_reduce_fadd_pd512: + case X86::BI__builtin_ia32_reduce_fadd_ps512: + case X86::BI__builtin_ia32_reduce_fadd_ph512: + case X86::BI__builtin_ia32_reduce_fadd_ph256: + case X86::BI__builtin_ia32_reduce_fadd_ph128: + case X86::BI__builtin_ia32_reduce_fmul_pd512: + case X86::BI__builtin_ia32_reduce_fmul_ps512: + case X86::BI__builtin_ia32_reduce_fmul_ph512: + case X86::BI__builtin_ia32_reduce_fmul_ph256: + case X86::BI__builtin_ia32_reduce_fmul_ph128: + case X86::BI__builtin_ia32_reduce_fmax_pd512: + case X86::BI__builtin_ia32_reduce_fmax_ps512: + case X86::BI__builtin_ia32_reduce_fmax_ph512: + case X86::BI__builtin_ia32_reduce_fmax_ph256: + case X86::BI__builtin_ia32_reduce_fmax_ph128: + case X86::BI__builtin_ia32_reduce_fmin_pd512: + case X86::BI__builtin_ia32_reduce_fmin_ps512: + case X86::BI__builtin_ia32_reduce_fmin_ph512: + case X86::BI__builtin_ia32_reduce_fmin_ph256: + case X86::BI__builtin_ia32_reduce_fmin_ph128: + case X86::BI__builtin_ia32_rdrand16_step: + case X86::BI__builtin_ia32_rdrand32_step: + case X86::BI__builtin_ia32_rdrand64_step: + case X86::BI__builtin_ia32_rdseed16_step: + case X86::BI__builtin_ia32_rdseed32_step: + case X86::BI__builtin_ia32_rdseed64_step: + case X86::BI__builtin_ia32_addcarryx_u32: + case X86::BI__builtin_ia32_addcarryx_u64: + case X86::BI__builtin_ia32_subborrow_u32: + case X86::BI__builtin_ia32_subborrow_u64: + case X86::BI__builtin_ia32_fpclassps128_mask: + case X86::BI__builtin_ia32_fpclassps256_mask: + case X86::BI__builtin_ia32_fpclassps512_mask: + case X86::BI__builtin_ia32_vfpclassbf16128_mask: + case X86::BI__builtin_ia32_vfpclassbf16256_mask: + case X86::BI__builtin_ia32_vfpclassbf16512_mask: + case X86::BI__builtin_ia32_fpclassph128_mask: + case X86::BI__builtin_ia32_fpclassph256_mask: + case X86::BI__builtin_ia32_fpclassph512_mask: + case X86::BI__builtin_ia32_fpclasspd128_mask: + case X86::BI__builtin_ia32_fpclasspd256_mask: + case X86::BI__builtin_ia32_fpclasspd512_mask: + case X86::BI__builtin_ia32_vp2intersect_q_512: + case X86::BI__builtin_ia32_vp2intersect_q_256: + case X86::BI__builtin_ia32_vp2intersect_q_128: + case X86::BI__builtin_ia32_vp2intersect_d_512: + case X86::BI__builtin_ia32_vp2intersect_d_256: + case X86::BI__builtin_ia32_vp2intersect_d_128: + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmpleps: + case 
X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqpd: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltpd: + return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), + cir::CmpOpKind::lt, /*shouldInvert=*/true); + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnlepd: + return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), + cir::CmpOpKind::le, /*shouldInvert=*/true); + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordpd: + case X86::BI__builtin_ia32_cmpph128_mask: + case X86::BI__builtin_ia32_cmpph256_mask: + case X86::BI__builtin_ia32_cmpph512_mask: + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmpps512_mask: + case X86::BI__builtin_ia32_cmppd128_mask: + case X86::BI__builtin_ia32_cmppd256_mask: + case X86::BI__builtin_ia32_cmppd512_mask: + case X86::BI__builtin_ia32_vcmpbf16512_mask: + case X86::BI__builtin_ia32_vcmpbf16256_mask: + case X86::BI__builtin_ia32_vcmpbf16128_mask: + case X86::BI__builtin_ia32_cmpps: + case X86::BI__builtin_ia32_cmpps256: + case X86::BI__builtin_ia32_cmppd: + case X86::BI__builtin_ia32_cmppd256: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpeqsd: + case X86::BI__builtin_ia32_cmpltsd: + case X86::BI__builtin_ia32_cmplesd: + case X86::BI__builtin_ia32_cmpunordsd: + case X86::BI__builtin_ia32_cmpneqsd: + case X86::BI__builtin_ia32_cmpnltsd: + case X86::BI__builtin_ia32_cmpnlesd: + case X86::BI__builtin_ia32_cmpordsd: + case X86::BI__builtin_ia32_vcvtph2ps_mask: + case X86::BI__builtin_ia32_vcvtph2ps256_mask: + case X86::BI__builtin_ia32_vcvtph2ps512_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: + case X86::BI__cpuid: + case X86::BI__cpuidex: + case X86::BI__emul: + case X86::BI__emulu: + case X86::BI__mulh: + case X86::BI__umulh: + case X86::BI_mul128: + case X86::BI_umul128: + case X86::BI__faststorefence: + case X86::BI__shiftleft128: + case X86::BI__shiftright128: + case X86::BI_ReadWriteBarrier: + case X86::BI_ReadBarrier: + case X86::BI_WriteBarrier: + case X86::BI_AddressOfReturnAddress: + case X86::BI__stosb: + case X86::BI__ud2: + case X86::BI__int2c: + case X86::BI__readfsbyte: + case X86::BI__readfsword: + case X86::BI__readfsdword: + case X86::BI__readfsqword: + case X86::BI__readgsbyte: + case X86::BI__readgsword: + case X86::BI__readgsdword: + case X86::BI__readgsqword: + case X86::BI__builtin_ia32_encodekey128_u32: + case X86::BI__builtin_ia32_encodekey256_u32: + case X86::BI__builtin_ia32_aesenc128kl_u8: + case X86::BI__builtin_ia32_aesdec128kl_u8: + case X86::BI__builtin_ia32_aesenc256kl_u8: + case X86::BI__builtin_ia32_aesdec256kl_u8: + case X86::BI__builtin_ia32_aesencwide128kl_u8: + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + case X86::BI__builtin_ia32_aesencwide256kl_u8: + case 
X86::BI__builtin_ia32_aesdecwide256kl_u8: + case X86::BI__builtin_ia32_vfcmaddcph512_mask: + case X86::BI__builtin_ia32_vfmaddcph512_mask: + case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: + case X86::BI__builtin_ia32_vfmaddcsh_round_mask: + case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: + case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: + case X86::BI__builtin_ia32_prefetchi: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } +} diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch deleted file mode 100644 index 87c0ca69ac8abe6aaa684ffbbce3c65e342f6066..0000000000000000000000000000000000000000 GIT binary patch (12058-byte base85 blob omitted) From 47f9b2ffd8a1893c2929b003811ba0716883c8a0 Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <10b.priyanshu@gmail.com> Date: Sun, 7 Dec 2025 17:21:57 +0530 Subject: [PATCH 30/33] update clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.cpp --- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d2ffb9d98d6d0..228da428844e9 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -186,7 +186,7 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite( return mlir::success(); } -mlir::LogicalResult SqrtOpLowering::matchAndRewrite( +mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite( cir::SqrtOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { mlir::Type resTy = 
typeConverter->convertType(op.getType()); From 15f1f4f11204e308fd05739f21449df91b69da60 Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <10b.priyanshu@gmail.com> Date: Sun, 7 Dec 2025 21:52:03 +0530 Subject: [PATCH 31/33] update clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c --- .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c index a3de192f9e142..d540e9c227e67 100644 --- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c +++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c @@ -1,45 +1,45 @@ -#include <immintrin.h> -// Test X86-specific sqrt builtins - -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll -// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll -// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s - -// Test __builtin_ia32_sqrtph512 -__m512h test_sqrtph512(__m512h a) { - return __builtin_ia32_sqrtph512(a); -} -// CIR: cir.func @test_sqrtph512 -// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector -// CIR: cir.return [[RES]] -// LLVM: define {{.*}} @test_sqrtph512 -// LLVM: call <32 x half> @llvm.sqrt.v32f16 -// OGCG: define {{.*}} @test_sqrtph512 -// OGCG: call <32 x half> @llvm.sqrt.v32f16 - -// Test __builtin_ia32_sqrtps512 -__m512 test_sqrtps512(__m512 a) { - return __builtin_ia32_sqrtps512(a); -} -// CIR: cir.func @test_sqrtps512 -// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector -// CIR: cir.return [[RES]] -// LLVM: define {{.*}} @test_sqrtps512 -// LLVM: call <16 x float> @llvm.sqrt.v16f32 -// OGCG: define {{.*}} @test_sqrtps512 -// OGCG: call <16 x float> @llvm.sqrt.v16f32 - -// Test __builtin_ia32_sqrtpd512 -__m512d test_sqrtpd512(__m512d a) { - return __builtin_ia32_sqrtpd512(a); -} -// CIR: cir.func @test_sqrtpd512 -// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector -// CIR: cir.return [[RES]] -// LLVM: define {{.*}} @test_sqrtpd512 -// LLVM: call <8 x double> @llvm.sqrt.v8f64 -// OGCG: define {{.*}} @test_sqrtpd512 +// Test X86-specific sqrt builtins + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512f -target-feature +avx512fp16 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); +typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); +typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64))); + +// Test __builtin_ia32_sqrtph512 +__m512h test_sqrtph512(__m512h a) { + return __builtin_ia32_sqrtph512(a, 4); +} +// CIR-LABEL: cir.func {{.*}}@test_sqrtph512 +// CIR: cir.sqrt {{%.*}} : !cir.vector<32 x !cir.f16> +// LLVM-LABEL: define {{.*}} @test_sqrtph512 +// LLVM: call <32 x half> @llvm.sqrt.v32f16 +// OGCG-LABEL: define {{.*}} @test_sqrtph512 +// OGCG: call <32 x half> @llvm.sqrt.v32f16 + +// Test __builtin_ia32_sqrtps512 +__m512 test_sqrtps512(__m512 a) { 
+ return __builtin_ia32_sqrtps512(a, 4); +} +// CIR-LABEL: cir.func {{.*}}@test_sqrtps512 +// CIR: cir.sqrt {{%.*}} : !cir.vector<16 x !cir.float> +// LLVM-LABEL: define {{.*}} @test_sqrtps512 +// LLVM: call <16 x float> @llvm.sqrt.v16f32 +// OGCG-LABEL: define {{.*}} @test_sqrtps512 +// OGCG: call <16 x float> @llvm.sqrt.v16f32 + +// Test __builtin_ia32_sqrtpd512 +__m512d test_sqrtpd512(__m512d a) { + return __builtin_ia32_sqrtpd512(a, 4); +} +// CIR-LABEL: cir.func {{.*}}@test_sqrtpd512 +// CIR: cir.sqrt {{%.*}} : !cir.vector<8 x !cir.double> +// LLVM-LABEL: define {{.*}} @test_sqrtpd512 +// LLVM: call <8 x double> @llvm.sqrt.v8f64 +// OGCG-LABEL: define {{.*}} @test_sqrtpd512 // OGCG: call <8 x double> @llvm.sqrt.v8f64 \ No newline at end of file From b12779ab57b19b35a6affc361f3882574bbdddfd Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <10b.priyanshu@gmail.com> Date: Tue, 9 Dec 2025 02:15:00 +0530 Subject: [PATCH 32/33] Fix line endings in CIRGenBuiltinX86.cpp --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3142 ++++++++++---------- 1 file changed, 1571 insertions(+), 1571 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 080a696b868cf..fb17e31bf36d6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1,1571 +1,1571 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This contains code to emit x86/x86_64 Builtin calls as CIR or a function -// call to be later resolved. -// -//===----------------------------------------------------------------------===// - -#include "CIRGenBuilder.h" -#include "CIRGenFunction.h" -#include "CIRGenModule.h" -#include "mlir/IR/Location.h" -#include "mlir/IR/ValueRange.h" -#include "clang/Basic/Builtins.h" -#include "clang/Basic/TargetBuiltins.h" -#include "clang/CIR/Dialect/IR/CIRTypes.h" -#include "clang/CIR/MissingFeatures.h" - -using namespace clang; -using namespace clang::CIRGen; - -template <typename... Operands> -static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder, - mlir::Location loc, const StringRef str, - const mlir::Type &resTy, - Operands &&...op) { - return cir::LLVMIntrinsicCallOp::create(builder, loc, - builder.getStringAttr(str), resTy, - std::forward<Operands>(op)...) - .getResult(); -} - -// OG has unordered comparison as a form of optimization in addition to -// ordered comparison, while CIR doesn't. -// -// This means that we can't encode the comparison code of UGT (unordered -// greater than), at least not at the CIR level. -// -// The boolean shouldInvert compensates for this. -// For example: to get to the comparison code UGT, we pass in -// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT. - -// There are several ways to support this otherwise: -// - register extra CmpOpKind for unordered comparison types and build the -// translation code to go from CIR -> LLVM dialect. (Notice we get this -// naturally with shouldInvert, benefiting from existing infrastructure, -// albeit having to generate an extra `not` at CIR). -// - Just add extra comparison code to a new VecCmpOpKind instead of -// cluttering CmpOpKind. 
-// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered -// comparison -// - Just emit the intrinsics call instead of calling this helper, see how the -// LLVM lowering handles this. -static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder, - llvm::SmallVector<mlir::Value> &ops, - mlir::Location loc, cir::CmpOpKind pred, - bool shouldInvert) { - assert(!cir::MissingFeatures::cgFPOptionsRAII()); - // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]); - mlir::Value bitCast = builder.createBitcast( - shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType()); - return bitCast; -} - -static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc, - mlir::Value mask, unsigned numElems) { - auto maskTy = cir::VectorType::get( - builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth()); - mlir::Value maskVec = builder.createBitcast(mask, maskTy); - - // If we have less than 8 elements, then the starting mask was an i8 and - // we need to extract down to the right number of elements. - if (numElems < 8) { - SmallVector<mlir::Attribute> indices; - mlir::Type i32Ty = builder.getSInt32Ty(); - for (auto i : llvm::seq<unsigned>(0, numElems)) - indices.push_back(cir::IntAttr::get(i32Ty, i)); - - maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices); - } - return maskVec; -} - -// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins. -// -// The vector is split into lanes of 8 word elements (16 bits). The lower or -// upper half of each lane, controlled by `isLow`, is shuffled in the following -// way: The immediate is truncated to 8 bits, separated into 4 2-bit fields. The -// i-th field's value represents the resulting index of the i-th element in the -// half lane after shuffling. The other half of the lane remains unchanged. -static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder, - const mlir::Value vec, - const mlir::Value immediate, - const mlir::Location loc, - const bool isLow) { - uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate); - - auto vecTy = cast<cir::VectorType>(vec.getType()); - unsigned numElts = vecTy.getSize(); - - unsigned firstHalfStart = isLow ? 0 : 4; - unsigned secondHalfStart = 4 - firstHalfStart; - - // Splat the 8-bits of immediate 4 times to help the loop wrap around. - imm = (imm & 0xff) * 0x01010101; - - int64_t indices[32]; - for (unsigned l = 0; l != numElts; l += 8) { - for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) { - indices[l + i] = l + (imm & 3) + firstHalfStart; - imm >>= 2; - } - for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i) - indices[l + i] = l + i; - } - - return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts)); -} - -// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins. -// The shuffle mask is written to outIndices. -static void -computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec, - uint32_t imm, const bool isShufP, - llvm::SmallVectorImpl<int64_t> &outIndices) { - auto vecTy = cast<cir::VectorType>(vec.getType()); - unsigned numElts = vecTy.getSize(); - unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128; - unsigned numLaneElts = numElts / numLanes; - - // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
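- // For example, imm = 0x1B becomes 0x1B1B1B1B. With numLaneElts == 4, each - // element consumes two bits of `imm` (idx = imm % 4, then imm /= 4), so a - // full lane consumes exactly one replicated byte and every lane decodes - // the same original 8-bit immediate. 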
- imm = (imm & 0xff) * 0x01010101; - - for (unsigned l = 0; l != numElts; l += numLaneElts) { - for (unsigned i = 0; i != numLaneElts; ++i) { - uint32_t idx = imm % numLaneElts; - imm /= numLaneElts; - if (isShufP && i >= (numLaneElts / 2)) - idx += numElts; - outIndices[l + i] = l + idx; - } - } - - outIndices.resize(numElts); -} - -static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, - mlir::Location loc, - const std::string &intrinsicName, - SmallVectorImpl<mlir::Value> &ops) { - - auto intTy = cast<cir::IntType>(ops[0].getType()); - unsigned numElts = intTy.getWidth(); - mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); - mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); - mlir::Type vecTy = lhsVec.getType(); - mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy, - mlir::ValueRange{lhsVec, rhsVec}); - return builder.createBitcast(resVec, ops[0].getType()); -} - -static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder, - mlir::Location loc, - const std::string &intrinsicName, - SmallVectorImpl<mlir::Value> &ops) { - unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth(); - - // Convert both operands to mask vectors. - mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems); - mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems); - - mlir::Type i32Ty = builder.getSInt32Ty(); - - // Create indices for extracting the first half of each vector. - SmallVector<mlir::Attribute> halfIndices; - for (auto i : llvm::seq<unsigned>(0, numElems / 2)) - halfIndices.push_back(cir::IntAttr::get(i32Ty, i)); - - // Extract first half of each vector. This gives better codegen than - // doing it in a single shuffle. - mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices); - mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices); - - // Create indices for concatenating the vectors. - // NOTE: Operands are swapped to match the intrinsic definition. - // After the half extraction, both vectors have numElems/2 elements. - // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] - // select from rhsHalf, and indices [numElems/2..numElems-1] select from - // lhsHalf. - SmallVector<mlir::Attribute> concatIndices; - for (auto i : llvm::seq<unsigned>(0, numElems)) - concatIndices.push_back(cir::IntAttr::get(i32Ty, i)); - - // Concat the vectors (RHS first, then LHS). 
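- // For the 16-bit kunpackb case, for instance, numElems is 16, so the - // shuffle below yields res[0..7] = rhs[0..7] and res[8..15] = lhs[0..7]: - // the low byte of the second operand forms the low half of the result - // and the low byte of the first operand forms the high half. 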
- mlir::Value res = - builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices); - return builder.createBitcast(res, ops[0].getType()); -} - -static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder, - mlir::Location loc, - cir::BinOpKind binOpKind, - SmallVectorImpl<mlir::Value> &ops, - bool invertLHS = false) { - unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth(); - mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts); - mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts); - - if (invertLHS) - lhs = builder.createNot(lhs); - return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs), - ops[0].getType()); -} - -static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc, - const std::string &intrinsicName, - SmallVectorImpl<mlir::Value> &ops) { - auto intTy = cast<cir::IntType>(ops[0].getType()); - unsigned numElts = intTy.getWidth(); - mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); - mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); - mlir::Type resTy = builder.getSInt32Ty(); - return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy, - mlir::ValueRange{lhsVec, rhsVec}); -} - -static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc, - mlir::Value vec, mlir::Value value, - mlir::Value indexOp) { - unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize(); - - uint64_t index = - indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue(); - - index &= numElts - 1; - - cir::ConstantOp indexVal = builder.getUInt64(index, loc); - - return cir::VecInsertOp::create(builder, loc, vec, value, indexVal); -} - -static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder, - mlir::Location location, mlir::Value &op0, - mlir::Value &op1, mlir::Value &amt, - bool isRight) { - mlir::Type op0Ty = op0.getType(); - - // Amount may be scalar immediate, in which case create a splat vector. - // Funnel shift amounts are treated as modulo and types are all power-of-2 - // so we only care about the lowest log2 bits anyway. - if (amt.getType() != op0Ty) { - auto vecTy = mlir::cast<cir::VectorType>(op0Ty); - uint64_t numElems = vecTy.getSize(); - - auto amtTy = mlir::cast<cir::IntType>(amt.getType()); - auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType()); - - // If signed, cast to the same width but unsigned first to - // ensure zero-extension when casting to a bigger unsigned `vecElemTy`. - if (amtTy.isSigned()) { - cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth()); - amt = builder.createIntCast(amt, unsignedAmtTy); - } - cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth()); - amt = builder.createIntCast(amt, unsignedVecElemType); - amt = cir::VecSplatOp::create( - builder, location, cir::VectorType::get(unsignedVecElemType, numElems), - amt); - } - - const StringRef intrinsicName = isRight ? "fshr" : "fshl"; - return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty, - mlir::ValueRange{op0, op1, amt}); -} - -static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, - bool isSigned, - SmallVectorImpl<mlir::Value> &ops, - unsigned opTypePrimitiveSizeInBits) { - mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(), - opTypePrimitiveSizeInBits / 64); - mlir::Value lhs = builder.createBitcast(loc, ops[0], ty); - mlir::Value rhs = builder.createBitcast(loc, ops[1], ty); - if (isSigned) { - cir::ConstantOp shiftAmt = - builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32)); - cir::VecSplatOp shiftSplatVecOp = - cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult()); - mlir::Value shiftSplatValue = shiftSplatVecOp.getResult(); - // In CIR, right-shift operations are automatically lowered to either an - // arithmetic or logical shift depending on the operand type. The purpose - // of the shifts here is to propagate the sign bit of the 32-bit input - // into the upper bits of each vector lane. - lhs = builder.createShift(loc, lhs, shiftSplatValue, true); - lhs = builder.createShift(loc, lhs, shiftSplatValue, false); - rhs = builder.createShift(loc, rhs, shiftSplatValue, true); - rhs = builder.createShift(loc, rhs, shiftSplatValue, false); - } else { - cir::ConstantOp maskScalar = builder.getConstant( - loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff)); - cir::VecSplatOp mask = - cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult()); - // Clear the upper bits - lhs = builder.createAnd(loc, lhs, mask); - rhs = builder.createAnd(loc, rhs, mask); - } - return builder.createMul(loc, lhs, rhs); -} - -static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, - llvm::SmallVector<mlir::Value> ops, - bool isSigned) { - mlir::Value op0 = ops[0]; - mlir::Value op1 = ops[1]; - - cir::VectorType ty = cast<cir::VectorType>(op0.getType()); - cir::IntType elementTy = cast<cir::IntType>(ty.getElementType()); - - uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7; - - cir::CmpOpKind pred; - switch (imm) { - case 0x0: - pred = cir::CmpOpKind::lt; - break; - case 0x1: - pred = cir::CmpOpKind::le; - break; - case 0x2: - pred = cir::CmpOpKind::gt; - break; - case 0x3: - pred = cir::CmpOpKind::ge; - break; - case 0x4: - pred = cir::CmpOpKind::eq; - break; - case 0x5: - pred = cir::CmpOpKind::ne; - break; - case 0x6: - return builder.getNullValue(ty, loc); // FALSE - case 0x7: { - llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth()); - return cir::VecSplatOp::create( - builder, loc, ty, - builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE - } - default: - llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); - } - - if ((!isSigned && elementTy.isSigned()) || - (isSigned && elementTy.isUnsigned())) { - elementTy = elementTy.isSigned() ? 
builder.getUIntNTy(elementTy.getWidth()) - : builder.getSIntNTy(elementTy.getWidth()); - ty = cir::VectorType::get(elementTy, ty.getSize()); - op0 = builder.createBitcast(op0, ty); - op1 = builder.createBitcast(op1, ty); - } - - return builder.createVecCompare(loc, pred, op0, op1); -} - -mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, - const CallExpr *expr) { - if (builtinID == Builtin::BI__builtin_cpu_is) { - cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is"); - return {}; - } - if (builtinID == Builtin::BI__builtin_cpu_supports) { - cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports"); - return {}; - } - if (builtinID == Builtin::BI__builtin_cpu_init) { - cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init"); - return {}; - } - - // Handle MSVC intrinsics before argument evaluation to prevent double - // evaluation. - assert(!cir::MissingFeatures::msvcBuiltins()); - - // Find out if any arguments are required to be integer constant expressions. - assert(!cir::MissingFeatures::handleBuiltinICEArguments()); - - // The operands of the builtin call - llvm::SmallVector ops; - - // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit - // is required to be a constant integer expression. - unsigned iceArguments = 0; - ASTContext::GetBuiltinTypeError error; - getContext().GetBuiltinType(builtinID, error, &iceArguments); - assert(error == ASTContext::GE_None && "Error while getting builtin type."); - - for (auto [idx, arg] : llvm::enumerate(expr->arguments())) - ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg)); - - CIRGenBuilderTy &builder = getBuilder(); - mlir::Type voidTy = builder.getVoidTy(); - - switch (builtinID) { - default: - return {}; - case X86::BI_mm_clflush: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.clflush", voidTy, ops[0]); - case X86::BI_mm_lfence: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.lfence", voidTy); - case X86::BI_mm_pause: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.pause", voidTy); - case X86::BI_mm_mfence: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse2.mfence", voidTy); - case X86::BI_mm_sfence: - return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - "x86.sse.sfence", voidTy); - case X86::BI_mm_prefetch: - case X86::BI__rdtsc: - case X86::BI__builtin_ia32_rdtscp: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - } - case X86::BI__builtin_ia32_lzcnt_u16: - case X86::BI__builtin_ia32_lzcnt_u32: - case X86::BI__builtin_ia32_lzcnt_u64: { - mlir::Location loc = getLoc(expr->getExprLoc()); - mlir::Value isZeroPoison = builder.getFalse(loc); - return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(), - mlir::ValueRange{ops[0], isZeroPoison}); - } - case X86::BI__builtin_ia32_tzcnt_u16: - case X86::BI__builtin_ia32_tzcnt_u32: - case X86::BI__builtin_ia32_tzcnt_u64: { - mlir::Location loc = getLoc(expr->getExprLoc()); - mlir::Value isZeroPoison = builder.getFalse(loc); - return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(), - mlir::ValueRange{ops[0], isZeroPoison}); - } - case X86::BI__builtin_ia32_undef128: - case X86::BI__builtin_ia32_undef256: - case X86::BI__builtin_ia32_undef512: - // The x86 definition of "undef" is not the same as the LLVM definition - // (PR32176). 
We leave optimizing away an unnecessary zero constant to the - // IR optimizer and backend. - // TODO: If we had a "freeze" IR instruction to generate a fixed undef - // value, we should use that here instead of a zero. - return builder.getNullValue(convertType(expr->getType()), - getLoc(expr->getExprLoc())); - case X86::BI__builtin_ia32_vec_ext_v4hi: - case X86::BI__builtin_ia32_vec_ext_v16qi: - case X86::BI__builtin_ia32_vec_ext_v8hi: - case X86::BI__builtin_ia32_vec_ext_v4si: - case X86::BI__builtin_ia32_vec_ext_v4sf: - case X86::BI__builtin_ia32_vec_ext_v2di: - case X86::BI__builtin_ia32_vec_ext_v32qi: - case X86::BI__builtin_ia32_vec_ext_v16hi: - case X86::BI__builtin_ia32_vec_ext_v8si: - case X86::BI__builtin_ia32_vec_ext_v4di: { - unsigned numElts = cast(ops[0].getType()).getSize(); - - uint64_t index = getZExtIntValueFromConstOp(ops[1]); - index &= numElts - 1; - - cir::ConstantOp indexVal = - builder.getUInt64(index, getLoc(expr->getExprLoc())); - - // These builtins exist so we can ensure the index is an ICE and in range. - // Otherwise we could just do this in the header file. - return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()), - ops[0], indexVal); - } - case X86::BI__builtin_ia32_vec_set_v4hi: - case X86::BI__builtin_ia32_vec_set_v16qi: - case X86::BI__builtin_ia32_vec_set_v8hi: - case X86::BI__builtin_ia32_vec_set_v4si: - case X86::BI__builtin_ia32_vec_set_v2di: - case X86::BI__builtin_ia32_vec_set_v32qi: - case X86::BI__builtin_ia32_vec_set_v16hi: - case X86::BI__builtin_ia32_vec_set_v8si: - case X86::BI__builtin_ia32_vec_set_v4di: { - return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1], - ops[2]); - } - case X86::BI__builtin_ia32_kunpckhi: - return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kunpackb", ops); - case X86::BI__builtin_ia32_kunpcksi: - return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kunpackw", ops); - case X86::BI__builtin_ia32_kunpckdi: - return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kunpackd", ops); - case X86::BI_mm_setcsr: - case X86::BI__builtin_ia32_ldmxcsr: { - mlir::Location loc = getLoc(expr->getExprLoc()); - Address tmp = createMemTemp(expr->getArg(0)->getType(), loc); - builder.createStore(loc, ops[0], tmp); - return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr", - builder.getVoidTy(), tmp.getPointer()); - } - case X86::BI_mm_getcsr: - case X86::BI__builtin_ia32_stmxcsr: { - mlir::Location loc = getLoc(expr->getExprLoc()); - Address tmp = createMemTemp(expr->getType(), loc); - emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(), - tmp.getPointer()); - return builder.createLoad(loc, tmp); - } - case X86::BI__builtin_ia32_xsave: - case X86::BI__builtin_ia32_xsave64: - case X86::BI__builtin_ia32_xrstor: - case X86::BI__builtin_ia32_xrstor64: - case X86::BI__builtin_ia32_xsaveopt: - case X86::BI__builtin_ia32_xsaveopt64: - case X86::BI__builtin_ia32_xrstors: - case X86::BI__builtin_ia32_xrstors64: - case X86::BI__builtin_ia32_xsavec: - case X86::BI__builtin_ia32_xsavec64: - case X86::BI__builtin_ia32_xsaves: - case X86::BI__builtin_ia32_xsaves64: - case X86::BI__builtin_ia32_xsetbv: - case X86::BI_xsetbv: - case X86::BI__builtin_ia32_xgetbv: - case X86::BI_xgetbv: - case X86::BI__builtin_ia32_storedqudi128_mask: - case X86::BI__builtin_ia32_storedqusi128_mask: - case X86::BI__builtin_ia32_storedquhi128_mask: - case X86::BI__builtin_ia32_storedquqi128_mask: - case X86::BI__builtin_ia32_storeupd128_mask: - 
case X86::BI__builtin_ia32_storeups128_mask: - case X86::BI__builtin_ia32_storedqudi256_mask: - case X86::BI__builtin_ia32_storedqusi256_mask: - case X86::BI__builtin_ia32_storedquhi256_mask: - case X86::BI__builtin_ia32_storedquqi256_mask: - case X86::BI__builtin_ia32_storeupd256_mask: - case X86::BI__builtin_ia32_storeups256_mask: - case X86::BI__builtin_ia32_storedqudi512_mask: - case X86::BI__builtin_ia32_storedqusi512_mask: - case X86::BI__builtin_ia32_storedquhi512_mask: - case X86::BI__builtin_ia32_storedquqi512_mask: - case X86::BI__builtin_ia32_storeupd512_mask: - case X86::BI__builtin_ia32_storeups512_mask: - case X86::BI__builtin_ia32_storesbf16128_mask: - case X86::BI__builtin_ia32_storesh128_mask: - case X86::BI__builtin_ia32_storess128_mask: - case X86::BI__builtin_ia32_storesd128_mask: - case X86::BI__builtin_ia32_cvtmask2b128: - case X86::BI__builtin_ia32_cvtmask2b256: - case X86::BI__builtin_ia32_cvtmask2b512: - case X86::BI__builtin_ia32_cvtmask2w128: - case X86::BI__builtin_ia32_cvtmask2w256: - case X86::BI__builtin_ia32_cvtmask2w512: - case X86::BI__builtin_ia32_cvtmask2d128: - case X86::BI__builtin_ia32_cvtmask2d256: - case X86::BI__builtin_ia32_cvtmask2d512: - case X86::BI__builtin_ia32_cvtmask2q128: - case X86::BI__builtin_ia32_cvtmask2q256: - case X86::BI__builtin_ia32_cvtmask2q512: - case X86::BI__builtin_ia32_cvtb2mask128: - case X86::BI__builtin_ia32_cvtb2mask256: - case X86::BI__builtin_ia32_cvtb2mask512: - case X86::BI__builtin_ia32_cvtw2mask128: - case X86::BI__builtin_ia32_cvtw2mask256: - case X86::BI__builtin_ia32_cvtw2mask512: - case X86::BI__builtin_ia32_cvtd2mask128: - case X86::BI__builtin_ia32_cvtd2mask256: - case X86::BI__builtin_ia32_cvtd2mask512: - case X86::BI__builtin_ia32_cvtq2mask128: - case X86::BI__builtin_ia32_cvtq2mask256: - case X86::BI__builtin_ia32_cvtq2mask512: - case X86::BI__builtin_ia32_cvtdq2ps512_mask: - case X86::BI__builtin_ia32_cvtqq2ps512_mask: - case X86::BI__builtin_ia32_cvtqq2pd512_mask: - case X86::BI__builtin_ia32_vcvtw2ph512_mask: - case X86::BI__builtin_ia32_vcvtdq2ph512_mask: - case X86::BI__builtin_ia32_vcvtqq2ph512_mask: - case X86::BI__builtin_ia32_cvtudq2ps512_mask: - case X86::BI__builtin_ia32_cvtuqq2ps512_mask: - case X86::BI__builtin_ia32_cvtuqq2pd512_mask: - case X86::BI__builtin_ia32_vcvtuw2ph512_mask: - case X86::BI__builtin_ia32_vcvtudq2ph512_mask: - case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: - case X86::BI__builtin_ia32_vfmaddsh3_mask: - case X86::BI__builtin_ia32_vfmaddss3_mask: - case X86::BI__builtin_ia32_vfmaddsd3_mask: - case X86::BI__builtin_ia32_vfmaddsh3_maskz: - case X86::BI__builtin_ia32_vfmaddss3_maskz: - case X86::BI__builtin_ia32_vfmaddsd3_maskz: - case X86::BI__builtin_ia32_vfmaddsh3_mask3: - case X86::BI__builtin_ia32_vfmaddss3_mask3: - case X86::BI__builtin_ia32_vfmaddsd3_mask3: - case X86::BI__builtin_ia32_vfmsubsh3_mask3: - case X86::BI__builtin_ia32_vfmsubss3_mask3: - case X86::BI__builtin_ia32_vfmsubsd3_mask3: - case X86::BI__builtin_ia32_vfmaddph512_mask: - case X86::BI__builtin_ia32_vfmaddph512_maskz: - case X86::BI__builtin_ia32_vfmaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddps512_mask: - case X86::BI__builtin_ia32_vfmaddps512_maskz: - case X86::BI__builtin_ia32_vfmaddps512_mask3: - case X86::BI__builtin_ia32_vfmsubps512_mask3: - case X86::BI__builtin_ia32_vfmaddpd512_mask: - case X86::BI__builtin_ia32_vfmaddpd512_maskz: - case X86::BI__builtin_ia32_vfmaddpd512_mask3: - case X86::BI__builtin_ia32_vfmsubpd512_mask3: - case X86::BI__builtin_ia32_vfmsubph512_mask3: - case 
X86::BI__builtin_ia32_vfmaddsubph512_mask: - case X86::BI__builtin_ia32_vfmaddsubph512_maskz: - case X86::BI__builtin_ia32_vfmaddsubph512_mask3: - case X86::BI__builtin_ia32_vfmsubaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddsubps512_mask: - case X86::BI__builtin_ia32_vfmaddsubps512_maskz: - case X86::BI__builtin_ia32_vfmaddsubps512_mask3: - case X86::BI__builtin_ia32_vfmsubaddps512_mask3: - case X86::BI__builtin_ia32_vfmaddsubpd512_mask: - case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: - case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: - case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: - case X86::BI__builtin_ia32_movdqa32store128_mask: - case X86::BI__builtin_ia32_movdqa64store128_mask: - case X86::BI__builtin_ia32_storeaps128_mask: - case X86::BI__builtin_ia32_storeapd128_mask: - case X86::BI__builtin_ia32_movdqa32store256_mask: - case X86::BI__builtin_ia32_movdqa64store256_mask: - case X86::BI__builtin_ia32_storeaps256_mask: - case X86::BI__builtin_ia32_storeapd256_mask: - case X86::BI__builtin_ia32_movdqa32store512_mask: - case X86::BI__builtin_ia32_movdqa64store512_mask: - case X86::BI__builtin_ia32_storeaps512_mask: - case X86::BI__builtin_ia32_storeapd512_mask: - case X86::BI__builtin_ia32_loadups128_mask: - case X86::BI__builtin_ia32_loadups256_mask: - case X86::BI__builtin_ia32_loadups512_mask: - case X86::BI__builtin_ia32_loadupd128_mask: - case X86::BI__builtin_ia32_loadupd256_mask: - case X86::BI__builtin_ia32_loadupd512_mask: - case X86::BI__builtin_ia32_loaddquqi128_mask: - case X86::BI__builtin_ia32_loaddquqi256_mask: - case X86::BI__builtin_ia32_loaddquqi512_mask: - case X86::BI__builtin_ia32_loaddquhi128_mask: - case X86::BI__builtin_ia32_loaddquhi256_mask: - case X86::BI__builtin_ia32_loaddquhi512_mask: - case X86::BI__builtin_ia32_loaddqusi128_mask: - case X86::BI__builtin_ia32_loaddqusi256_mask: - case X86::BI__builtin_ia32_loaddqusi512_mask: - case X86::BI__builtin_ia32_loaddqudi128_mask: - case X86::BI__builtin_ia32_loaddqudi256_mask: - case X86::BI__builtin_ia32_loaddqudi512_mask: - case X86::BI__builtin_ia32_loadsbf16128_mask: - case X86::BI__builtin_ia32_loadsh128_mask: - case X86::BI__builtin_ia32_loadss128_mask: - case X86::BI__builtin_ia32_loadsd128_mask: - case X86::BI__builtin_ia32_loadaps128_mask: - case X86::BI__builtin_ia32_loadaps256_mask: - case X86::BI__builtin_ia32_loadaps512_mask: - case X86::BI__builtin_ia32_loadapd128_mask: - case X86::BI__builtin_ia32_loadapd256_mask: - case X86::BI__builtin_ia32_loadapd512_mask: - case X86::BI__builtin_ia32_movdqa32load128_mask: - case X86::BI__builtin_ia32_movdqa32load256_mask: - case X86::BI__builtin_ia32_movdqa32load512_mask: - case X86::BI__builtin_ia32_movdqa64load128_mask: - case X86::BI__builtin_ia32_movdqa64load256_mask: - case X86::BI__builtin_ia32_movdqa64load512_mask: - case X86::BI__builtin_ia32_expandloaddf128_mask: - case X86::BI__builtin_ia32_expandloaddf256_mask: - case X86::BI__builtin_ia32_expandloaddf512_mask: - case X86::BI__builtin_ia32_expandloadsf128_mask: - case X86::BI__builtin_ia32_expandloadsf256_mask: - case X86::BI__builtin_ia32_expandloadsf512_mask: - case X86::BI__builtin_ia32_expandloaddi128_mask: - case X86::BI__builtin_ia32_expandloaddi256_mask: - case X86::BI__builtin_ia32_expandloaddi512_mask: - case X86::BI__builtin_ia32_expandloadsi128_mask: - case X86::BI__builtin_ia32_expandloadsi256_mask: - case X86::BI__builtin_ia32_expandloadsi512_mask: - case X86::BI__builtin_ia32_expandloadhi128_mask: - case X86::BI__builtin_ia32_expandloadhi256_mask: - case 
X86::BI__builtin_ia32_expandloadhi512_mask: - case X86::BI__builtin_ia32_expandloadqi128_mask: - case X86::BI__builtin_ia32_expandloadqi256_mask: - case X86::BI__builtin_ia32_expandloadqi512_mask: - case X86::BI__builtin_ia32_compressstoredf128_mask: - case X86::BI__builtin_ia32_compressstoredf256_mask: - case X86::BI__builtin_ia32_compressstoredf512_mask: - case X86::BI__builtin_ia32_compressstoresf128_mask: - case X86::BI__builtin_ia32_compressstoresf256_mask: - case X86::BI__builtin_ia32_compressstoresf512_mask: - case X86::BI__builtin_ia32_compressstoredi128_mask: - case X86::BI__builtin_ia32_compressstoredi256_mask: - case X86::BI__builtin_ia32_compressstoredi512_mask: - case X86::BI__builtin_ia32_compressstoresi128_mask: - case X86::BI__builtin_ia32_compressstoresi256_mask: - case X86::BI__builtin_ia32_compressstoresi512_mask: - case X86::BI__builtin_ia32_compressstorehi128_mask: - case X86::BI__builtin_ia32_compressstorehi256_mask: - case X86::BI__builtin_ia32_compressstorehi512_mask: - case X86::BI__builtin_ia32_compressstoreqi128_mask: - case X86::BI__builtin_ia32_compressstoreqi256_mask: - case X86::BI__builtin_ia32_compressstoreqi512_mask: - case X86::BI__builtin_ia32_expanddf128_mask: - case X86::BI__builtin_ia32_expanddf256_mask: - case X86::BI__builtin_ia32_expanddf512_mask: - case X86::BI__builtin_ia32_expandsf128_mask: - case X86::BI__builtin_ia32_expandsf256_mask: - case X86::BI__builtin_ia32_expandsf512_mask: - case X86::BI__builtin_ia32_expanddi128_mask: - case X86::BI__builtin_ia32_expanddi256_mask: - case X86::BI__builtin_ia32_expanddi512_mask: - case X86::BI__builtin_ia32_expandsi128_mask: - case X86::BI__builtin_ia32_expandsi256_mask: - case X86::BI__builtin_ia32_expandsi512_mask: - case X86::BI__builtin_ia32_expandhi128_mask: - case X86::BI__builtin_ia32_expandhi256_mask: - case X86::BI__builtin_ia32_expandhi512_mask: - case X86::BI__builtin_ia32_expandqi128_mask: - case X86::BI__builtin_ia32_expandqi256_mask: - case X86::BI__builtin_ia32_expandqi512_mask: - case X86::BI__builtin_ia32_compressdf128_mask: - case X86::BI__builtin_ia32_compressdf256_mask: - case X86::BI__builtin_ia32_compressdf512_mask: - case X86::BI__builtin_ia32_compresssf128_mask: - case X86::BI__builtin_ia32_compresssf256_mask: - case X86::BI__builtin_ia32_compresssf512_mask: - case X86::BI__builtin_ia32_compressdi128_mask: - case X86::BI__builtin_ia32_compressdi256_mask: - case X86::BI__builtin_ia32_compressdi512_mask: - case X86::BI__builtin_ia32_compresssi128_mask: - case X86::BI__builtin_ia32_compresssi256_mask: - case X86::BI__builtin_ia32_compresssi512_mask: - case X86::BI__builtin_ia32_compresshi128_mask: - case X86::BI__builtin_ia32_compresshi256_mask: - case X86::BI__builtin_ia32_compresshi512_mask: - case X86::BI__builtin_ia32_compressqi128_mask: - case X86::BI__builtin_ia32_compressqi256_mask: - case X86::BI__builtin_ia32_compressqi512_mask: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_gather3div2df: - case X86::BI__builtin_ia32_gather3div2di: - case X86::BI__builtin_ia32_gather3div4df: - case X86::BI__builtin_ia32_gather3div4di: - case X86::BI__builtin_ia32_gather3div4sf: - case X86::BI__builtin_ia32_gather3div4si: - case X86::BI__builtin_ia32_gather3div8sf: - case X86::BI__builtin_ia32_gather3div8si: - case X86::BI__builtin_ia32_gather3siv2df: - case X86::BI__builtin_ia32_gather3siv2di: - case X86::BI__builtin_ia32_gather3siv4df: - case 
X86::BI__builtin_ia32_gather3siv4di: - case X86::BI__builtin_ia32_gather3siv4sf: - case X86::BI__builtin_ia32_gather3siv4si: - case X86::BI__builtin_ia32_gather3siv8sf: - case X86::BI__builtin_ia32_gather3siv8si: - case X86::BI__builtin_ia32_gathersiv8df: - case X86::BI__builtin_ia32_gathersiv16sf: - case X86::BI__builtin_ia32_gatherdiv8df: - case X86::BI__builtin_ia32_gatherdiv16sf: - case X86::BI__builtin_ia32_gathersiv8di: - case X86::BI__builtin_ia32_gathersiv16si: - case X86::BI__builtin_ia32_gatherdiv8di: - case X86::BI__builtin_ia32_gatherdiv16si: { - StringRef intrinsicName; - switch (builtinID) { - default: - llvm_unreachable("Unexpected builtin"); - case X86::BI__builtin_ia32_gather3div2df: - intrinsicName = "x86.avx512.mask.gather3div2.df"; - break; - case X86::BI__builtin_ia32_gather3div2di: - intrinsicName = "x86.avx512.mask.gather3div2.di"; - break; - case X86::BI__builtin_ia32_gather3div4df: - intrinsicName = "x86.avx512.mask.gather3div4.df"; - break; - case X86::BI__builtin_ia32_gather3div4di: - intrinsicName = "x86.avx512.mask.gather3div4.di"; - break; - case X86::BI__builtin_ia32_gather3div4sf: - intrinsicName = "x86.avx512.mask.gather3div4.sf"; - break; - case X86::BI__builtin_ia32_gather3div4si: - intrinsicName = "x86.avx512.mask.gather3div4.si"; - break; - case X86::BI__builtin_ia32_gather3div8sf: - intrinsicName = "x86.avx512.mask.gather3div8.sf"; - break; - case X86::BI__builtin_ia32_gather3div8si: - intrinsicName = "x86.avx512.mask.gather3div8.si"; - break; - case X86::BI__builtin_ia32_gather3siv2df: - intrinsicName = "x86.avx512.mask.gather3siv2.df"; - break; - case X86::BI__builtin_ia32_gather3siv2di: - intrinsicName = "x86.avx512.mask.gather3siv2.di"; - break; - case X86::BI__builtin_ia32_gather3siv4df: - intrinsicName = "x86.avx512.mask.gather3siv4.df"; - break; - case X86::BI__builtin_ia32_gather3siv4di: - intrinsicName = "x86.avx512.mask.gather3siv4.di"; - break; - case X86::BI__builtin_ia32_gather3siv4sf: - intrinsicName = "x86.avx512.mask.gather3siv4.sf"; - break; - case X86::BI__builtin_ia32_gather3siv4si: - intrinsicName = "x86.avx512.mask.gather3siv4.si"; - break; - case X86::BI__builtin_ia32_gather3siv8sf: - intrinsicName = "x86.avx512.mask.gather3siv8.sf"; - break; - case X86::BI__builtin_ia32_gather3siv8si: - intrinsicName = "x86.avx512.mask.gather3siv8.si"; - break; - case X86::BI__builtin_ia32_gathersiv8df: - intrinsicName = "x86.avx512.mask.gather.dpd.512"; - break; - case X86::BI__builtin_ia32_gathersiv16sf: - intrinsicName = "x86.avx512.mask.gather.dps.512"; - break; - case X86::BI__builtin_ia32_gatherdiv8df: - intrinsicName = "x86.avx512.mask.gather.qpd.512"; - break; - case X86::BI__builtin_ia32_gatherdiv16sf: - intrinsicName = "x86.avx512.mask.gather.qps.512"; - break; - case X86::BI__builtin_ia32_gathersiv8di: - intrinsicName = "x86.avx512.mask.gather.dpq.512"; - break; - case X86::BI__builtin_ia32_gathersiv16si: - intrinsicName = "x86.avx512.mask.gather.dpi.512"; - break; - case X86::BI__builtin_ia32_gatherdiv8di: - intrinsicName = "x86.avx512.mask.gather.qpq.512"; - break; - case X86::BI__builtin_ia32_gatherdiv16si: - intrinsicName = "x86.avx512.mask.gather.qpi.512"; - break; - } - - mlir::Location loc = getLoc(expr->getExprLoc()); - unsigned minElts = - std::min(cast(ops[0].getType()).getSize(), - cast(ops[2].getType()).getSize()); - ops[3] = getMaskVecValue(builder, loc, ops[3], minElts); - return emitIntrinsicCallOp(builder, loc, intrinsicName, - convertType(expr->getType()), ops); - } - case X86::BI__builtin_ia32_scattersiv8df: - case 
X86::BI__builtin_ia32_scattersiv16sf: - case X86::BI__builtin_ia32_scatterdiv8df: - case X86::BI__builtin_ia32_scatterdiv16sf: - case X86::BI__builtin_ia32_scattersiv8di: - case X86::BI__builtin_ia32_scattersiv16si: - case X86::BI__builtin_ia32_scatterdiv8di: - case X86::BI__builtin_ia32_scatterdiv16si: - case X86::BI__builtin_ia32_scatterdiv2df: - case X86::BI__builtin_ia32_scatterdiv2di: - case X86::BI__builtin_ia32_scatterdiv4df: - case X86::BI__builtin_ia32_scatterdiv4di: - case X86::BI__builtin_ia32_scatterdiv4sf: - case X86::BI__builtin_ia32_scatterdiv4si: - case X86::BI__builtin_ia32_scatterdiv8sf: - case X86::BI__builtin_ia32_scatterdiv8si: - case X86::BI__builtin_ia32_scattersiv2df: - case X86::BI__builtin_ia32_scattersiv2di: - case X86::BI__builtin_ia32_scattersiv4df: - case X86::BI__builtin_ia32_scattersiv4di: - case X86::BI__builtin_ia32_scattersiv4sf: - case X86::BI__builtin_ia32_scattersiv4si: - case X86::BI__builtin_ia32_scattersiv8sf: - case X86::BI__builtin_ia32_scattersiv8si: { - llvm::StringRef intrinsicName; - switch (builtinID) { - default: - llvm_unreachable("Unexpected builtin"); - case X86::BI__builtin_ia32_scattersiv8df: - intrinsicName = "x86.avx512.mask.scatter.dpd.512"; - break; - case X86::BI__builtin_ia32_scattersiv16sf: - intrinsicName = "x86.avx512.mask.scatter.dps.512"; - break; - case X86::BI__builtin_ia32_scatterdiv8df: - intrinsicName = "x86.avx512.mask.scatter.qpd.512"; - break; - case X86::BI__builtin_ia32_scatterdiv16sf: - intrinsicName = "x86.avx512.mask.scatter.qps.512"; - break; - case X86::BI__builtin_ia32_scattersiv8di: - intrinsicName = "x86.avx512.mask.scatter.dpq.512"; - break; - case X86::BI__builtin_ia32_scattersiv16si: - intrinsicName = "x86.avx512.mask.scatter.dpi.512"; - break; - case X86::BI__builtin_ia32_scatterdiv8di: - intrinsicName = "x86.avx512.mask.scatter.qpq.512"; - break; - case X86::BI__builtin_ia32_scatterdiv16si: - intrinsicName = "x86.avx512.mask.scatter.qpi.512"; - break; - case X86::BI__builtin_ia32_scatterdiv2df: - intrinsicName = "x86.avx512.mask.scatterdiv2.df"; - break; - case X86::BI__builtin_ia32_scatterdiv2di: - intrinsicName = "x86.avx512.mask.scatterdiv2.di"; - break; - case X86::BI__builtin_ia32_scatterdiv4df: - intrinsicName = "x86.avx512.mask.scatterdiv4.df"; - break; - case X86::BI__builtin_ia32_scatterdiv4di: - intrinsicName = "x86.avx512.mask.scatterdiv4.di"; - break; - case X86::BI__builtin_ia32_scatterdiv4sf: - intrinsicName = "x86.avx512.mask.scatterdiv4.sf"; - break; - case X86::BI__builtin_ia32_scatterdiv4si: - intrinsicName = "x86.avx512.mask.scatterdiv4.si"; - break; - case X86::BI__builtin_ia32_scatterdiv8sf: - intrinsicName = "x86.avx512.mask.scatterdiv8.sf"; - break; - case X86::BI__builtin_ia32_scatterdiv8si: - intrinsicName = "x86.avx512.mask.scatterdiv8.si"; - break; - case X86::BI__builtin_ia32_scattersiv2df: - intrinsicName = "x86.avx512.mask.scattersiv2.df"; - break; - case X86::BI__builtin_ia32_scattersiv2di: - intrinsicName = "x86.avx512.mask.scattersiv2.di"; - break; - case X86::BI__builtin_ia32_scattersiv4df: - intrinsicName = "x86.avx512.mask.scattersiv4.df"; - break; - case X86::BI__builtin_ia32_scattersiv4di: - intrinsicName = "x86.avx512.mask.scattersiv4.di"; - break; - case X86::BI__builtin_ia32_scattersiv4sf: - intrinsicName = "x86.avx512.mask.scattersiv4.sf"; - break; - case X86::BI__builtin_ia32_scattersiv4si: - intrinsicName = "x86.avx512.mask.scattersiv4.si"; - break; - case X86::BI__builtin_ia32_scattersiv8sf: - intrinsicName = "x86.avx512.mask.scattersiv8.sf"; - break; - case 
X86::BI__builtin_ia32_scattersiv8si: - intrinsicName = "x86.avx512.mask.scattersiv8.si"; - break; - } - - mlir::Location loc = getLoc(expr->getExprLoc()); - unsigned minElts = - std::min(cast(ops[2].getType()).getSize(), - cast(ops[3].getType()).getSize()); - ops[1] = getMaskVecValue(builder, loc, ops[1], minElts); - - return emitIntrinsicCallOp(builder, loc, intrinsicName, - convertType(expr->getType()), ops); - } - case X86::BI__builtin_ia32_vextractf128_pd256: - case X86::BI__builtin_ia32_vextractf128_ps256: - case X86::BI__builtin_ia32_vextractf128_si256: - case X86::BI__builtin_ia32_extract128i256: - case X86::BI__builtin_ia32_extractf64x4_mask: - case X86::BI__builtin_ia32_extractf32x4_mask: - case X86::BI__builtin_ia32_extracti64x4_mask: - case X86::BI__builtin_ia32_extracti32x4_mask: - case X86::BI__builtin_ia32_extractf32x8_mask: - case X86::BI__builtin_ia32_extracti32x8_mask: - case X86::BI__builtin_ia32_extractf32x4_256_mask: - case X86::BI__builtin_ia32_extracti32x4_256_mask: - case X86::BI__builtin_ia32_extractf64x2_256_mask: - case X86::BI__builtin_ia32_extracti64x2_256_mask: - case X86::BI__builtin_ia32_extractf64x2_512_mask: - case X86::BI__builtin_ia32_extracti64x2_512_mask: - case X86::BI__builtin_ia32_vinsertf128_pd256: - case X86::BI__builtin_ia32_vinsertf128_ps256: - case X86::BI__builtin_ia32_vinsertf128_si256: - case X86::BI__builtin_ia32_insert128i256: - case X86::BI__builtin_ia32_insertf64x4: - case X86::BI__builtin_ia32_insertf32x4: - case X86::BI__builtin_ia32_inserti64x4: - case X86::BI__builtin_ia32_inserti32x4: - case X86::BI__builtin_ia32_insertf32x8: - case X86::BI__builtin_ia32_inserti32x8: - case X86::BI__builtin_ia32_insertf32x4_256: - case X86::BI__builtin_ia32_inserti32x4_256: - case X86::BI__builtin_ia32_insertf64x2_256: - case X86::BI__builtin_ia32_inserti64x2_256: - case X86::BI__builtin_ia32_insertf64x2_512: - case X86::BI__builtin_ia32_inserti64x2_512: - case X86::BI__builtin_ia32_pmovqd512_mask: - case X86::BI__builtin_ia32_pmovwb512_mask: - case X86::BI__builtin_ia32_pblendw128: - case X86::BI__builtin_ia32_blendpd: - case X86::BI__builtin_ia32_blendps: - case X86::BI__builtin_ia32_blendpd256: - case X86::BI__builtin_ia32_blendps256: - case X86::BI__builtin_ia32_pblendw256: - case X86::BI__builtin_ia32_pblendd128: - case X86::BI__builtin_ia32_pblendd256: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_pshuflw: - case X86::BI__builtin_ia32_pshuflw256: - case X86::BI__builtin_ia32_pshuflw512: - return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), - true); - case X86::BI__builtin_ia32_pshufhw: - case X86::BI__builtin_ia32_pshufhw256: - case X86::BI__builtin_ia32_pshufhw512: - return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), - false); - case X86::BI__builtin_ia32_pshufd: - case X86::BI__builtin_ia32_pshufd256: - case X86::BI__builtin_ia32_pshufd512: - case X86::BI__builtin_ia32_vpermilpd: - case X86::BI__builtin_ia32_vpermilps: - case X86::BI__builtin_ia32_vpermilpd256: - case X86::BI__builtin_ia32_vpermilps256: - case X86::BI__builtin_ia32_vpermilpd512: - case X86::BI__builtin_ia32_vpermilps512: { - const uint32_t imm = getSExtIntValueFromConstOp(ops[1]); - - llvm::SmallVector mask(16); - computeFullLaneShuffleMask(*this, ops[0], imm, false, mask); - - return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask); - } - case X86::BI__builtin_ia32_shufpd: - case 
X86::BI__builtin_ia32_shufpd256: - case X86::BI__builtin_ia32_shufpd512: - case X86::BI__builtin_ia32_shufps: - case X86::BI__builtin_ia32_shufps256: - case X86::BI__builtin_ia32_shufps512: { - const uint32_t imm = getZExtIntValueFromConstOp(ops[2]); - - llvm::SmallVector mask(16); - computeFullLaneShuffleMask(*this, ops[0], imm, true, mask); - - return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1], - mask); - } - case X86::BI__builtin_ia32_permdi256: - case X86::BI__builtin_ia32_permdf256: - case X86::BI__builtin_ia32_permdi512: - case X86::BI__builtin_ia32_permdf512: - case X86::BI__builtin_ia32_palignr128: - case X86::BI__builtin_ia32_palignr256: - case X86::BI__builtin_ia32_palignr512: - case X86::BI__builtin_ia32_alignd128: - case X86::BI__builtin_ia32_alignd256: - case X86::BI__builtin_ia32_alignd512: - case X86::BI__builtin_ia32_alignq128: - case X86::BI__builtin_ia32_alignq256: - case X86::BI__builtin_ia32_alignq512: - case X86::BI__builtin_ia32_shuf_f32x4_256: - case X86::BI__builtin_ia32_shuf_f64x2_256: - case X86::BI__builtin_ia32_shuf_i32x4_256: - case X86::BI__builtin_ia32_shuf_i64x2_256: - case X86::BI__builtin_ia32_shuf_f32x4: - case X86::BI__builtin_ia32_shuf_f64x2: - case X86::BI__builtin_ia32_shuf_i32x4: - case X86::BI__builtin_ia32_shuf_i64x2: - case X86::BI__builtin_ia32_vperm2f128_pd256: - case X86::BI__builtin_ia32_vperm2f128_ps256: - case X86::BI__builtin_ia32_vperm2f128_si256: - case X86::BI__builtin_ia32_permti256: - case X86::BI__builtin_ia32_pslldqi128_byteshift: - case X86::BI__builtin_ia32_pslldqi256_byteshift: - case X86::BI__builtin_ia32_pslldqi512_byteshift: - case X86::BI__builtin_ia32_psrldqi128_byteshift: - case X86::BI__builtin_ia32_psrldqi256_byteshift: - case X86::BI__builtin_ia32_psrldqi512_byteshift: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_kshiftliqi: - case X86::BI__builtin_ia32_kshiftlihi: - case X86::BI__builtin_ia32_kshiftlisi: - case X86::BI__builtin_ia32_kshiftlidi: { - mlir::Location loc = getLoc(expr->getExprLoc()); - unsigned shiftVal = - ops[1].getDefiningOp().getIntValue().getZExtValue() & - 0xff; - unsigned numElems = cast(ops[0].getType()).getWidth(); - - if (shiftVal >= numElems) - return builder.getNullValue(ops[0].getType(), loc); - - mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems); - - SmallVector indices; - mlir::Type i32Ty = builder.getSInt32Ty(); - for (auto i : llvm::seq(0, numElems)) - indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal)); - - mlir::Value zero = builder.getNullValue(in.getType(), loc); - mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices); - return builder.createBitcast(sv, ops[0].getType()); - } - case X86::BI__builtin_ia32_kshiftriqi: - case X86::BI__builtin_ia32_kshiftrihi: - case X86::BI__builtin_ia32_kshiftrisi: - case X86::BI__builtin_ia32_kshiftridi: { - mlir::Location loc = getLoc(expr->getExprLoc()); - unsigned shiftVal = - ops[1].getDefiningOp().getIntValue().getZExtValue() & - 0xff; - unsigned numElems = cast(ops[0].getType()).getWidth(); - - if (shiftVal >= numElems) - return builder.getNullValue(ops[0].getType(), loc); - - mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems); - - SmallVector indices; - mlir::Type i32Ty = builder.getSInt32Ty(); - for (auto i : llvm::seq(0, numElems)) - indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal)); - - mlir::Value zero = 
builder.getNullValue(in.getType(), loc); - mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices); - return builder.createBitcast(sv, ops[0].getType()); - } - case X86::BI__builtin_ia32_vprotbi: - case X86::BI__builtin_ia32_vprotwi: - case X86::BI__builtin_ia32_vprotdi: - case X86::BI__builtin_ia32_vprotqi: - case X86::BI__builtin_ia32_prold128: - case X86::BI__builtin_ia32_prold256: - case X86::BI__builtin_ia32_prold512: - case X86::BI__builtin_ia32_prolq128: - case X86::BI__builtin_ia32_prolq256: - case X86::BI__builtin_ia32_prolq512: - return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0], - ops[0], ops[1], false); - case X86::BI__builtin_ia32_prord128: - case X86::BI__builtin_ia32_prord256: - case X86::BI__builtin_ia32_prord512: - case X86::BI__builtin_ia32_prorq128: - case X86::BI__builtin_ia32_prorq256: - case X86::BI__builtin_ia32_prorq512: - return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0], - ops[0], ops[1], true); - case X86::BI__builtin_ia32_selectb_128: - case X86::BI__builtin_ia32_selectb_256: - case X86::BI__builtin_ia32_selectb_512: - case X86::BI__builtin_ia32_selectw_128: - case X86::BI__builtin_ia32_selectw_256: - case X86::BI__builtin_ia32_selectw_512: - case X86::BI__builtin_ia32_selectd_128: - case X86::BI__builtin_ia32_selectd_256: - case X86::BI__builtin_ia32_selectd_512: - case X86::BI__builtin_ia32_selectq_128: - case X86::BI__builtin_ia32_selectq_256: - case X86::BI__builtin_ia32_selectq_512: - case X86::BI__builtin_ia32_selectph_128: - case X86::BI__builtin_ia32_selectph_256: - case X86::BI__builtin_ia32_selectph_512: - case X86::BI__builtin_ia32_selectpbf_128: - case X86::BI__builtin_ia32_selectpbf_256: - case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectps_128: - case X86::BI__builtin_ia32_selectps_256: - case X86::BI__builtin_ia32_selectps_512: - case X86::BI__builtin_ia32_selectpd_128: - case X86::BI__builtin_ia32_selectpd_256: - case X86::BI__builtin_ia32_selectpd_512: - case X86::BI__builtin_ia32_selectsh_128: - case X86::BI__builtin_ia32_selectsbf_128: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: - case X86::BI__builtin_ia32_cmpb128_mask: - case X86::BI__builtin_ia32_cmpb256_mask: - case X86::BI__builtin_ia32_cmpb512_mask: - case X86::BI__builtin_ia32_cmpw128_mask: - case X86::BI__builtin_ia32_cmpw256_mask: - case X86::BI__builtin_ia32_cmpw512_mask: - case X86::BI__builtin_ia32_cmpd128_mask: - case X86::BI__builtin_ia32_cmpd256_mask: - case X86::BI__builtin_ia32_cmpd512_mask: - case X86::BI__builtin_ia32_cmpq128_mask: - case X86::BI__builtin_ia32_cmpq256_mask: - case X86::BI__builtin_ia32_cmpq512_mask: - case X86::BI__builtin_ia32_ucmpb128_mask: - case X86::BI__builtin_ia32_ucmpb256_mask: - case X86::BI__builtin_ia32_ucmpb512_mask: - case X86::BI__builtin_ia32_ucmpw128_mask: - case X86::BI__builtin_ia32_ucmpw256_mask: - case X86::BI__builtin_ia32_ucmpw512_mask: - case X86::BI__builtin_ia32_ucmpd128_mask: - case X86::BI__builtin_ia32_ucmpd256_mask: - case X86::BI__builtin_ia32_ucmpd512_mask: - case X86::BI__builtin_ia32_ucmpq128_mask: - case X86::BI__builtin_ia32_ucmpq256_mask: - case X86::BI__builtin_ia32_ucmpq512_mask: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_vpcomb: - case X86::BI__builtin_ia32_vpcomw: - case X86::BI__builtin_ia32_vpcomd: - case X86::BI__builtin_ia32_vpcomq: - return 
emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true); - case X86::BI__builtin_ia32_vpcomub: - case X86::BI__builtin_ia32_vpcomuw: - case X86::BI__builtin_ia32_vpcomud: - case X86::BI__builtin_ia32_vpcomuq: - return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false); - case X86::BI__builtin_ia32_kortestcqi: - case X86::BI__builtin_ia32_kortestchi: - case X86::BI__builtin_ia32_kortestcsi: - case X86::BI__builtin_ia32_kortestcdi: { - mlir::Location loc = getLoc(expr->getExprLoc()); - cir::IntType ty = cast(ops[0].getType()); - mlir::Value allOnesOp = - builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth())); - mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops); - mlir::Value cmp = - cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp); - return builder.createCast(cir::CastKind::bool_to_int, cmp, - cgm.convertType(expr->getType())); - } - case X86::BI__builtin_ia32_kortestzqi: - case X86::BI__builtin_ia32_kortestzhi: - case X86::BI__builtin_ia32_kortestzsi: - case X86::BI__builtin_ia32_kortestzdi: { - mlir::Location loc = getLoc(expr->getExprLoc()); - cir::IntType ty = cast(ops[0].getType()); - mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult(); - mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops); - mlir::Value cmp = - cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp); - return builder.createCast(cir::CastKind::bool_to_int, cmp, - cgm.convertType(expr->getType())); - } - case X86::BI__builtin_ia32_ktestcqi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestc.b", ops); - case X86::BI__builtin_ia32_ktestzqi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestz.b", ops); - case X86::BI__builtin_ia32_ktestchi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestc.w", ops); - case X86::BI__builtin_ia32_ktestzhi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestz.w", ops); - case X86::BI__builtin_ia32_ktestcsi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestc.d", ops); - case X86::BI__builtin_ia32_ktestzsi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestz.d", ops); - case X86::BI__builtin_ia32_ktestcdi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestc.q", ops); - case X86::BI__builtin_ia32_ktestzdi: - return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), - "x86.avx512.ktestz.q", ops); - case X86::BI__builtin_ia32_kaddqi: - return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kadd.b", ops); - case X86::BI__builtin_ia32_kaddhi: - return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kadd.w", ops); - case X86::BI__builtin_ia32_kaddsi: - return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kadd.d", ops); - case X86::BI__builtin_ia32_kadddi: - return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), - "x86.avx512.kadd.q", ops); - case X86::BI__builtin_ia32_kandqi: - case X86::BI__builtin_ia32_kandhi: - case X86::BI__builtin_ia32_kandsi: - case X86::BI__builtin_ia32_kanddi: - return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), - cir::BinOpKind::And, ops); - case X86::BI__builtin_ia32_kandnqi: - case X86::BI__builtin_ia32_kandnhi: - case X86::BI__builtin_ia32_kandnsi: - case X86::BI__builtin_ia32_kandndi: - return emitX86MaskLogic(builder, 
getLoc(expr->getExprLoc()), - cir::BinOpKind::And, ops, true); - case X86::BI__builtin_ia32_korqi: - case X86::BI__builtin_ia32_korhi: - case X86::BI__builtin_ia32_korsi: - case X86::BI__builtin_ia32_kordi: - return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), - cir::BinOpKind::Or, ops); - case X86::BI__builtin_ia32_kxnorqi: - case X86::BI__builtin_ia32_kxnorhi: - case X86::BI__builtin_ia32_kxnorsi: - case X86::BI__builtin_ia32_kxnordi: - return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), - cir::BinOpKind::Xor, ops, true); - case X86::BI__builtin_ia32_kxorqi: - case X86::BI__builtin_ia32_kxorhi: - case X86::BI__builtin_ia32_kxorsi: - case X86::BI__builtin_ia32_kxordi: - return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), - cir::BinOpKind::Xor, ops); - case X86::BI__builtin_ia32_knotqi: - case X86::BI__builtin_ia32_knothi: - case X86::BI__builtin_ia32_knotsi: - case X86::BI__builtin_ia32_knotdi: { - cir::IntType intTy = cast(ops[0].getType()); - unsigned numElts = intTy.getWidth(); - mlir::Value resVec = - getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); - return builder.createBitcast(builder.createNot(resVec), ops[0].getType()); - } - case X86::BI__builtin_ia32_kmovb: - case X86::BI__builtin_ia32_kmovw: - case X86::BI__builtin_ia32_kmovd: - case X86::BI__builtin_ia32_kmovq: { - // Bitcast to vXi1 type and then back to integer. This gets the mask - // register type into the IR, but might be optimized out depending on - // what's around it. - cir::IntType intTy = cast(ops[0].getType()); - unsigned numElts = intTy.getWidth(); - mlir::Value resVec = - getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); - return builder.createBitcast(resVec, ops[0].getType()); - } - case X86::BI__builtin_ia32_sqrtsh_round_mask: - case X86::BI__builtin_ia32_sqrtsd_round_mask: - case X86::BI__builtin_ia32_sqrtss_round_mask: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_sqrtph512: - case X86::BI__builtin_ia32_sqrtps512: - case X86::BI__builtin_ia32_sqrtpd512: { - mlir::Location loc = getLoc(expr->getExprLoc()); - mlir::Value arg = ops[0]; - return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult(); - } - case X86::BI__builtin_ia32_pmuludq128: - case X86::BI__builtin_ia32_pmuludq256: - case X86::BI__builtin_ia32_pmuludq512: { - unsigned opTypePrimitiveSizeInBits = - cgm.getDataLayout().getTypeSizeInBits(ops[0].getType()); - return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false, - ops, opTypePrimitiveSizeInBits); - } - case X86::BI__builtin_ia32_pmuldq128: - case X86::BI__builtin_ia32_pmuldq256: - case X86::BI__builtin_ia32_pmuldq512: { - unsigned opTypePrimitiveSizeInBits = - cgm.getDataLayout().getTypeSizeInBits(ops[0].getType()); - return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true, - ops, opTypePrimitiveSizeInBits); - } - case X86::BI__builtin_ia32_pternlogd512_mask: - case X86::BI__builtin_ia32_pternlogq512_mask: - case X86::BI__builtin_ia32_pternlogd128_mask: - case X86::BI__builtin_ia32_pternlogd256_mask: - case X86::BI__builtin_ia32_pternlogq128_mask: - case X86::BI__builtin_ia32_pternlogq256_mask: - case X86::BI__builtin_ia32_pternlogd512_maskz: - case X86::BI__builtin_ia32_pternlogq512_maskz: - case X86::BI__builtin_ia32_pternlogd128_maskz: - case X86::BI__builtin_ia32_pternlogd256_maskz: - case X86::BI__builtin_ia32_pternlogq128_maskz: - case 
X86::BI__builtin_ia32_pternlogq256_maskz: - case X86::BI__builtin_ia32_vpshldd128: - case X86::BI__builtin_ia32_vpshldd256: - case X86::BI__builtin_ia32_vpshldd512: - case X86::BI__builtin_ia32_vpshldq128: - case X86::BI__builtin_ia32_vpshldq256: - case X86::BI__builtin_ia32_vpshldq512: - case X86::BI__builtin_ia32_vpshldw128: - case X86::BI__builtin_ia32_vpshldw256: - case X86::BI__builtin_ia32_vpshldw512: - case X86::BI__builtin_ia32_vpshrdd128: - case X86::BI__builtin_ia32_vpshrdd256: - case X86::BI__builtin_ia32_vpshrdd512: - case X86::BI__builtin_ia32_vpshrdq128: - case X86::BI__builtin_ia32_vpshrdq256: - case X86::BI__builtin_ia32_vpshrdq512: - case X86::BI__builtin_ia32_vpshrdw128: - case X86::BI__builtin_ia32_vpshrdw256: - case X86::BI__builtin_ia32_vpshrdw512: - case X86::BI__builtin_ia32_reduce_fadd_pd512: - case X86::BI__builtin_ia32_reduce_fadd_ps512: - case X86::BI__builtin_ia32_reduce_fadd_ph512: - case X86::BI__builtin_ia32_reduce_fadd_ph256: - case X86::BI__builtin_ia32_reduce_fadd_ph128: - case X86::BI__builtin_ia32_reduce_fmul_pd512: - case X86::BI__builtin_ia32_reduce_fmul_ps512: - case X86::BI__builtin_ia32_reduce_fmul_ph512: - case X86::BI__builtin_ia32_reduce_fmul_ph256: - case X86::BI__builtin_ia32_reduce_fmul_ph128: - case X86::BI__builtin_ia32_reduce_fmax_pd512: - case X86::BI__builtin_ia32_reduce_fmax_ps512: - case X86::BI__builtin_ia32_reduce_fmax_ph512: - case X86::BI__builtin_ia32_reduce_fmax_ph256: - case X86::BI__builtin_ia32_reduce_fmax_ph128: - case X86::BI__builtin_ia32_reduce_fmin_pd512: - case X86::BI__builtin_ia32_reduce_fmin_ps512: - case X86::BI__builtin_ia32_reduce_fmin_ph512: - case X86::BI__builtin_ia32_reduce_fmin_ph256: - case X86::BI__builtin_ia32_reduce_fmin_ph128: - case X86::BI__builtin_ia32_rdrand16_step: - case X86::BI__builtin_ia32_rdrand32_step: - case X86::BI__builtin_ia32_rdrand64_step: - case X86::BI__builtin_ia32_rdseed16_step: - case X86::BI__builtin_ia32_rdseed32_step: - case X86::BI__builtin_ia32_rdseed64_step: - case X86::BI__builtin_ia32_addcarryx_u32: - case X86::BI__builtin_ia32_addcarryx_u64: - case X86::BI__builtin_ia32_subborrow_u32: - case X86::BI__builtin_ia32_subborrow_u64: - case X86::BI__builtin_ia32_fpclassps128_mask: - case X86::BI__builtin_ia32_fpclassps256_mask: - case X86::BI__builtin_ia32_fpclassps512_mask: - case X86::BI__builtin_ia32_vfpclassbf16128_mask: - case X86::BI__builtin_ia32_vfpclassbf16256_mask: - case X86::BI__builtin_ia32_vfpclassbf16512_mask: - case X86::BI__builtin_ia32_fpclassph128_mask: - case X86::BI__builtin_ia32_fpclassph256_mask: - case X86::BI__builtin_ia32_fpclassph512_mask: - case X86::BI__builtin_ia32_fpclasspd128_mask: - case X86::BI__builtin_ia32_fpclasspd256_mask: - case X86::BI__builtin_ia32_fpclasspd512_mask: - case X86::BI__builtin_ia32_vp2intersect_q_512: - case X86::BI__builtin_ia32_vp2intersect_q_256: - case X86::BI__builtin_ia32_vp2intersect_q_128: - case X86::BI__builtin_ia32_vp2intersect_d_512: - case X86::BI__builtin_ia32_vp2intersect_d_256: - case X86::BI__builtin_ia32_vp2intersect_d_128: - case X86::BI__builtin_ia32_vpmultishiftqb128: - case X86::BI__builtin_ia32_vpmultishiftqb256: - case X86::BI__builtin_ia32_vpmultishiftqb512: - case X86::BI__builtin_ia32_vpshufbitqmb128_mask: - case X86::BI__builtin_ia32_vpshufbitqmb256_mask: - case X86::BI__builtin_ia32_vpshufbitqmb512_mask: - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmpleps: - case 
X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpneqpd: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnltpd: - return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), - cir::CmpOpKind::lt, /*shouldInvert=*/true); - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpnlepd: - return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), - cir::CmpOpKind::le, /*shouldInvert=*/true); - case X86::BI__builtin_ia32_cmpordps: - case X86::BI__builtin_ia32_cmpordpd: - case X86::BI__builtin_ia32_cmpph128_mask: - case X86::BI__builtin_ia32_cmpph256_mask: - case X86::BI__builtin_ia32_cmpph512_mask: - case X86::BI__builtin_ia32_cmpps128_mask: - case X86::BI__builtin_ia32_cmpps256_mask: - case X86::BI__builtin_ia32_cmpps512_mask: - case X86::BI__builtin_ia32_cmppd128_mask: - case X86::BI__builtin_ia32_cmppd256_mask: - case X86::BI__builtin_ia32_cmppd512_mask: - case X86::BI__builtin_ia32_vcmpbf16512_mask: - case X86::BI__builtin_ia32_vcmpbf16256_mask: - case X86::BI__builtin_ia32_vcmpbf16128_mask: - case X86::BI__builtin_ia32_cmpps: - case X86::BI__builtin_ia32_cmpps256: - case X86::BI__builtin_ia32_cmppd: - case X86::BI__builtin_ia32_cmppd256: - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpeqsd: - case X86::BI__builtin_ia32_cmpltsd: - case X86::BI__builtin_ia32_cmplesd: - case X86::BI__builtin_ia32_cmpunordsd: - case X86::BI__builtin_ia32_cmpneqsd: - case X86::BI__builtin_ia32_cmpnltsd: - case X86::BI__builtin_ia32_cmpnlesd: - case X86::BI__builtin_ia32_cmpordsd: - case X86::BI__builtin_ia32_vcvtph2ps_mask: - case X86::BI__builtin_ia32_vcvtph2ps256_mask: - case X86::BI__builtin_ia32_vcvtph2ps512_mask: - case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: - case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: - case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: - case X86::BI__cpuid: - case X86::BI__cpuidex: - case X86::BI__emul: - case X86::BI__emulu: - case X86::BI__mulh: - case X86::BI__umulh: - case X86::BI_mul128: - case X86::BI_umul128: - case X86::BI__faststorefence: - case X86::BI__shiftleft128: - case X86::BI__shiftright128: - case X86::BI_ReadWriteBarrier: - case X86::BI_ReadBarrier: - case X86::BI_WriteBarrier: - case X86::BI_AddressOfReturnAddress: - case X86::BI__stosb: - case X86::BI__ud2: - case X86::BI__int2c: - case X86::BI__readfsbyte: - case X86::BI__readfsword: - case X86::BI__readfsdword: - case X86::BI__readfsqword: - case X86::BI__readgsbyte: - case X86::BI__readgsword: - case X86::BI__readgsdword: - case X86::BI__readgsqword: - case X86::BI__builtin_ia32_encodekey128_u32: - case X86::BI__builtin_ia32_encodekey256_u32: - case X86::BI__builtin_ia32_aesenc128kl_u8: - case X86::BI__builtin_ia32_aesdec128kl_u8: - case X86::BI__builtin_ia32_aesenc256kl_u8: - case X86::BI__builtin_ia32_aesdec256kl_u8: - case X86::BI__builtin_ia32_aesencwide128kl_u8: - case X86::BI__builtin_ia32_aesdecwide128kl_u8: - case X86::BI__builtin_ia32_aesencwide256kl_u8: - case 
X86::BI__builtin_ia32_aesdecwide256kl_u8:
-  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
-  case X86::BI__builtin_ia32_vfmaddcph512_mask:
-  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
-  case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
-  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
-  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
-  case X86::BI__builtin_ia32_prefetchi:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
-  }
-}
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit x86/x86_64 builtin calls as CIR or a function
+// call to be later resolved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenBuilder.h"
+#include "CIRGenFunction.h"
+#include "CIRGenModule.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/ValueRange.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+template <typename... Operands>
+static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
+                                       mlir::Location loc, const StringRef str,
+                                       const mlir::Type &resTy,
+                                       Operands &&...op) {
+  return cir::LLVMIntrinsicCallOp::create(builder, loc,
+                                          builder.getStringAttr(str), resTy,
+                                          std::forward<Operands>(op)...)
+      .getResult();
+}
+
+// OG supports unordered comparisons as a form of optimization in addition to
+// ordered comparisons, while CIR doesn't.
+//
+// This means that we can't encode the comparison code of UGT (unordered
+// greater than), at least not at the CIR level.
+//
+// The boolean shouldInvert compensates for this.
+// For example: to get to the comparison code UGT, we pass in
+// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT.
+//
+// There are several ways to support this otherwise:
+// - Register extra CmpOpKinds for the unordered comparison types and build
+//   the translation code to go from CIR to the LLVM dialect. Notice we get
+//   this naturally with shouldInvert, benefiting from existing
+//   infrastructure, albeit at the cost of an extra `not` at the CIR level.
+// - Just add extra comparison codes to a new VecCmpOpKind instead of
+//   cluttering CmpOpKind.
+// - Add a boolean in VecCmpOp to indicate whether it's doing an unordered or
+//   ordered comparison.
+// - Just emit the intrinsic call instead of calling this helper; see how the
+//   LLVM lowering handles this.
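+//
+// As a concrete example from the cases handled below:
+// __builtin_ia32_cmpnltps needs UGE ("not less than"), so we emit an OLT
+// compare and set shouldInvert, since !(a OLT b) is a UGE b for every
+// ordered/unordered operand combination.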
+static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
+                                  llvm::SmallVector<mlir::Value> &ops,
+                                  mlir::Location loc, cir::CmpOpKind pred,
+                                  bool shouldInvert) {
+  assert(!cir::MissingFeatures::cgFPOptionsRAII());
+  // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
+  assert(!cir::MissingFeatures::emitConstrainedFPCall());
+  mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
+  mlir::Value bitCast = builder.createBitcast(
+      shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
+  return bitCast;
+}
+
+static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
+                                   mlir::Value mask, unsigned numElems) {
+  auto maskTy = cir::VectorType::get(
+      builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
+  mlir::Value maskVec = builder.createBitcast(mask, maskTy);
+
+  // If we have fewer than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
+  if (numElems < 8) {
+    SmallVector<mlir::Attribute> indices;
+    mlir::Type i32Ty = builder.getSInt32Ty();
+    for (auto i : llvm::seq<unsigned>(0, numElems))
+      indices.push_back(cir::IntAttr::get(i32Ty, i));
+
+    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
+  }
+  return maskVec;
+}
+
+// Builds the VecShuffleOp for the pshuflw and pshufhw x86 builtins.
+//
+// The vector is split into lanes of 8 word (16-bit) elements. The lower or
+// upper half of each lane, controlled by `isLow`, is shuffled as follows:
+// the immediate is truncated to 8 bits and split into four 2-bit fields; the
+// i-th field selects the source element for the i-th position of the
+// shuffled half-lane. For example, imm = 0x1B (fields 3, 2, 1, 0) reverses
+// the selected half of each lane. The other half of the lane is unchanged.
+static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
+                                       const mlir::Value vec,
+                                       const mlir::Value immediate,
+                                       const mlir::Location loc,
+                                       const bool isLow) {
+  uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
+
+  auto vecTy = cast<cir::VectorType>(vec.getType());
+  unsigned numElts = vecTy.getSize();
+
+  unsigned firstHalfStart = isLow ? 0 : 4;
+  unsigned secondHalfStart = 4 - firstHalfStart;
+
+  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+  imm = (imm & 0xff) * 0x01010101;
+
+  int64_t indices[32];
+  for (unsigned l = 0; l != numElts; l += 8) {
+    for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
+      indices[l + i] = l + (imm & 3) + firstHalfStart;
+      imm >>= 2;
+    }
+    for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
+      indices[l + i] = l + i;
+  }
+
+  return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
+}
+
+// Builds the shuffle mask for the pshufd and shufpd/shufps x86 builtins.
+// The shuffle mask is written to outIndices.
+static void
+computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
+                           uint32_t imm, const bool isShufP,
+                           llvm::SmallVectorImpl<int64_t> &outIndices) {
+  auto vecTy = cast<cir::VectorType>(vec.getType());
+  unsigned numElts = vecTy.getSize();
+  unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
+  unsigned numLaneElts = numElts / numLanes;
+
+  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
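+  // For example, pshufd with imm = 0x1B selects {3, 2, 1, 0} in every
+  // 128-bit lane, while for shufps the upper half of each lane's selections
+  // is taken from the second source vector (idx += numElts below).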
+  imm = (imm & 0xff) * 0x01010101;
+
+  for (unsigned l = 0; l != numElts; l += numLaneElts) {
+    for (unsigned i = 0; i != numLaneElts; ++i) {
+      uint32_t idx = imm % numLaneElts;
+      imm /= numLaneElts;
+      if (isShufP && i >= (numLaneElts / 2))
+        idx += numElts;
+      outIndices[l + i] = l + idx;
+    }
+  }
+
+  outIndices.resize(numElts);
+}
+
+static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
+                                       mlir::Location loc,
+                                       const std::string &intrinsicName,
+                                       SmallVectorImpl<mlir::Value> &ops) {
+  auto intTy = cast<cir::IntType>(ops[0].getType());
+  unsigned numElts = intTy.getWidth();
+  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+  mlir::Type vecTy = lhsVec.getType();
+  mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
+                                           mlir::ValueRange{lhsVec, rhsVec});
+  return builder.createBitcast(resVec, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
+                                     mlir::Location loc,
+                                     const std::string &intrinsicName,
+                                     SmallVectorImpl<mlir::Value> &ops) {
+  unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+  // Convert both operands to mask vectors.
+  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
+  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);
+
+  mlir::Type i32Ty = builder.getSInt32Ty();
+
+  // Create indices for extracting the first half of each vector.
+  SmallVector<mlir::Attribute> halfIndices;
+  for (auto i : llvm::seq<unsigned>(0, numElems / 2))
+    halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
+
+  // Extract the first half of each vector. This gives better codegen than
+  // doing it in a single shuffle.
+  mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
+  mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
+
+  // Create indices for concatenating the vectors.
+  // NOTE: Operands are swapped to match the intrinsic definition.
+  // After the half extraction, both vectors have numElems/2 elements.
+  // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
+  // select from rhsHalf, and indices [numElems/2..numElems-1] select from
+  // lhsHalf.
+  SmallVector<mlir::Attribute> concatIndices;
+  for (auto i : llvm::seq<unsigned>(0, numElems))
+    concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
+
+  // Concat the vectors (RHS first, then LHS).
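+  // For example, for kunpckb (16-bit masks) the result is
+  // { rhsHalf[0..7], lhsHalf[0..7] }, i.e. the low 8 bits of the RHS mask
+  // end up in the low half of the result, matching the intrinsic.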
+  mlir::Value res =
+      builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
+  return builder.createBitcast(res, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
+                                    mlir::Location loc,
+                                    cir::BinOpKind binOpKind,
+                                    SmallVectorImpl<mlir::Value> &ops,
+                                    bool invertLHS = false) {
+  unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
+  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
+  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);
+
+  if (invertLHS)
+    lhs = builder.createNot(lhs);
+  return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
+                               ops[0].getType());
+}
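+
+// Lowers the AVX512 ktestc/ktestz builtins: both integer masks are bitcast
+// to vXi1 vectors and handed to the corresponding target intrinsic, which
+// produces the i32 flag result directly.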
"fshr" : "fshl"; + return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty, + mlir::ValueRange{op0, op1, amt}); +} + +static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, + bool isSigned, + SmallVectorImpl &ops, + unsigned opTypePrimitiveSizeInBits) { + mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(), + opTypePrimitiveSizeInBits / 64); + mlir::Value lhs = builder.createBitcast(loc, ops[0], ty); + mlir::Value rhs = builder.createBitcast(loc, ops[1], ty); + if (isSigned) { + cir::ConstantOp shiftAmt = + builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32)); + cir::VecSplatOp shiftSplatVecOp = + cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult()); + mlir::Value shiftSplatValue = shiftSplatVecOp.getResult(); + // In CIR, right-shift operations are automatically lowered to either an + // arithmetic or logical shift depending on the operand type. The purpose + // of the shifts here is to propagate the sign bit of the 32-bit input + // into the upper bits of each vector lane. + lhs = builder.createShift(loc, lhs, shiftSplatValue, true); + lhs = builder.createShift(loc, lhs, shiftSplatValue, false); + rhs = builder.createShift(loc, rhs, shiftSplatValue, true); + rhs = builder.createShift(loc, rhs, shiftSplatValue, false); + } else { + cir::ConstantOp maskScalar = builder.getConstant( + loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff)); + cir::VecSplatOp mask = + cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult()); + // Clear the upper bits + lhs = builder.createAnd(loc, lhs, mask); + rhs = builder.createAnd(loc, rhs, mask); + } + return builder.createMul(loc, lhs, rhs); +} + +static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, + llvm::SmallVector ops, + bool isSigned) { + mlir::Value op0 = ops[0]; + mlir::Value op1 = ops[1]; + + cir::VectorType ty = cast(op0.getType()); + cir::IntType elementTy = cast(ty.getElementType()); + + uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7; + + cir::CmpOpKind pred; + switch (imm) { + case 0x0: + pred = cir::CmpOpKind::lt; + break; + case 0x1: + pred = cir::CmpOpKind::le; + break; + case 0x2: + pred = cir::CmpOpKind::gt; + break; + case 0x3: + pred = cir::CmpOpKind::ge; + break; + case 0x4: + pred = cir::CmpOpKind::eq; + break; + case 0x5: + pred = cir::CmpOpKind::ne; + break; + case 0x6: + return builder.getNullValue(ty, loc); // FALSE + case 0x7: { + llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth()); + return cir::VecSplatOp::create( + builder, loc, ty, + builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE + } + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + if ((!isSigned && elementTy.isSigned()) || + (isSigned && elementTy.isUnsigned())) { + elementTy = elementTy.isSigned() ? 
+static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
+                                bool isSigned,
+                                SmallVectorImpl<mlir::Value> &ops,
+                                unsigned opTypePrimitiveSizeInBits) {
+  mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
+                                       opTypePrimitiveSizeInBits / 64);
+  mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
+  mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
+  if (isSigned) {
+    cir::ConstantOp shiftAmt =
+        builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
+    cir::VecSplatOp shiftSplatVecOp =
+        cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
+    mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
+    // In CIR, right-shift operations are automatically lowered to either an
+    // arithmetic or logical shift depending on the operand type. The purpose
+    // of the shifts here is to propagate the sign bit of the 32-bit input
+    // into the upper bits of each vector lane.
+    lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
+    lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
+    rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
+    rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
+  } else {
+    cir::ConstantOp maskScalar = builder.getConstant(
+        loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
+    cir::VecSplatOp mask =
+        cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
+    // Clear the upper bits.
+    lhs = builder.createAnd(loc, lhs, mask);
+    rhs = builder.createAnd(loc, rhs, mask);
+  }
+  return builder.createMul(loc, lhs, rhs);
+}
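+
+// Lowers the XOP vpcom/vpcomu builtins. The low three bits of the immediate
+// select the comparison predicate; 6 and 7 are the always-false/always-true
+// encodings, and the operands are bitcast whenever the builtin's signedness
+// disagrees with the element type so the compare gets the intended
+// signedness.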
+static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
+                                llvm::SmallVector<mlir::Value> ops,
+                                bool isSigned) {
+  mlir::Value op0 = ops[0];
+  mlir::Value op1 = ops[1];
+
+  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
+  cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());
+
+  uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
+
+  cir::CmpOpKind pred;
+  switch (imm) {
+  case 0x0:
+    pred = cir::CmpOpKind::lt;
+    break;
+  case 0x1:
+    pred = cir::CmpOpKind::le;
+    break;
+  case 0x2:
+    pred = cir::CmpOpKind::gt;
+    break;
+  case 0x3:
+    pred = cir::CmpOpKind::ge;
+    break;
+  case 0x4:
+    pred = cir::CmpOpKind::eq;
+    break;
+  case 0x5:
+    pred = cir::CmpOpKind::ne;
+    break;
+  case 0x6:
+    return builder.getNullValue(ty, loc); // FALSE
+  case 0x7: {
+    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
+    return cir::VecSplatOp::create(
+        builder, loc, ty,
+        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
+  }
+  default:
+    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+  }
+
+  if ((!isSigned && elementTy.isSigned()) ||
+      (isSigned && elementTy.isUnsigned())) {
+    elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
+                                     : builder.getSIntNTy(elementTy.getWidth());
+    ty = cir::VectorType::get(elementTy, ty.getSize());
+    op0 = builder.createBitcast(op0, ty);
+    op1 = builder.createBitcast(op1, ty);
+  }
+
+  return builder.createVecCompare(loc, pred, op0, op1);
+}
+
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
+                                               const CallExpr *expr) {
+  if (builtinID == Builtin::BI__builtin_cpu_is) {
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
+    return {};
+  }
+  if (builtinID == Builtin::BI__builtin_cpu_supports) {
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
+    return {};
+  }
+  if (builtinID == Builtin::BI__builtin_cpu_init) {
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
+    return {};
+  }
+
+  // Handle MSVC intrinsics before argument evaluation to prevent double
+  // evaluation.
+  assert(!cir::MissingFeatures::msvcBuiltins());
+
+  // Find out if any arguments are required to be integer constant expressions.
+  assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+
+  // The operands of the builtin call.
+  llvm::SmallVector<mlir::Value> ops;
+
+  // `iceArguments` is a bitmap in which bit i indicates whether the i-th
+  // argument is required to be a constant integer expression.
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None && "Error while getting builtin type.");
+
+  for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
+    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
+
+  CIRGenBuilderTy &builder = getBuilder();
+  mlir::Type voidTy = builder.getVoidTy();
+
+  switch (builtinID) {
+  default:
+    return {};
+  case X86::BI_mm_clflush:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.clflush", voidTy, ops[0]);
+  case X86::BI_mm_lfence:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.lfence", voidTy);
+  case X86::BI_mm_pause:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.pause", voidTy);
+  case X86::BI_mm_mfence:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse2.mfence", voidTy);
+  case X86::BI_mm_sfence:
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "x86.sse.sfence", voidTy);
+  case X86::BI_mm_prefetch:
+  case X86::BI__rdtsc:
+  case X86::BI__builtin_ia32_rdtscp: {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  }
+  case X86::BI__builtin_ia32_lzcnt_u16:
+  case X86::BI__builtin_ia32_lzcnt_u32:
+  case X86::BI__builtin_ia32_lzcnt_u64: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value isZeroPoison = builder.getFalse(loc);
+    return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
+  case X86::BI__builtin_ia32_tzcnt_u16:
+  case X86::BI__builtin_ia32_tzcnt_u32:
+  case X86::BI__builtin_ia32_tzcnt_u64: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value isZeroPoison = builder.getFalse(loc);
+    return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
+  case X86::BI__builtin_ia32_undef128:
+  case X86::BI__builtin_ia32_undef256:
+  case X86::BI__builtin_ia32_undef512:
+    // The x86 definition of "undef" is not the same as the LLVM definition
+    // (PR32176). We leave optimizing away an unnecessary zero constant to the
+    // IR optimizer and backend.
+    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+    // value, we should use that here instead of a zero.
+    return builder.getNullValue(convertType(expr->getType()),
+                                getLoc(expr->getExprLoc()));
+  case X86::BI__builtin_ia32_vec_ext_v4hi:
+  case X86::BI__builtin_ia32_vec_ext_v16qi:
+  case X86::BI__builtin_ia32_vec_ext_v8hi:
+  case X86::BI__builtin_ia32_vec_ext_v4si:
+  case X86::BI__builtin_ia32_vec_ext_v4sf:
+  case X86::BI__builtin_ia32_vec_ext_v2di:
+  case X86::BI__builtin_ia32_vec_ext_v32qi:
+  case X86::BI__builtin_ia32_vec_ext_v16hi:
+  case X86::BI__builtin_ia32_vec_ext_v8si:
+  case X86::BI__builtin_ia32_vec_ext_v4di: {
+    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+    uint64_t index = getZExtIntValueFromConstOp(ops[1]);
+    index &= numElts - 1;
+
+    cir::ConstantOp indexVal =
+        builder.getUInt64(index, getLoc(expr->getExprLoc()));
+
+    // These builtins exist so we can ensure the index is an ICE and in range.
+    // Otherwise we could just do this in the header file.
+    return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
+                                     ops[0], indexVal);
+  }
+  case X86::BI__builtin_ia32_vec_set_v4hi:
+  case X86::BI__builtin_ia32_vec_set_v16qi:
+  case X86::BI__builtin_ia32_vec_set_v8hi:
+  case X86::BI__builtin_ia32_vec_set_v4si:
+  case X86::BI__builtin_ia32_vec_set_v2di:
+  case X86::BI__builtin_ia32_vec_set_v32qi:
+  case X86::BI__builtin_ia32_vec_set_v16hi:
+  case X86::BI__builtin_ia32_vec_set_v8si:
+  case X86::BI__builtin_ia32_vec_set_v4di: {
+    return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
+                         ops[2]);
+  }
+  case X86::BI__builtin_ia32_kunpckhi:
+    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+                             "x86.avx512.kunpackb", ops);
+  case X86::BI__builtin_ia32_kunpcksi:
+    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+                             "x86.avx512.kunpackw", ops);
+  case X86::BI__builtin_ia32_kunpckdi:
+    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+                             "x86.avx512.kunpackd", ops);
+  case X86::BI_mm_setcsr:
+  case X86::BI__builtin_ia32_ldmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+    builder.createStore(loc, ops[0], tmp);
+    return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
+                               builder.getVoidTy(), tmp.getPointer());
+  }
+  case X86::BI_mm_getcsr:
+  case X86::BI__builtin_ia32_stmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getType(), loc);
+    emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
+                        tmp.getPointer());
+    return builder.createLoad(loc, tmp);
+  }
+  case X86::BI__builtin_ia32_xsave:
+  case X86::BI__builtin_ia32_xsave64:
+  case X86::BI__builtin_ia32_xrstor:
+  case X86::BI__builtin_ia32_xrstor64:
+  case X86::BI__builtin_ia32_xsaveopt:
+  case X86::BI__builtin_ia32_xsaveopt64:
+  case X86::BI__builtin_ia32_xrstors:
+  case X86::BI__builtin_ia32_xrstors64:
+  case X86::BI__builtin_ia32_xsavec:
+  case X86::BI__builtin_ia32_xsavec64:
+  case X86::BI__builtin_ia32_xsaves:
+  case X86::BI__builtin_ia32_xsaves64:
+  case X86::BI__builtin_ia32_xsetbv:
+  case X86::BI_xsetbv:
+  case X86::BI__builtin_ia32_xgetbv:
+  case X86::BI_xgetbv:
+  case X86::BI__builtin_ia32_storedqudi128_mask:
+  case X86::BI__builtin_ia32_storedqusi128_mask:
+  case X86::BI__builtin_ia32_storedquhi128_mask:
+  case X86::BI__builtin_ia32_storedquqi128_mask:
+  case X86::BI__builtin_ia32_storeupd128_mask:
case X86::BI__builtin_ia32_storeups128_mask: + case X86::BI__builtin_ia32_storedqudi256_mask: + case X86::BI__builtin_ia32_storedqusi256_mask: + case X86::BI__builtin_ia32_storedquhi256_mask: + case X86::BI__builtin_ia32_storedquqi256_mask: + case X86::BI__builtin_ia32_storeupd256_mask: + case X86::BI__builtin_ia32_storeups256_mask: + case X86::BI__builtin_ia32_storedqudi512_mask: + case X86::BI__builtin_ia32_storedqusi512_mask: + case X86::BI__builtin_ia32_storedquhi512_mask: + case X86::BI__builtin_ia32_storedquqi512_mask: + case X86::BI__builtin_ia32_storeupd512_mask: + case X86::BI__builtin_ia32_storeups512_mask: + case X86::BI__builtin_ia32_storesbf16128_mask: + case X86::BI__builtin_ia32_storesh128_mask: + case X86::BI__builtin_ia32_storess128_mask: + case X86::BI__builtin_ia32_storesd128_mask: + case X86::BI__builtin_ia32_cvtmask2b128: + case X86::BI__builtin_ia32_cvtmask2b256: + case X86::BI__builtin_ia32_cvtmask2b512: + case X86::BI__builtin_ia32_cvtmask2w128: + case X86::BI__builtin_ia32_cvtmask2w256: + case X86::BI__builtin_ia32_cvtmask2w512: + case X86::BI__builtin_ia32_cvtmask2d128: + case X86::BI__builtin_ia32_cvtmask2d256: + case X86::BI__builtin_ia32_cvtmask2d512: + case X86::BI__builtin_ia32_cvtmask2q128: + case X86::BI__builtin_ia32_cvtmask2q256: + case X86::BI__builtin_ia32_cvtmask2q512: + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + case X86::BI__builtin_ia32_cvtdq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtw2ph512_mask: + case X86::BI__builtin_ia32_vcvtdq2ph512_mask: + case X86::BI__builtin_ia32_vcvtqq2ph512_mask: + case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtuw2ph512_mask: + case X86::BI__builtin_ia32_vcvtudq2ph512_mask: + case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: + case X86::BI__builtin_ia32_vfmaddsh3_mask: + case X86::BI__builtin_ia32_vfmaddss3_mask: + case X86::BI__builtin_ia32_vfmaddsd3_mask: + case X86::BI__builtin_ia32_vfmaddsh3_maskz: + case X86::BI__builtin_ia32_vfmaddss3_maskz: + case X86::BI__builtin_ia32_vfmaddsd3_maskz: + case X86::BI__builtin_ia32_vfmaddsh3_mask3: + case X86::BI__builtin_ia32_vfmaddss3_mask3: + case X86::BI__builtin_ia32_vfmaddsd3_mask3: + case X86::BI__builtin_ia32_vfmsubsh3_mask3: + case X86::BI__builtin_ia32_vfmsubss3_mask3: + case X86::BI__builtin_ia32_vfmsubsd3_mask3: + case X86::BI__builtin_ia32_vfmaddph512_mask: + case X86::BI__builtin_ia32_vfmaddph512_maskz: + case X86::BI__builtin_ia32_vfmaddph512_mask3: + case X86::BI__builtin_ia32_vfmaddps512_mask: + case X86::BI__builtin_ia32_vfmaddps512_maskz: + case X86::BI__builtin_ia32_vfmaddps512_mask3: + case X86::BI__builtin_ia32_vfmsubps512_mask3: + case X86::BI__builtin_ia32_vfmaddpd512_mask: + case X86::BI__builtin_ia32_vfmaddpd512_maskz: + case X86::BI__builtin_ia32_vfmaddpd512_mask3: + case X86::BI__builtin_ia32_vfmsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubph512_mask3: + case 
X86::BI__builtin_ia32_vfmaddsubph512_mask: + case X86::BI__builtin_ia32_vfmaddsubph512_maskz: + case X86::BI__builtin_ia32_vfmaddsubph512_mask3: + case X86::BI__builtin_ia32_vfmsubaddph512_mask3: + case X86::BI__builtin_ia32_vfmaddsubps512_mask: + case X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case X86::BI__builtin_ia32_vfmaddsubps512_mask3: + case X86::BI__builtin_ia32_vfmsubaddps512_mask3: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask: + case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + case X86::BI__builtin_ia32_movdqa32store128_mask: + case X86::BI__builtin_ia32_movdqa64store128_mask: + case X86::BI__builtin_ia32_storeaps128_mask: + case X86::BI__builtin_ia32_storeapd128_mask: + case X86::BI__builtin_ia32_movdqa32store256_mask: + case X86::BI__builtin_ia32_movdqa64store256_mask: + case X86::BI__builtin_ia32_storeaps256_mask: + case X86::BI__builtin_ia32_storeapd256_mask: + case X86::BI__builtin_ia32_movdqa32store512_mask: + case X86::BI__builtin_ia32_movdqa64store512_mask: + case X86::BI__builtin_ia32_storeaps512_mask: + case X86::BI__builtin_ia32_storeapd512_mask: + case X86::BI__builtin_ia32_loadups128_mask: + case X86::BI__builtin_ia32_loadups256_mask: + case X86::BI__builtin_ia32_loadups512_mask: + case X86::BI__builtin_ia32_loadupd128_mask: + case X86::BI__builtin_ia32_loadupd256_mask: + case X86::BI__builtin_ia32_loadupd512_mask: + case X86::BI__builtin_ia32_loaddquqi128_mask: + case X86::BI__builtin_ia32_loaddquqi256_mask: + case X86::BI__builtin_ia32_loaddquqi512_mask: + case X86::BI__builtin_ia32_loaddquhi128_mask: + case X86::BI__builtin_ia32_loaddquhi256_mask: + case X86::BI__builtin_ia32_loaddquhi512_mask: + case X86::BI__builtin_ia32_loaddqusi128_mask: + case X86::BI__builtin_ia32_loaddqusi256_mask: + case X86::BI__builtin_ia32_loaddqusi512_mask: + case X86::BI__builtin_ia32_loaddqudi128_mask: + case X86::BI__builtin_ia32_loaddqudi256_mask: + case X86::BI__builtin_ia32_loaddqudi512_mask: + case X86::BI__builtin_ia32_loadsbf16128_mask: + case X86::BI__builtin_ia32_loadsh128_mask: + case X86::BI__builtin_ia32_loadss128_mask: + case X86::BI__builtin_ia32_loadsd128_mask: + case X86::BI__builtin_ia32_loadaps128_mask: + case X86::BI__builtin_ia32_loadaps256_mask: + case X86::BI__builtin_ia32_loadaps512_mask: + case X86::BI__builtin_ia32_loadapd128_mask: + case X86::BI__builtin_ia32_loadapd256_mask: + case X86::BI__builtin_ia32_loadapd512_mask: + case X86::BI__builtin_ia32_movdqa32load128_mask: + case X86::BI__builtin_ia32_movdqa32load256_mask: + case X86::BI__builtin_ia32_movdqa32load512_mask: + case X86::BI__builtin_ia32_movdqa64load128_mask: + case X86::BI__builtin_ia32_movdqa64load256_mask: + case X86::BI__builtin_ia32_movdqa64load512_mask: + case X86::BI__builtin_ia32_expandloaddf128_mask: + case X86::BI__builtin_ia32_expandloaddf256_mask: + case X86::BI__builtin_ia32_expandloaddf512_mask: + case X86::BI__builtin_ia32_expandloadsf128_mask: + case X86::BI__builtin_ia32_expandloadsf256_mask: + case X86::BI__builtin_ia32_expandloadsf512_mask: + case X86::BI__builtin_ia32_expandloaddi128_mask: + case X86::BI__builtin_ia32_expandloaddi256_mask: + case X86::BI__builtin_ia32_expandloaddi512_mask: + case X86::BI__builtin_ia32_expandloadsi128_mask: + case X86::BI__builtin_ia32_expandloadsi256_mask: + case X86::BI__builtin_ia32_expandloadsi512_mask: + case X86::BI__builtin_ia32_expandloadhi128_mask: + case X86::BI__builtin_ia32_expandloadhi256_mask: + case 
X86::BI__builtin_ia32_expandloadhi512_mask: + case X86::BI__builtin_ia32_expandloadqi128_mask: + case X86::BI__builtin_ia32_expandloadqi256_mask: + case X86::BI__builtin_ia32_expandloadqi512_mask: + case X86::BI__builtin_ia32_compressstoredf128_mask: + case X86::BI__builtin_ia32_compressstoredf256_mask: + case X86::BI__builtin_ia32_compressstoredf512_mask: + case X86::BI__builtin_ia32_compressstoresf128_mask: + case X86::BI__builtin_ia32_compressstoresf256_mask: + case X86::BI__builtin_ia32_compressstoresf512_mask: + case X86::BI__builtin_ia32_compressstoredi128_mask: + case X86::BI__builtin_ia32_compressstoredi256_mask: + case X86::BI__builtin_ia32_compressstoredi512_mask: + case X86::BI__builtin_ia32_compressstoresi128_mask: + case X86::BI__builtin_ia32_compressstoresi256_mask: + case X86::BI__builtin_ia32_compressstoresi512_mask: + case X86::BI__builtin_ia32_compressstorehi128_mask: + case X86::BI__builtin_ia32_compressstorehi256_mask: + case X86::BI__builtin_ia32_compressstorehi512_mask: + case X86::BI__builtin_ia32_compressstoreqi128_mask: + case X86::BI__builtin_ia32_compressstoreqi256_mask: + case X86::BI__builtin_ia32_compressstoreqi512_mask: + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case 
X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + intrinsicName = "x86.avx512.mask.gather3div2.df"; + break; + case X86::BI__builtin_ia32_gather3div2di: + intrinsicName = "x86.avx512.mask.gather3div2.di"; + break; + case X86::BI__builtin_ia32_gather3div4df: + intrinsicName = "x86.avx512.mask.gather3div4.df"; + break; + case X86::BI__builtin_ia32_gather3div4di: + intrinsicName = "x86.avx512.mask.gather3div4.di"; + break; + case X86::BI__builtin_ia32_gather3div4sf: + intrinsicName = "x86.avx512.mask.gather3div4.sf"; + break; + case X86::BI__builtin_ia32_gather3div4si: + intrinsicName = "x86.avx512.mask.gather3div4.si"; + break; + case X86::BI__builtin_ia32_gather3div8sf: + intrinsicName = "x86.avx512.mask.gather3div8.sf"; + break; + case X86::BI__builtin_ia32_gather3div8si: + intrinsicName = "x86.avx512.mask.gather3div8.si"; + break; + case X86::BI__builtin_ia32_gather3siv2df: + intrinsicName = "x86.avx512.mask.gather3siv2.df"; + break; + case X86::BI__builtin_ia32_gather3siv2di: + intrinsicName = "x86.avx512.mask.gather3siv2.di"; + break; + case X86::BI__builtin_ia32_gather3siv4df: + intrinsicName = "x86.avx512.mask.gather3siv4.df"; + break; + case X86::BI__builtin_ia32_gather3siv4di: + intrinsicName = "x86.avx512.mask.gather3siv4.di"; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + intrinsicName = "x86.avx512.mask.gather3siv4.sf"; + break; + case X86::BI__builtin_ia32_gather3siv4si: + intrinsicName = "x86.avx512.mask.gather3siv4.si"; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + intrinsicName = "x86.avx512.mask.gather3siv8.sf"; + break; + case X86::BI__builtin_ia32_gather3siv8si: + intrinsicName = "x86.avx512.mask.gather3siv8.si"; + break; + case X86::BI__builtin_ia32_gathersiv8df: + intrinsicName = "x86.avx512.mask.gather.dpd.512"; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + intrinsicName = "x86.avx512.mask.gather.dps.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + intrinsicName = "x86.avx512.mask.gather.qpd.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + intrinsicName = "x86.avx512.mask.gather.qps.512"; + break; + case X86::BI__builtin_ia32_gathersiv8di: + intrinsicName = "x86.avx512.mask.gather.dpq.512"; + break; + case X86::BI__builtin_ia32_gathersiv16si: + intrinsicName = "x86.avx512.mask.gather.dpi.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + intrinsicName = "x86.avx512.mask.gather.qpq.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + intrinsicName = "x86.avx512.mask.gather.qpi.512"; + break; + } + + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned minElts = + std::min(cast(ops[0].getType()).getSize(), + cast(ops[2].getType()).getSize()); + ops[3] = getMaskVecValue(builder, loc, ops[3], minElts); + return emitIntrinsicCallOp(builder, loc, intrinsicName, + convertType(expr->getType()), ops); + } + case X86::BI__builtin_ia32_scattersiv8df: + case 
X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: { + llvm::StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + intrinsicName = "x86.avx512.mask.scatter.dpd.512"; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + intrinsicName = "x86.avx512.mask.scatter.dps.512"; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + intrinsicName = "x86.avx512.mask.scatter.qpd.512"; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + intrinsicName = "x86.avx512.mask.scatter.qps.512"; + break; + case X86::BI__builtin_ia32_scattersiv8di: + intrinsicName = "x86.avx512.mask.scatter.dpq.512"; + break; + case X86::BI__builtin_ia32_scattersiv16si: + intrinsicName = "x86.avx512.mask.scatter.dpi.512"; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + intrinsicName = "x86.avx512.mask.scatter.qpq.512"; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + intrinsicName = "x86.avx512.mask.scatter.qpi.512"; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + intrinsicName = "x86.avx512.mask.scatterdiv2.df"; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + intrinsicName = "x86.avx512.mask.scatterdiv2.di"; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + intrinsicName = "x86.avx512.mask.scatterdiv4.df"; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + intrinsicName = "x86.avx512.mask.scatterdiv4.di"; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + intrinsicName = "x86.avx512.mask.scatterdiv4.sf"; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + intrinsicName = "x86.avx512.mask.scatterdiv4.si"; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + intrinsicName = "x86.avx512.mask.scatterdiv8.sf"; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + intrinsicName = "x86.avx512.mask.scatterdiv8.si"; + break; + case X86::BI__builtin_ia32_scattersiv2df: + intrinsicName = "x86.avx512.mask.scattersiv2.df"; + break; + case X86::BI__builtin_ia32_scattersiv2di: + intrinsicName = "x86.avx512.mask.scattersiv2.di"; + break; + case X86::BI__builtin_ia32_scattersiv4df: + intrinsicName = "x86.avx512.mask.scattersiv4.df"; + break; + case X86::BI__builtin_ia32_scattersiv4di: + intrinsicName = "x86.avx512.mask.scattersiv4.di"; + break; + case X86::BI__builtin_ia32_scattersiv4sf: + intrinsicName = "x86.avx512.mask.scattersiv4.sf"; + break; + case X86::BI__builtin_ia32_scattersiv4si: + intrinsicName = "x86.avx512.mask.scattersiv4.si"; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + intrinsicName = "x86.avx512.mask.scattersiv8.sf"; + break; + case 
X86::BI__builtin_ia32_scattersiv8si: + intrinsicName = "x86.avx512.mask.scattersiv8.si"; + break; + } + + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned minElts = + std::min(cast(ops[2].getType()).getSize(), + cast(ops[3].getType()).getSize()); + ops[1] = getMaskVecValue(builder, loc, ops[1], minElts); + + return emitIntrinsicCallOp(builder, loc, intrinsicName, + convertType(expr->getType()), ops); + } + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: + case X86::BI__builtin_ia32_pmovqd512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_blendps: + case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendw256: + case X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_pblendd256: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: + return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), + true); + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: + return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), + false); + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilpd512: + case X86::BI__builtin_ia32_vpermilps512: { + const uint32_t imm = getSExtIntValueFromConstOp(ops[1]); + + llvm::SmallVector mask(16); + computeFullLaneShuffleMask(*this, ops[0], imm, false, mask); + + return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask); + } + case X86::BI__builtin_ia32_shufpd: + case 
X86::BI__builtin_ia32_shufpd256:
+  case X86::BI__builtin_ia32_shufpd512:
+  case X86::BI__builtin_ia32_shufps:
+  case X86::BI__builtin_ia32_shufps256:
+  case X86::BI__builtin_ia32_shufps512: {
+    const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+
+    llvm::SmallVector<int64_t> mask(16);
+    computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
+
+    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+                                    mask);
+  }
+  case X86::BI__builtin_ia32_permdi256:
+  case X86::BI__builtin_ia32_permdf256:
+  case X86::BI__builtin_ia32_permdi512:
+  case X86::BI__builtin_ia32_permdf512:
+  case X86::BI__builtin_ia32_palignr128:
+  case X86::BI__builtin_ia32_palignr256:
+  case X86::BI__builtin_ia32_palignr512:
+  case X86::BI__builtin_ia32_alignd128:
+  case X86::BI__builtin_ia32_alignd256:
+  case X86::BI__builtin_ia32_alignd512:
+  case X86::BI__builtin_ia32_alignq128:
+  case X86::BI__builtin_ia32_alignq256:
+  case X86::BI__builtin_ia32_alignq512:
+  case X86::BI__builtin_ia32_shuf_f32x4_256:
+  case X86::BI__builtin_ia32_shuf_f64x2_256:
+  case X86::BI__builtin_ia32_shuf_i32x4_256:
+  case X86::BI__builtin_ia32_shuf_i64x2_256:
+  case X86::BI__builtin_ia32_shuf_f32x4:
+  case X86::BI__builtin_ia32_shuf_f64x2:
+  case X86::BI__builtin_ia32_shuf_i32x4:
+  case X86::BI__builtin_ia32_shuf_i64x2:
+  case X86::BI__builtin_ia32_vperm2f128_pd256:
+  case X86::BI__builtin_ia32_vperm2f128_ps256:
+  case X86::BI__builtin_ia32_vperm2f128_si256:
+  case X86::BI__builtin_ia32_permti256:
+  case X86::BI__builtin_ia32_pslldqi128_byteshift:
+  case X86::BI__builtin_ia32_pslldqi256_byteshift:
+  case X86::BI__builtin_ia32_pslldqi512_byteshift:
+  case X86::BI__builtin_ia32_psrldqi128_byteshift:
+  case X86::BI__builtin_ia32_psrldqi256_byteshift:
+  case X86::BI__builtin_ia32_psrldqi512_byteshift:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  case X86::BI__builtin_ia32_kshiftliqi:
+  case X86::BI__builtin_ia32_kshiftlihi:
+  case X86::BI__builtin_ia32_kshiftlisi:
+  case X86::BI__builtin_ia32_kshiftlidi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    unsigned shiftVal =
+        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
+        0xff;
+    unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+    if (shiftVal >= numElems)
+      return builder.getNullValue(ops[0].getType(), loc);
+
+    mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
+
+    SmallVector<mlir::Attribute> indices;
+    mlir::Type i32Ty = builder.getSInt32Ty();
+    for (auto i : llvm::seq<unsigned>(0, numElems))
+      indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
+
+    // Illustrative example: shiftVal == 2, numElems == 8 yields
+    // <0, 0, in[0], ..., in[5]>, i.e. the mask shifted left by two bits.
+    mlir::Value zero = builder.getNullValue(in.getType(), loc);
+    mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
+    return builder.createBitcast(sv, ops[0].getType());
+  }
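+  // kshiftr mirrors kshiftl: the concatenation <in, zero> is shuffled down
+  // by shiftVal, so zeros shift in from the top of the mask.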
+  case X86::BI__builtin_ia32_kshiftriqi:
+  case X86::BI__builtin_ia32_kshiftrihi:
+  case X86::BI__builtin_ia32_kshiftrisi:
+  case X86::BI__builtin_ia32_kshiftridi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    unsigned shiftVal =
+        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
+        0xff;
+    unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+    if (shiftVal >= numElems)
+      return builder.getNullValue(ops[0].getType(), loc);
+
+    mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
+
+    SmallVector<mlir::Attribute> indices;
+    mlir::Type i32Ty = builder.getSInt32Ty();
+    for (auto i : llvm::seq<unsigned>(0, numElems))
+      indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
+
+    mlir::Value zero = builder.getNullValue(in.getType(), loc);
+    mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
+    return builder.createBitcast(sv, ops[0].getType());
+  }
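+  // Rotates are funnel shifts with both inputs equal: fshl(x, x, amt)
+  // rotates left and fshr(x, x, amt) rotates right.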
+  case X86::BI__builtin_ia32_vprotbi:
+  case X86::BI__builtin_ia32_vprotwi:
+  case X86::BI__builtin_ia32_vprotdi:
+  case X86::BI__builtin_ia32_vprotqi:
+  case X86::BI__builtin_ia32_prold128:
+  case X86::BI__builtin_ia32_prold256:
+  case X86::BI__builtin_ia32_prold512:
+  case X86::BI__builtin_ia32_prolq128:
+  case X86::BI__builtin_ia32_prolq256:
+  case X86::BI__builtin_ia32_prolq512:
+    return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+                              ops[0], ops[1], false);
+  case X86::BI__builtin_ia32_prord128:
+  case X86::BI__builtin_ia32_prord256:
+  case X86::BI__builtin_ia32_prord512:
+  case X86::BI__builtin_ia32_prorq128:
+  case X86::BI__builtin_ia32_prorq256:
+  case X86::BI__builtin_ia32_prorq512:
+    return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+                              ops[0], ops[1], true);
+  case X86::BI__builtin_ia32_selectb_128:
+  case X86::BI__builtin_ia32_selectb_256:
+  case X86::BI__builtin_ia32_selectb_512:
+  case X86::BI__builtin_ia32_selectw_128:
+  case X86::BI__builtin_ia32_selectw_256:
+  case X86::BI__builtin_ia32_selectw_512:
+  case X86::BI__builtin_ia32_selectd_128:
+  case X86::BI__builtin_ia32_selectd_256:
+  case X86::BI__builtin_ia32_selectd_512:
+  case X86::BI__builtin_ia32_selectq_128:
+  case X86::BI__builtin_ia32_selectq_256:
+  case X86::BI__builtin_ia32_selectq_512:
+  case X86::BI__builtin_ia32_selectph_128:
+  case X86::BI__builtin_ia32_selectph_256:
+  case X86::BI__builtin_ia32_selectph_512:
+  case X86::BI__builtin_ia32_selectpbf_128:
+  case X86::BI__builtin_ia32_selectpbf_256:
+  case X86::BI__builtin_ia32_selectpbf_512:
+  case X86::BI__builtin_ia32_selectps_128:
+  case X86::BI__builtin_ia32_selectps_256:
+  case X86::BI__builtin_ia32_selectps_512:
+  case X86::BI__builtin_ia32_selectpd_128:
+  case X86::BI__builtin_ia32_selectpd_256:
+  case X86::BI__builtin_ia32_selectpd_512:
+  case X86::BI__builtin_ia32_selectsh_128:
+  case X86::BI__builtin_ia32_selectsbf_128:
+  case X86::BI__builtin_ia32_selectss_128:
+  case X86::BI__builtin_ia32_selectsd_128:
+  case X86::BI__builtin_ia32_cmpb128_mask:
+  case X86::BI__builtin_ia32_cmpb256_mask:
+  case X86::BI__builtin_ia32_cmpb512_mask:
+  case X86::BI__builtin_ia32_cmpw128_mask:
+  case X86::BI__builtin_ia32_cmpw256_mask:
+  case X86::BI__builtin_ia32_cmpw512_mask:
+  case X86::BI__builtin_ia32_cmpd128_mask:
+  case X86::BI__builtin_ia32_cmpd256_mask:
+  case X86::BI__builtin_ia32_cmpd512_mask:
+  case X86::BI__builtin_ia32_cmpq128_mask:
+  case X86::BI__builtin_ia32_cmpq256_mask:
+  case X86::BI__builtin_ia32_cmpq512_mask:
+  case X86::BI__builtin_ia32_ucmpb128_mask:
+  case X86::BI__builtin_ia32_ucmpb256_mask:
+  case X86::BI__builtin_ia32_ucmpb512_mask:
+  case X86::BI__builtin_ia32_ucmpw128_mask:
+  case X86::BI__builtin_ia32_ucmpw256_mask:
+  case X86::BI__builtin_ia32_ucmpw512_mask:
+  case X86::BI__builtin_ia32_ucmpd128_mask:
+  case X86::BI__builtin_ia32_ucmpd256_mask:
+  case X86::BI__builtin_ia32_ucmpd512_mask:
+  case X86::BI__builtin_ia32_ucmpq128_mask:
+  case X86::BI__builtin_ia32_ucmpq256_mask:
+  case X86::BI__builtin_ia32_ucmpq512_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  case X86::BI__builtin_ia32_vpcomb:
+  case X86::BI__builtin_ia32_vpcomw:
+  case X86::BI__builtin_ia32_vpcomd:
+  case X86::BI__builtin_ia32_vpcomq:
+    return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
+  case X86::BI__builtin_ia32_vpcomub:
+  case X86::BI__builtin_ia32_vpcomuw:
+  case X86::BI__builtin_ia32_vpcomud:
+  case X86::BI__builtin_ia32_vpcomuq:
+    return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
+  case X86::BI__builtin_ia32_kortestcqi:
+  case X86::BI__builtin_ia32_kortestchi:
+  case X86::BI__builtin_ia32_kortestcsi:
+  case X86::BI__builtin_ia32_kortestcdi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    mlir::Value allOnesOp =
+        builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
+    // kortestc is set when the OR of the two masks is all ones.
+    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+    mlir::Value cmp =
+        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
+    return builder.createCast(cir::CastKind::bool_to_int, cmp,
+                              cgm.convertType(expr->getType()));
+  }
+  case X86::BI__builtin_ia32_kortestzqi:
+  case X86::BI__builtin_ia32_kortestzhi:
+  case X86::BI__builtin_ia32_kortestzsi:
+  case X86::BI__builtin_ia32_kortestzdi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
+    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+    mlir::Value cmp =
+        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
+    return builder.createCast(cir::CastKind::bool_to_int, cmp,
+                              cgm.convertType(expr->getType()));
+  }
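+  // The ktestc/ktestz builtins map directly onto the corresponding AVX512
+  // ktest intrinsics; emitX86MaskTest handles the i1-vector conversion.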
+  case X86::BI__builtin_ia32_ktestcqi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.b", ops);
+  case X86::BI__builtin_ia32_ktestzqi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.b", ops);
+  case X86::BI__builtin_ia32_ktestchi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.w", ops);
+  case X86::BI__builtin_ia32_ktestzhi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.w", ops);
+  case X86::BI__builtin_ia32_ktestcsi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.d", ops);
+  case X86::BI__builtin_ia32_ktestzsi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.d", ops);
+  case X86::BI__builtin_ia32_ktestcdi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.q", ops);
+  case X86::BI__builtin_ia32_ktestzdi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.q", ops);
+  case X86::BI__builtin_ia32_kaddqi:
+    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.b", ops);
+  case X86::BI__builtin_ia32_kaddhi:
+    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.w", ops);
+  case X86::BI__builtin_ia32_kaddsi:
+    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.d", ops);
+  case X86::BI__builtin_ia32_kadddi:
+    return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.q", ops);
+  case X86::BI__builtin_ia32_kandqi:
+  case X86::BI__builtin_ia32_kandhi:
+  case X86::BI__builtin_ia32_kandsi:
+  case X86::BI__builtin_ia32_kanddi:
+    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::And, ops);
+  case X86::BI__builtin_ia32_kandnqi:
+  case X86::BI__builtin_ia32_kandnhi:
+  case X86::BI__builtin_ia32_kandnsi:
+  case X86::BI__builtin_ia32_kandndi:
+    // kandn computes ~lhs & rhs via invertLHS.
+    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::And, ops, true);
+  case X86::BI__builtin_ia32_korqi:
+  case X86::BI__builtin_ia32_korhi:
+  case X86::BI__builtin_ia32_korsi:
+  case X86::BI__builtin_ia32_kordi:
+    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::Or, ops);
+  case X86::BI__builtin_ia32_kxnorqi:
+  case X86::BI__builtin_ia32_kxnorhi:
+  case X86::BI__builtin_ia32_kxnorsi:
+  case X86::BI__builtin_ia32_kxnordi:
+    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::Xor, ops, true);
+  case X86::BI__builtin_ia32_kxorqi:
+  case X86::BI__builtin_ia32_kxorhi:
+  case X86::BI__builtin_ia32_kxorsi:
+  case X86::BI__builtin_ia32_kxordi:
+    return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::Xor, ops);
+  case X86::BI__builtin_ia32_knotqi:
+  case X86::BI__builtin_ia32_knothi:
+  case X86::BI__builtin_ia32_knotsi:
+  case X86::BI__builtin_ia32_knotdi: {
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value resVec =
+        getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
+    return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
+  }
+  case X86::BI__builtin_ia32_kmovb:
+  case X86::BI__builtin_ia32_kmovw:
+  case X86::BI__builtin_ia32_kmovd:
+  case X86::BI__builtin_ia32_kmovq: {
+    // Bitcast to vXi1 type and then back to integer. This gets the mask
+    // register type into the IR, but might be optimized out depending on
+    // what's around it.
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value resVec =
+        getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
+    return builder.createBitcast(resVec, ops[0].getType());
+  }
+  case X86::BI__builtin_ia32_sqrtsh_round_mask:
+  case X86::BI__builtin_ia32_sqrtsd_round_mask:
+  case X86::BI__builtin_ia32_sqrtss_round_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  case X86::BI__builtin_ia32_sqrtph512:
+  case X86::BI__builtin_ia32_sqrtps512:
+  case X86::BI__builtin_ia32_sqrtpd512: {
+    // Lower the 512-bit vector sqrt builtins directly to cir.sqrt.
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value arg = ops[0];
+    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+  }
+  case X86::BI__builtin_ia32_pmuludq128:
+  case X86::BI__builtin_ia32_pmuludq256:
+  case X86::BI__builtin_ia32_pmuludq512: {
+    unsigned opTypePrimitiveSizeInBits =
+        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
+                        ops, opTypePrimitiveSizeInBits);
+  }
+  case X86::BI__builtin_ia32_pmuldq128:
+  case X86::BI__builtin_ia32_pmuldq256:
+  case X86::BI__builtin_ia32_pmuldq512: {
+    unsigned opTypePrimitiveSizeInBits =
+        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
+                        ops, opTypePrimitiveSizeInBits);
+  }
+  case X86::BI__builtin_ia32_pternlogd512_mask:
+  case X86::BI__builtin_ia32_pternlogq512_mask:
+  case X86::BI__builtin_ia32_pternlogd128_mask:
+  case X86::BI__builtin_ia32_pternlogd256_mask:
+  case X86::BI__builtin_ia32_pternlogq128_mask:
+  case X86::BI__builtin_ia32_pternlogq256_mask:
+  case X86::BI__builtin_ia32_pternlogd512_maskz:
+  case X86::BI__builtin_ia32_pternlogq512_maskz:
+  case X86::BI__builtin_ia32_pternlogd128_maskz:
+  case X86::BI__builtin_ia32_pternlogd256_maskz:
+  case X86::BI__builtin_ia32_pternlogq128_maskz:
+  case
X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + case X86::BI__builtin_ia32_reduce_fadd_pd512: + case X86::BI__builtin_ia32_reduce_fadd_ps512: + case X86::BI__builtin_ia32_reduce_fadd_ph512: + case X86::BI__builtin_ia32_reduce_fadd_ph256: + case X86::BI__builtin_ia32_reduce_fadd_ph128: + case X86::BI__builtin_ia32_reduce_fmul_pd512: + case X86::BI__builtin_ia32_reduce_fmul_ps512: + case X86::BI__builtin_ia32_reduce_fmul_ph512: + case X86::BI__builtin_ia32_reduce_fmul_ph256: + case X86::BI__builtin_ia32_reduce_fmul_ph128: + case X86::BI__builtin_ia32_reduce_fmax_pd512: + case X86::BI__builtin_ia32_reduce_fmax_ps512: + case X86::BI__builtin_ia32_reduce_fmax_ph512: + case X86::BI__builtin_ia32_reduce_fmax_ph256: + case X86::BI__builtin_ia32_reduce_fmax_ph128: + case X86::BI__builtin_ia32_reduce_fmin_pd512: + case X86::BI__builtin_ia32_reduce_fmin_ps512: + case X86::BI__builtin_ia32_reduce_fmin_ph512: + case X86::BI__builtin_ia32_reduce_fmin_ph256: + case X86::BI__builtin_ia32_reduce_fmin_ph128: + case X86::BI__builtin_ia32_rdrand16_step: + case X86::BI__builtin_ia32_rdrand32_step: + case X86::BI__builtin_ia32_rdrand64_step: + case X86::BI__builtin_ia32_rdseed16_step: + case X86::BI__builtin_ia32_rdseed32_step: + case X86::BI__builtin_ia32_rdseed64_step: + case X86::BI__builtin_ia32_addcarryx_u32: + case X86::BI__builtin_ia32_addcarryx_u64: + case X86::BI__builtin_ia32_subborrow_u32: + case X86::BI__builtin_ia32_subborrow_u64: + case X86::BI__builtin_ia32_fpclassps128_mask: + case X86::BI__builtin_ia32_fpclassps256_mask: + case X86::BI__builtin_ia32_fpclassps512_mask: + case X86::BI__builtin_ia32_vfpclassbf16128_mask: + case X86::BI__builtin_ia32_vfpclassbf16256_mask: + case X86::BI__builtin_ia32_vfpclassbf16512_mask: + case X86::BI__builtin_ia32_fpclassph128_mask: + case X86::BI__builtin_ia32_fpclassph256_mask: + case X86::BI__builtin_ia32_fpclassph512_mask: + case X86::BI__builtin_ia32_fpclasspd128_mask: + case X86::BI__builtin_ia32_fpclasspd256_mask: + case X86::BI__builtin_ia32_fpclasspd512_mask: + case X86::BI__builtin_ia32_vp2intersect_q_512: + case X86::BI__builtin_ia32_vp2intersect_q_256: + case X86::BI__builtin_ia32_vp2intersect_q_128: + case X86::BI__builtin_ia32_vp2intersect_d_512: + case X86::BI__builtin_ia32_vp2intersect_d_256: + case X86::BI__builtin_ia32_vp2intersect_d_128: + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmpleps: + case 
X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqpd: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltpd: + return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), + cir::CmpOpKind::lt, /*shouldInvert=*/true); + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnlepd: + return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), + cir::CmpOpKind::le, /*shouldInvert=*/true); + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordpd: + case X86::BI__builtin_ia32_cmpph128_mask: + case X86::BI__builtin_ia32_cmpph256_mask: + case X86::BI__builtin_ia32_cmpph512_mask: + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmpps512_mask: + case X86::BI__builtin_ia32_cmppd128_mask: + case X86::BI__builtin_ia32_cmppd256_mask: + case X86::BI__builtin_ia32_cmppd512_mask: + case X86::BI__builtin_ia32_vcmpbf16512_mask: + case X86::BI__builtin_ia32_vcmpbf16256_mask: + case X86::BI__builtin_ia32_vcmpbf16128_mask: + case X86::BI__builtin_ia32_cmpps: + case X86::BI__builtin_ia32_cmpps256: + case X86::BI__builtin_ia32_cmppd: + case X86::BI__builtin_ia32_cmppd256: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpeqsd: + case X86::BI__builtin_ia32_cmpltsd: + case X86::BI__builtin_ia32_cmplesd: + case X86::BI__builtin_ia32_cmpunordsd: + case X86::BI__builtin_ia32_cmpneqsd: + case X86::BI__builtin_ia32_cmpnltsd: + case X86::BI__builtin_ia32_cmpnlesd: + case X86::BI__builtin_ia32_cmpordsd: + case X86::BI__builtin_ia32_vcvtph2ps_mask: + case X86::BI__builtin_ia32_vcvtph2ps256_mask: + case X86::BI__builtin_ia32_vcvtph2ps512_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: + case X86::BI__cpuid: + case X86::BI__cpuidex: + case X86::BI__emul: + case X86::BI__emulu: + case X86::BI__mulh: + case X86::BI__umulh: + case X86::BI_mul128: + case X86::BI_umul128: + case X86::BI__faststorefence: + case X86::BI__shiftleft128: + case X86::BI__shiftright128: + case X86::BI_ReadWriteBarrier: + case X86::BI_ReadBarrier: + case X86::BI_WriteBarrier: + case X86::BI_AddressOfReturnAddress: + case X86::BI__stosb: + case X86::BI__ud2: + case X86::BI__int2c: + case X86::BI__readfsbyte: + case X86::BI__readfsword: + case X86::BI__readfsdword: + case X86::BI__readfsqword: + case X86::BI__readgsbyte: + case X86::BI__readgsword: + case X86::BI__readgsdword: + case X86::BI__readgsqword: + case X86::BI__builtin_ia32_encodekey128_u32: + case X86::BI__builtin_ia32_encodekey256_u32: + case X86::BI__builtin_ia32_aesenc128kl_u8: + case X86::BI__builtin_ia32_aesdec128kl_u8: + case X86::BI__builtin_ia32_aesenc256kl_u8: + case X86::BI__builtin_ia32_aesdec256kl_u8: + case X86::BI__builtin_ia32_aesencwide128kl_u8: + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + case X86::BI__builtin_ia32_aesencwide256kl_u8: + case 
X86::BI__builtin_ia32_aesdecwide256kl_u8:
+  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+  case X86::BI__builtin_ia32_vfmaddcph512_mask:
+  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+  case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+  case X86::BI__builtin_ia32_prefetchi:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  }
+}

From 996b7e7f2d01584b8cae039669c3b6b9ad78c506 Mon Sep 17 00:00:00 2001
From: Andy Kaylor
Date: Mon, 8 Dec 2025 14:04:55 -0800
Subject: [PATCH 33/33] Fix formatting

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 228da428844e9..bda74b7fbdf6e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -187,7 +187,7 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
 }
 
 mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
-    cir::SqrtOp op, OpAdaptor adaptor, 
+    cir::SqrtOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
   mlir::Type resTy = typeConverter->convertType(op.getType());
   rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy, adaptor.getSrc());