Skip to content

Commit c8ffb85

Browse files
committed
[CIR][X86] Implement lowering for AVX512 ktest builtins (kortestc, kortestz)
This patch adds CIR codegen support for the AVX512 mask test builtins on X86, including kortestc and kortestz across all supported mask widths (qi, hi, si, di). Each builtin is lowered to the expected vector<i1> mask logic and scalar comparison form in CIR, consistent with the semantics of the corresponding LLVM implementations. Because ClangIR does not yet provide a dedicated `zext` operation, the lowering emulates zero-extension by first converting the boolean result through `bool_to_int` and then performing an integer cast to the final result type. This reproduces the `icmp` + `zext` pattern used in LLVM IR and maintains semantic equivalence.
1 parent 8e449ec commit c8ffb85

File tree

4 files changed

+273
-6
lines changed

4 files changed

+273
-6
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -757,14 +757,40 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
757757
case X86::BI__builtin_ia32_vpcomuw:
758758
case X86::BI__builtin_ia32_vpcomud:
759759
case X86::BI__builtin_ia32_vpcomuq:
760+
cgm.errorNYI(expr->getSourceRange(),
761+
std::string("unimplemented X86 builtin call: ") +
762+
getContext().BuiltinInfo.getName(builtinID));
763+
return {};
760764
case X86::BI__builtin_ia32_kortestcqi:
761765
case X86::BI__builtin_ia32_kortestchi:
762766
case X86::BI__builtin_ia32_kortestcsi:
763-
case X86::BI__builtin_ia32_kortestcdi:
767+
case X86::BI__builtin_ia32_kortestcdi: {
768+
mlir::Location loc = getLoc(expr->getExprLoc());
769+
cir::IntType ty = cast<cir::IntType>(ops[0].getType());
770+
cir::IntAttr allOnesAttr =
771+
cir::IntAttr::get(ty, APInt::getAllOnes(ty.getWidth()));
772+
cir::ConstantOp allOnesOp = builder.getConstant(loc, allOnesAttr);
773+
mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
774+
mlir::Value cmp =
775+
cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
776+
return builder.createCast(cir::CastKind::bool_to_int, cmp,
777+
cgm.convertType(expr->getType()));
778+
}
764779
case X86::BI__builtin_ia32_kortestzqi:
765780
case X86::BI__builtin_ia32_kortestzhi:
766781
case X86::BI__builtin_ia32_kortestzsi:
767-
case X86::BI__builtin_ia32_kortestzdi:
782+
case X86::BI__builtin_ia32_kortestzdi: {
783+
mlir::Location loc = getLoc(expr->getExprLoc());
784+
cir::IntType ty = cast<cir::IntType>(ops[0].getType());
785+
cir::IntAttr allZerosAttr =
786+
cir::IntAttr::get(ty, APInt::getZero(ty.getWidth()));
787+
cir::ConstantOp allZerosOp = builder.getConstant(loc, allZerosAttr);
788+
mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
789+
mlir::Value cmp =
790+
cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
791+
return builder.createCast(cir::CastKind::bool_to_int, cmp,
792+
cgm.convertType(expr->getType()));
793+
}
768794
case X86::BI__builtin_ia32_ktestcqi:
769795
case X86::BI__builtin_ia32_ktestzqi:
770796
case X86::BI__builtin_ia32_ktestchi:
@@ -773,10 +799,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
773799
case X86::BI__builtin_ia32_ktestzsi:
774800
case X86::BI__builtin_ia32_ktestcdi:
775801
case X86::BI__builtin_ia32_ktestzdi:
776-
cgm.errorNYI(expr->getSourceRange(),
777-
std::string("unimplemented X86 builtin call: ") +
778-
getContext().BuiltinInfo.getName(builtinID));
779-
return {};
780802
case X86::BI__builtin_ia32_kaddqi:
781803
return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
782804
"x86.avx512.kadd.b", ops);

clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,3 +465,127 @@ __mmask64 test_kmov_q(__mmask64 A) {
465465

466466
return __builtin_ia32_kmovq(A);
467467
}
468+
469+
unsigned char test_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
470+
// CIR-LABEL: _kortestc_mask32_u8
471+
// CIR: [[ALL_ONES:%.*]] = cir.const #cir.int<4294967295> : !u32i
472+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
473+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
474+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<32 x !cir.int<u, 1>>
475+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
476+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ALL_ONES]]) : !u32i, !cir.bool
477+
// CIR: [[B2I:%.*]] = cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
478+
// CIR: cir.cast integral [[B2I]] : !s32i -> !u8i
479+
480+
// LLVM-LABEL: _kortestc_mask32_u8
481+
// LLVM: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
482+
// LLVM: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
483+
// LLVM: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
484+
// LLVM: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
485+
// LLVM: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
486+
// LLVM: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
487+
// LLVM: trunc i32 [[ZEXT]] to i8
488+
489+
// OGCG-LABEL: _kortestc_mask32_u8
490+
// OGCG: bitcast i32 %{{.*}} to <32 x i1>
491+
// OGCG: bitcast i32 %{{.*}} to <32 x i1>
492+
// OGCG: or <32 x i1> {{.*}}, {{.*}}
493+
// OGCG: bitcast <32 x i1> {{.*}} to i32
494+
// OGCG: icmp eq i32 {{.*}}, -1
495+
// OGCG: zext i1 {{.*}} to i32
496+
// OGCG: trunc i32 {{.*}} to i8
497+
return _kortestc_mask32_u8(__A, __B);
498+
}
499+
500+
unsigned char test_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
501+
// CIR-LABEL: _kortestc_mask64_u8
502+
// CIR: [[ALL_ONES:%.*]] = cir.const #cir.int<18446744073709551615> : !u64i
503+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
504+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
505+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<64 x !cir.int<u, 1>>
506+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
507+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ALL_ONES]]) : !u64i, !cir.bool
508+
// CIR: [[B2I:%.*]] = cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
509+
// CIR: cir.cast integral [[B2I]] : !s32i -> !u8i
510+
511+
// LLVM-LABEL: _kortestc_mask64_u8
512+
// LLVM: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
513+
// LLVM: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
514+
// LLVM: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
515+
// LLVM: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
516+
// LLVM: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
517+
// LLVM: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
518+
// LLVM: trunc i32 [[ZEXT]] to i8
519+
520+
// OGCG-LABEL: _kortestc_mask64_u8
521+
// OGCG: bitcast i64 %{{.*}} to <64 x i1>
522+
// OGCG: bitcast i64 %{{.*}} to <64 x i1>
523+
// OGCG: or <64 x i1> {{.*}}, {{.*}}
524+
// OGCG: bitcast <64 x i1> {{.*}} to i64
525+
// OGCG: icmp eq i64 {{.*}}, -1
526+
// OGCG: zext i1 {{.*}} to i32
527+
// OGCG: trunc i32 {{.*}} to i8
528+
return _kortestc_mask64_u8(__A, __B);
529+
}
530+
531+
unsigned char test_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
532+
// CIR-LABEL: _kortestz_mask32_u8
533+
// CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !u32i
534+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
535+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
536+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<32 x !cir.int<u, 1>>
537+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
538+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ZERO]]) : !u32i, !cir.bool
539+
// CIR: [[B2I:%.*]] = cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
540+
// CIR: cir.cast integral [[B2I]] : !s32i -> !u8i
541+
542+
// LLVM-LABEL: _kortestz_mask32_u8
543+
// LLVM: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
544+
// LLVM: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
545+
// LLVM: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
546+
// LLVM: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
547+
// LLVM: [[CMP:%.*]] = icmp eq i32 [[CAST]], 0
548+
// LLVM: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
549+
// LLVM: trunc i32 [[ZEXT]] to i8
550+
551+
// OGCG-LABEL: _kortestz_mask32_u8
552+
// OGCG: bitcast i32 %{{.*}} to <32 x i1>
553+
// OGCG: bitcast i32 %{{.*}} to <32 x i1>
554+
// OGCG: or <32 x i1> {{.*}}, {{.*}}
555+
// OGCG: bitcast <32 x i1> {{.*}} to i32
556+
// OGCG: icmp eq i32 {{.*}}, 0
557+
// OGCG: zext i1 {{.*}} to i32
558+
// OGCG: trunc i32 {{.*}} to i8
559+
return _kortestz_mask32_u8(__A, __B);
560+
}
561+
562+
unsigned char test_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
563+
// CIR-LABEL: _kortestz_mask64_u8
564+
// CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !u64i
565+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
566+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
567+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<64 x !cir.int<u, 1>>
568+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
569+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ZERO]]) : !u64i, !cir.bool
570+
// CIR: [[B2I:%.*]] = cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
571+
// CIR: cir.cast integral [[B2I]] : !s32i -> !u8i
572+
573+
// LLVM-LABEL: _kortestz_mask64_u8
574+
// LLVM: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
575+
// LLVM: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
576+
// LLVM: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
577+
// LLVM: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
578+
// LLVM: [[CMP:%.*]] = icmp eq i64 [[CAST]], 0
579+
// LLVM: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
580+
// LLVM: trunc i32 [[ZEXT]] to i8
581+
582+
// OGCG-LABEL: _kortestz_mask64_u8
583+
// OGCG: bitcast i64 %{{.*}} to <64 x i1>
584+
// OGCG: bitcast i64 %{{.*}} to <64 x i1>
585+
// OGCG: or <64 x i1> {{.*}}, {{.*}}
586+
// OGCG: bitcast <64 x i1> {{.*}} to i64
587+
// OGCG: icmp eq i64 {{.*}}, 0
588+
// OGCG: zext i1 {{.*}} to i32
589+
// OGCG: trunc i32 {{.*}} to i8
590+
return _kortestz_mask64_u8(__A, __B);
591+
}

clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,68 @@ __mmask8 test_kmov_b(__mmask8 A) {
208208
// OGCG: bitcast <8 x i1> {{.*}} to i8
209209
return __builtin_ia32_kmovb(A);
210210
}
211+
212+
213+
unsigned char test_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) {
214+
// CIR-LABEL: _kortestc_mask8_u8
215+
// CIR: [[ALL_ONES:%.*]] = cir.const #cir.int<255> : !u8i
216+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
217+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
218+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<8 x !cir.int<u, 1>>
219+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
220+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ALL_ONES]]) : !u8i, !cir.bool
221+
// CIR: cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
222+
// CIR: cir.cast integral {{.*}} : !s32i -> !u8i
223+
224+
225+
// LLVM-LABEL: _kortestc_mask8_u8
226+
// LLVM: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
227+
// LLVM: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
228+
// LLVM: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
229+
// LLVM: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
230+
// LLVM: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
231+
// LLVM: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
232+
// LLVM: trunc i32 [[ZEXT]] to i8
233+
234+
// OGCG-LABEL: _kortestc_mask8_u8
235+
// OGCG: bitcast i8 %{{.*}} to <8 x i1>
236+
// OGCG: bitcast i8 %{{.*}} to <8 x i1>
237+
// OGCG: or <8 x i1> {{.*}}, {{.*}}
238+
// OGCG: bitcast <8 x i1> {{.*}} to i8
239+
// OGCG: icmp eq i8 {{.*}}, -1
240+
// OGCG: zext i1 {{.*}} to i32
241+
// OGCG: trunc i32 {{.*}} to i8
242+
return _kortestc_mask8_u8(__A,__B);
243+
}
244+
245+
unsigned char test_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) {
246+
// CIR-LABEL: _kortestz_mask8_u8
247+
// CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !u8i
248+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
249+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
250+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<8 x !cir.int<u, 1>>
251+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
252+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ZERO]]) : !u8i, !cir.bool
253+
// CIR: cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
254+
// CIR: cir.cast integral {{.*}} : !s32i -> !u8i
255+
256+
257+
// LLVM-LABEL: _kortestz_mask8_u8
258+
// LLVM: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
259+
// LLVM: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
260+
// LLVM: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
261+
// LLVM: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
262+
// LLVM: [[CMP:%.*]] = icmp eq i8 [[CAST]], 0
263+
// LLVM: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
264+
// LLVM: trunc i32 [[ZEXT]] to i8
265+
266+
// OGCG-LABEL: _kortestz_mask8_u8
267+
// OGCG: bitcast i8 %{{.*}} to <8 x i1>
268+
// OGCG: bitcast i8 %{{.*}} to <8 x i1>
269+
// OGCG: or <8 x i1> {{.*}}, {{.*}}
270+
// OGCG: bitcast <8 x i1> {{.*}} to i8
271+
// OGCG: icmp eq i8 {{.*}}, 0
272+
// OGCG: zext i1 {{.*}} to i32
273+
// OGCG: trunc i32 {{.*}} to i8
274+
return _kortestz_mask8_u8(__A,__B);
275+
}

clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,59 @@ __mmask16 test_kmov_w(__mmask16 A) {
228228
// OGCG: bitcast <16 x i1> {{.*}} to i16
229229
return __builtin_ia32_kmovw(A);
230230
}
231+
232+
int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
233+
// CIR-LABEL: _mm512_kortestc
234+
// CIR: [[ALL_ONES:%.*]] = cir.const #cir.int<65535> : !u16i
235+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
236+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
237+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<16 x !cir.int<u, 1>>
238+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
239+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ALL_ONES]]) : !u16i, !cir.bool
240+
// CIR: cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
241+
242+
// LLVM-LABEL: _mm512_kortestc
243+
// LLVM: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
244+
// LLVM: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
245+
// LLVM: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
246+
// LLVM: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
247+
// LLVM: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
248+
// LLVM: zext i1 [[CMP]] to i32
249+
250+
// OGCG-LABEL: _mm512_kortestc
251+
// OGCG: bitcast i16 %{{.*}} to <16 x i1>
252+
// OGCG: bitcast i16 %{{.*}} to <16 x i1>
253+
// OGCG: or <16 x i1> {{.*}}, {{.*}}
254+
// OGCG: bitcast <16 x i1> {{.*}} to i16
255+
// OGCG: icmp eq i16 {{.*}}, -1
256+
// OGCG: zext i1 {{.*}} to i32
257+
return _mm512_kortestc(__A,__B);
258+
}
259+
260+
int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
261+
// CIR-LABEL: _mm512_kortestz
262+
// CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !u16i
263+
// CIR: [[LHS:%.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
264+
// CIR: [[RHS:%.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
265+
// CIR: [[OR:%.*]] = cir.binop(or, [[LHS]], [[RHS]]) : !cir.vector<16 x !cir.int<u, 1>>
266+
// CIR: [[OR_INT:%.*]] = cir.cast bitcast [[OR]] : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
267+
// CIR: [[CMP:%.*]] = cir.cmp(eq, [[OR_INT]], [[ZERO]]) : !u16i, !cir.bool
268+
// CIR: cir.cast bool_to_int [[CMP]] : !cir.bool -> !s32i
269+
270+
// LLVM-LABEL: _mm512_kortestz
271+
// LLVM: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
272+
// LLVM: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
273+
// LLVM: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
274+
// LLVM: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
275+
// LLVM: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0
276+
// LLVM: zext i1 [[CMP]] to i32
277+
278+
// OGCG-LABEL: _mm512_kortestz
279+
// OGCG: bitcast i16 %{{.*}} to <16 x i1>
280+
// OGCG: bitcast i16 %{{.*}} to <16 x i1>
281+
// OGCG: or <16 x i1> {{.*}}, {{.*}}
282+
// OGCG: bitcast <16 x i1> {{.*}} to i16
283+
// OGCG: icmp eq i16 {{.*}}, 0
284+
// OGCG: zext i1 {{.*}} to i32
285+
return _mm512_kortestz(__A,__B);
286+
}

0 commit comments

Comments
 (0)